Living Review

Introduction

This file is part of the repository hosted at https://gitlab.com/extending-the-earcheck/living-review, and its rendered version is hosted by GitLab Pages at https://extending-the-earcheck.gitlab.io/living-review. The Google Sheet holding the extraction script template is available from https://docs.google.com/spreadsheets/d/1duDKLMmhel_5fBPhF_H-0Dbic896-5eCef9bBCSJMco.

### Require ufs >= 0.4. Using utils::packageVersion() gives a proper
### numeric version comparison; the original string comparison
### (>= "0.4") would misclassify e.g. version "0.10" as too old, and
### indexing installed.packages() errors unhelpfully ("subscript out of
### bounds") when ufs is not installed at all.
if (!requireNamespace("ufs", quietly = TRUE) ||
      (utils::packageVersion("ufs") < "0.4")) {
  stop("You need to have at least version 0.4 of the `ufs` package installed; ",
       "install it with:\n\ninstall.packages('ufs');");
}

### Install/update the packages this review depends on. These calls hit
### the network; quiet = FALSE keeps the install output visible in the
### rendered report. The indented '##' lines are console output captured
### by knitr in the rendered version of this document.

# devtools::load_all("B:/Data/R/metabefor");
### Get dev version of metabefor
ufs::quietGitLabUpdate("r-packages/metabefor", quiet = FALSE);
## Skipping install of 'metabefor' from a gitlab remote, the SHA1 (4f414c33) has not changed since last install.
##   Use `force = TRUE` to force installation
### Get version of revtools with bugfix
# ufs::quietRemotesInstall("matherion/revtools@bibtex-importing-bugfix",
#                          func = "install_github", quiet = FALSE);
# ufs::quietRemotesInstall("matherion/synthesisr@bibtex-import-bugfix",
#                          func = "install_github", quiet = FALSE);
### Get the development version of synthesisr from GitHub
ufs::quietRemotesInstall("rmetaverse/synthesisr",
                         func = "install_github", quiet = FALSE);
## Skipping install of 'synthesisr' from a github remote, the SHA1 (c406bc9f) has not changed since last install.
##   Use `force = TRUE` to force installation
### Get additional packages
ufs::checkPkgs('here');             ### To easily access files
                                    ### using 'relative paths'
ufs::checkPkgs("googlesheets4");    ### To import data from google
                                    ### sheets in metabefor
ufs::checkPkgs("synthesisr");       ### To import RIS files
ufs::checkPkgs("stringdist");       ### To compute string distances
###-----------------------------------------------------------------------------
### Settings
###-----------------------------------------------------------------------------

### By default show R code and don't comment output
knitr::opts_chunk$set(echo = TRUE);
knitr::opts_chunk$set(comment = NA);

### Master switches for the expensive chunks; set to TRUE to rerun the
### screening / extraction work instead of using stored results.
### (Fix: the first two flags used `=` for assignment while the rest of
### the file uses `<-`; made consistent.)
runScreeningChunks <- FALSE;
runExtractionChunks <- FALSE;
importExtractedStudiesFromRxsFiles <- FALSE;
runStudyTreePreparation <- FALSE;

###-----------------------------------------------------------------------------
### Paths
###-----------------------------------------------------------------------------

### Base path of the repository, resolved by the here package
basePath <- here::here();

### Subdirectory holding the exported query hits
queryHitExportPath <- file.path(basePath, "search");

### Subdirectory holding the screening files
screeningPath <- file.path(basePath, "screening");

### Subdirectory holding the rxs (extraction script) specifications
rxsSpecificationsPath <- file.path(basePath, "rxs-specifications");

### Subdirectory where the extraction script template is written
extractionScriptTemplatePath <- file.path(basePath, "extraction");

### Subdirectory holding the completed extraction scripts
extractionScriptPath <- file.path(basePath, "extraction", "Extracted studies");

### Subdirectory for intermediate output
outputPath <- file.path(basePath, "output");

### Subdirectory for data files
dataPath <- file.path(basePath, "data");

###-----------------------------------------------------------------------------
### Extraction Script and Aggregation Tree Google sheets URLs
###-----------------------------------------------------------------------------

### Google Sheet holding the extraction script specification
sheetsURL <- paste0(
  "https://docs.google.com/spreadsheets/d/",
  "1duDKLMmhel_5fBPhF_H-0Dbic896-5eCef9bBCSJMco"
);

### Google Sheet holding the aggregation tree specification
aggregationTreeURL <- paste0(
  "https://docs.google.com/spreadsheets/d/",
  "1P5IZekDxAiW3B_TLz392wd68alv4HFF5eTnoPwpGs_Q"
);

### Identifiers of the txs Google Sheets used to supplement the study
### trees. The trailing NULL lets every real entry end in a comma and
### is silently dropped by c().
txsIDs <- c(
  "12WldMPYFkJIU2VaYTg0Ysj-kiSe32eUY25Dghcnochs",  ### theories
  "18DQ0tPqqsiSIrrXJkAZ4MNsH27i2fSX588XwdUam8uo",  ### hp-measure
  "1XOi1bgmewvVeorQcNXNZHGVwi_PEQxRO2l_6vOzJst8",  ### hp-precise
  "1bpcXEXQYJcmilX4F_ZlJ5RsO2B-Ekkz0B2mVnygUEso",  ### pld-use
  "1DQZtdLYhYNa6CISxOrWpyvkEsarHDr9LaeuAyGir4d4",  ### barrier
  "1wg4PqFnZ2-L6MLYvOPOD4pUy_JbmcA5wlIxDeHwzf-U",  ### participant-categories
  "1KysisDj5bG9j8F4uIU_7C_jIBhA3KeUvKlOCGNHVBCM",  ### grey-literature
  "1FSxP6rdAtJckLrSfIShDV0kkU99edPbQclBTTqebg2I",  ### scale ranges
  NULL
);

###-----------------------------------------------------------------------------
### Extraction administration
###-----------------------------------------------------------------------------

### Columns shared by all extraction administration spreadsheets
extraction_administration_columns_all <- c(
  "qurid", "doi", "title", "year", "author",
  "included", "fulltext", "extractor", "exclude_check",
  "exclude_final", "finished", "comments"
);

### First-batch spreadsheets add the stage-1 screening bookkeeping
extraction_administration_columns_firstBatch <- c(
  extraction_administration_columns_all,
  "screening_stage1", "included_by_B_not_by_A",
  "screenerastatus", "screenerbstatus1",
  "screeneraconfidence", "screenerbconfidence"
);

### Second-batch spreadsheets use the round-2 field name for screener A
extraction_administration_columns_secondBatch <- c(
  extraction_administration_columns_all,
  "screening_stage2", "included_by_B_not_by_A",
  "screener_a_status_r2", "screenerbstatus1",
  "screeneraconfidence", "screenerbconfidence"
);
###-----------------------------------------------------------------------------
### Import rxs specifications
###-----------------------------------------------------------------------------

### devtools::load_all("B:/Data/R/metabefor");

### Enable verbose metabefor output while building the template.
options(metabefor.debug = TRUE);

### Build the extraction script template from the specification Google
### sheet; entities and value templates are also written to local CSV
### backups so the template can be rebuilt without network access.
### NOTE(review): `:::` accesses a non-exported metabefor function;
### confirm whether rxs_fromSpecifications is exported in the installed
### version (then `::` would suffice).
fullObject <-
  metabefor:::rxs_fromSpecifications(
    gs_url = sheetsURL,
    entitiesFilename = file.path(rxsSpecificationsPath,
                                 "entities-local-copy.csv"),
    valueTemplatesFilename = file.path(rxsSpecificationsPath,
                                       "valueTemplates-local-copy.csv"),
    ### Local backups are written to the same files used as fallback input
    localBackup = list(
      entities = file.path(rxsSpecificationsPath,
                           "entities-local-copy.csv"),
      valueTemplates = file.path(rxsSpecificationsPath,
                                 "valueTemplates-local-copy.csv"),
      definitions = NULL
    ),
    ### Where the generated .rxs.Rmd extraction template is written
    outputFile = file.path(
      extractionScriptTemplatePath,
      "extractionScriptTemplate.rxs.Rmd"
    ),
    instructionHeadingLevel = 4,
    returnFullObject = TRUE
  );
v Reading from "rxs-template-EarAct".
v Range 'entities'.
v Reading from "rxs-template-EarAct".
v Range 'valueTemplates'.
v Reading from "rxs-template-EarAct".
v Range 'definitions'.
v Reading from "rxs-template-EarAct".
v Range 'instructions'.
Successfully read the extraction script specifications from Google sheets.
Stored local backup of entities to 'C:/Users/tdeze/Documents/Earcheck/living-review/rxs-specifications/entities-local-copy.csv'.
Stored local backup of value templates to 'C:/Users/tdeze/Documents/Earcheck/living-review/rxs-specifications/valueTemplates-local-copy.csv'.

rxs_parseEntities read an entity spreadsheet with the following columns: 'identifier', 'parent', 'title', 'description', 'valueTemplate', 'validValues', 'default', 'examples', 'entityRef', 'fieldRef', 'owner', 'list', 'collapsing', 'repeating', 'recurring', 'recursing' & 'identifying'.
Parsed extraction script specifications into extraction script template.
Created diagrams representing the extraction tree.
Successfully wrote extraction script template to 'C:/Users/tdeze/Documents/Earcheck/living-review/extraction/extractionScriptTemplate.rxs.Rmd'.

Method

Screening

Importing hits

Here the search results are imported, deduplicated, and prepared for the screeners.

### Column names used to record the provenance of every imported record
originFile_col      <- "origin_file";
originDatabase_col  <- "origin_database";
originInterface_col <- "origin_interface";
originDate_col      <- "origin_date";

### Search hits are placed in alphabetically ordered subdirectories of
### the query hit export path; each subdirectory is one search batch.
bibliographyHitDirs <- sort(
  list.dirs(
    queryHitExportPath,
    full.names = FALSE,
    recursive = FALSE
  )
);

### Collect the .ris files per batch directory, excluding the
### "_preprocessed" copies that earlier runs generated.
bibliographyHitFiles <- lapply(
  file.path(queryHitExportPath, bibliographyHitDirs),
  function(dirPath) {
    risFiles <- list.files(
      dirPath,
      full.names = TRUE,
      pattern = "\\.ris$",
      ignore.case = TRUE,
      recursive = TRUE
    );
    grep(
      "_preprocessed",
      risFiles,
      ignore.case = TRUE,
      invert = TRUE,
      value = TRUE
    );
  }
);
names(bibliographyHitFiles) <- bibliographyHitDirs;

### Keep only the subdirectories that contain at least one file.
### lengths() is the idiomatic (and faster) replacement for
### unlist(lapply(x, length)).
bibliographyHitFiles <-
  bibliographyHitFiles[lengths(bibliographyHitFiles) > 0];

### Process the files in these subdirectories, reading all files
### separately to keep track of every entry's origin
bibliographyHits <-
  lapply(
    names(bibliographyHitFiles),
    function(currentFileList) {
      cat("\nStarting to read directory '", currentFileList, "':\n", sep="");
      return(
        lapply(
          bibliographyHitFiles[[currentFileList]],
          function(currentFile) {
            cat("\nStarting to read file '", currentFile, "':\n", sep="");
            
            cat("\nPreprocessing file.\n");
            
            ### The cleaned copy is written next to the original; the
            ### "_preprocessed" infix is what the grep() above uses to
            ### exclude these copies from later imports.
            newFileName <- paste0(
              tools::file_path_sans_ext(currentFile),
              "_preprocessed_for_synthesisr.",
              tools::file_ext(currentFile)
            );
            
            fileAsText <-
              readLines(currentFile);
            
            # fileAsText <-
            #   gsub("^T1", "TI", fileAsText);
            
            # ### Remove all lines that don't start with a RIS tag
            # fileAsText <-
            #   fileAsText[grepl("^\\s*$|^[A-Z][A-Z0-9]\\s{2}-\\s", fileAsText)];
            # ### Remove C1 lines (weird stuff Ebsco Embase exports added in)
            # fileAsText <-
            #   fileAsText[!grepl("^C1\\s{2}-\\s", fileAsText)];
            
            ### Get all lines that start with TY and ER; depends on whether
            ### the file is a pubmed export
            if (any(grepl("PMID-", fileAsText))) {
              TY_regex <- "PMID-";
              ER_regex <- "SO  -"
              ### Actually, no further processing required; pubmed isn't
              ### where the problem is
            } else {
              TY_regex <- "TY  -";
              ER_regex <- "ER  -"
              TY_lines <- grep(TY_regex, fileAsText);
              ER_lines <- grep(ER_regex, fileAsText);
              
              ### Every RIS record must open with a TY line and close
              ### with an ER line; a count mismatch means the export is
              ### corrupt and the seq() pairing below would be invalid.
              if (length(TY_lines) != length(ER_lines)) {
                stop("Something seems to have gone wrong; I found ",
                     length(TY_lines), " lines that start a RIS record ",
                     "but ", length(ER_lines), " lines that end one.");
              }
              
              ### Get all indices in between, i.e. keep only lines that
              ### fall inside a TY ... ER record
              recordlines <-
                do.call(
                  c,
                  mapply(
                    seq,
                    TY_lines,
                    ER_lines,
                    SIMPLIFY = FALSE
                  )
                );
              
              fileAsText <- fileAsText[recordlines];
              
            }
            
            writeLines(
              fileAsText,
              newFileName
            );
            
            cat("Importing file.\n");
            
            ### NOTE(review): read_refs is assumed to return a data
            ### frame of references — confirm against the synthesisr
            ### documentation.
            res <-
              #revtools::read_bibliography(
              synthesisr::read_refs(
                newFileName
              );

            ### Tag every record with the file it came from
            res[, originFile_col] <-
              basename(currentFile);
            return(res);
          }
        )
      )
    }
  );
names(bibliographyHits) <- names(bibliographyHitFiles);

### Bind each directory's list of per-file data frames into a single
### data frame per search batch.
### NOTE(review): rbind_df_list presumably aligns differing column
### sets across files — confirm against the metabefor documentation.
bibHitDfs <- stats::setNames(
  lapply(bibliographyHits, metabefor::rbind_df_list),
  names(bibliographyHits)
);

###-----------------------------------------------------------------------------
### Set the DOIs
###-----------------------------------------------------------------------------

### Consolidate DOIs for every search batch: ensure a 'doi' column
### exists, then fill missing values first from the 'article_id'
### column (via metabefor::extract_doi) and then from a 'DO' column,
### when those columns are present.
for (batchName in names(bibHitDfs)) {
  currentDf <- bibHitDfs[[batchName]];
  if (!('doi' %in% names(currentDf))) {
    currentDf$doi <- NA;
  }
  ### Fill missing DOIs extracted from the 'article_id' column
  if ('article_id' %in% names(currentDf)) {
    missingDoi <- is.na(currentDf$doi);
    extractedDois <- metabefor::extract_doi(currentDf$article_id);
    currentDf$doi[missingDoi] <- extractedDois[missingDoi];
  }
  ### Fill remaining missing DOIs from the 'DO' column
  if ('DO' %in% names(currentDf)) {
    missingDoi <- is.na(currentDf$doi);
    currentDf$doi[missingDoi] <- currentDf$DO[missingDoi];
  }
  bibHitDfs[[batchName]] <- currentDf;
}

###-----------------------------------------------------------------------------
### Internal deduplication
###-----------------------------------------------------------------------------

### Detect duplicate records within each search batch. The per-record
### result is stored in an 'internalDuplicates' column; the detailed
### (string-distance) information is kept for later inspection.
detailedDuplicateInfo <- list();
for (i in names(bibHitDfs)) {
  
  cat("\n\n**Processing ", i, "**\n\n", sep="");
  
  detailedDuplicateInfo[[i]] <-
    list(
      internal_res = metabefor::duplicate_sources(
        bibHitDfs[[i]],
        silent = FALSE,
        returnRawStringDistances = TRUE
      )
    );
      
  ### duplicate_sources() attaches its detailed results as an attribute
  detailedDuplicateInfo[[i]]$details_internal <-
    attr(detailedDuplicateInfo[[i]]$internal_res, "duplicateInfo");
  
  ### Store the per-record duplicate status with the batch itself
  bibHitDfs[[i]]$internalDuplicates <-
    detailedDuplicateInfo[[i]]$internal_res;

}

###-----------------------------------------------------------------------------
### Add unique identifiers to every record for future reference
###-----------------------------------------------------------------------------

### Quasi-unique record identifiers (qurids) are drawn from a single
### consecutive sequence spanning all batches; first work out which
### slice of that sequence belongs to each batch.
recordsPerSearchBatch <- vapply(bibHitDfs, nrow, integer(1));

totalRecords <- sum(recordsPerSearchBatch);

### Last and first index of each batch within the full sequence
recordsPerSearchBatch_lastIndices <- cumsum(recordsPerSearchBatch);
recordsPerSearchBatch_firstIndices <-
  c(1, head(recordsPerSearchBatch_lastIndices, -1) + 1);

### Pair each batch's (first, last) index
quridIndices <- Map(
  c,
  recordsPerSearchBatch_firstIndices,
  recordsPerSearchBatch_lastIndices
);
names(quridIndices) <- names(recordsPerSearchBatch);

### Generate the required number of QURIDs

### One QURID per record across all batches; the fixed origin timestamp
### makes the generated sequence reproducible across reruns.
totalQURIDs <-
  metabefor::generate_qurids(
      totalRecords,
      origin=as.POSIXct("2020-11-19 18:00:00")
    );

### Assign each batch its slice of the QURID sequence.
for (i in names(bibHitDfs)) {

  cat("\n\n**Generating QURIDS for ", i, "**\n\n", sep="");
  
  if (is.null(bibHitDfs[[i]]$qurid)) {
    bibHitDfs[[i]]$qurid <-
      totalQURIDs[quridIndices[[i]][1]:quridIndices[[i]][2]];
  } else {
    ### Only fill qurids that are missing or empty; existing ones
    ### (e.g. from an earlier run) are kept.
    bibHitDfs[[i]]$qurid <-
      ifelse(is.na(bibHitDfs[[i]]$qurid) |
               (nchar(bibHitDfs[[i]]$qurid) == 0),
             totalQURIDs[quridIndices[[i]][1]:quridIndices[[i]][2]],
             bibHitDfs[[i]]$qurid);
  }

  ### Also rename "ID" field to something else; this is a reserved
  ### field name for JabRef (at least for version 2.11, which we'll
  ### use for the screening)
  if ("ID" %in% names(bibHitDfs[[i]])) {
    names(bibHitDfs[[i]])[names(bibHitDfs[[i]]) == "ID"] <-
      "old_ID_field";
  }

}

### Report the number of records read and DOI availability for the
### first search batch.
### Fix: `ufs::cat` does not appear to be an exported ufs function;
### ufs::cat0() — the no-separator concatenating variant this file
### already uses further down — is clearly what was intended (base
### cat() would also insert spurious spaces between the arguments).
ufs::cat0(
  "A total of ",
  nrow(bibHitDfs[[1]]),
  " bibliographic entries have been read from ",
  length(unique(bibHitDfs[[1]][, originFile_col])),
  " files.",
  "\n\n",
  "Out of those ",
  nrow(bibHitDfs[[1]]),
  " entries, a DOI is available for ",
  sum(!is.na(bibHitDfs[[1]]$doi)),
  " entries (so ",
  sum(is.na(bibHitDfs[[1]]$doi)),
  " do not have a DOI)."
);

First search batch

Writing screener packages
###-----------------------------------------------------------------------------
### Process first search batch
### Note that these are sorted by batch
###-----------------------------------------------------------------------------

### Write the stage-1 screener package (bibliography plus screening
### fields) to the screening directory.
screenerPackages <-
  metabefor::write_screenerPackage(
    bibliographyDf = bibHitDfs[[1]],
    outputPath = screeningPath,
    basename = "stage1_",
    duplicateField = "duplicate"
  );

### Potentially, to screen with revtools:
# revtools::screen_titles(bibHitDf[[1]]);
Writing screener spreadsheet stage 1

This spreadsheet shows an overview of the screened articles

### Collect the screened .bib files of the first batch (one per screener)
filesToRead_firstBatch <- list.files(
  screeningPath,
  pattern = "^tiab_stage1_.*\\.bib$",
  recursive = TRUE,
  full.names = TRUE
);

### Read both screeners' files
screened_firstBatch_screenerA <- synthesisr::read_refs(filesToRead_firstBatch[1]);
screened_firstBatch_screenerB <- synthesisr::read_refs(filesToRead_firstBatch[2]);

### Merge screener B's judgement into screener A's records; drop any
### 'screenerbstatus' columns from A's file first so that B's version
### is the one that ends up in the merged data frame.
columnsToKeepFromA <-
  !grepl("screenerbstatus", names(screened_firstBatch_screenerA));
screened_firstBatch_merged <- merge(
  screened_firstBatch_screenerA[, columnsToKeepFromA],
  screened_firstBatch_screenerB[, c("qurid", "screenerbstatus1")],
  by = "qurid",
  all = TRUE
);

### A record counts as included when either screener marked it 'incl'
includedByScreenerA <-
  grepl("incl", screened_firstBatch_merged$screenerastatus, ignore.case=TRUE);
includedByScreenerB <-
  grepl("incl", screened_firstBatch_merged$screenerbstatus1, ignore.case=TRUE);
screened_firstBatch_merged$included <-
  ifelse(includedByScreenerA | includedByScreenerB, "incl", "");

### Compact audit trail per record: "<statusA>|<statusB>><final>"
screened_firstBatch_merged$screening_stage1 <- paste0(
  screened_firstBatch_merged$screenerastatus,
  "|",
  screened_firstBatch_merged$screenerbstatus1,
  ">",
  screened_firstBatch_merged$included
);

### Flag records that only screener B included (note: pattern 'inc',
### not 'incl', as in the original)
screened_firstBatch_merged$included_by_B_not_by_A <-
  !grepl("inc", screened_firstBatch_merged$screenerastatus,
         ignore.case = TRUE) &
  grepl("inc", screened_firstBatch_merged$screenerbstatus1,
        ignore.case = TRUE);

### Add any administration columns that are still missing (as empty
### strings), so that the export below can select all of them.
screened_firstBatch_merged[,
  setdiff(
    extraction_administration_columns_firstBatch,
    names(screened_firstBatch_merged)
  )] <- "";

### Export only the included records, with the administration columns,
### as the stage-1 screening overview spreadsheet.
openxlsx::write.xlsx(
  screened_firstBatch_merged[
    screened_firstBatch_merged$included == "incl",
    extraction_administration_columns_firstBatch],
  file = file.path(screeningPath,
                   "screening_overview--stage1--generated.xlsx")
);

Second search batch

Deduplication compared to first batch
###-----------------------------------------------------------------------------
### Process second search batch
###-----------------------------------------------------------------------------

### Mark records in the second batch that duplicate a record from the
### first, already-screened batch.
### NOTE(review): `screened_firstBatch` is not defined anywhere in this
### file as shown; presumably it is created in a chunk not visible here
### (e.g. when the stage-1 screening results are read back) — confirm.
secondBatchDuplicates <-
  metabefor::duplicate_sources(
    screened_firstBatch,
    bibHitDfs[[2]],
    silent = FALSE
  );

### Detailed duplicate information is attached as an attribute
secondBatchDuplicateInfo <-
  attr(secondBatchDuplicates, "duplicateInfo");

### Store the per-record duplicate status in the second batch
bibHitDfs[[2]]$duplicate <-
  secondBatchDuplicates;
Overview of duplicates
Merging 2nd and 1st search hits and write screener package
###-----------------------------------------------------------------------------
### Merge search hits and write screener package
###-----------------------------------------------------------------------------

### Stack the screened first batch on top of the second batch's hits.
### NOTE(review): `screened_firstBatch` is assumed to be defined in a
### chunk that is not visible in this file — confirm.
round2_screening <-
  metabefor::rbind_dfs(
    screened_firstBatch,
    bibHitDfs[[2]]
  );

### Returns TRUE for elements that are NA or the empty string (""),
### FALSE otherwise. Vectorized and NA-safe: never returns NA, because
### TRUE from is.na() dominates the `|`.
### Simplified from the original ifelse() construction, which is the
### ifelse-on-logicals anti-pattern; behavior is identical.
hasNoContents <- function(x) {
  return(is.na(x) | (nchar(x) == 0));
}

### Complement of hasNoContents(): TRUE for elements that are neither
### NA nor the empty string. Vectorized and NA-safe: FALSE from
### !is.na() dominates the `&`, so the result never contains NA.
### Simplified from the original ifelse() construction; behavior is
### identical.
hasContents <- function(x) {
  return(!is.na(x) & (nchar(x) > 0));
}

### A record still needs screening when it is neither flagged as a
### duplicate nor already screened in round 1.
round2_screening$to_be_screened <-
  hasNoContents(round2_screening$duplicate) &
  hasNoContents(round2_screening$screening_stage1);

### Strip duplicates and records that were already screened in round 1
### (to_be_screened is guaranteed TRUE/FALSE, never NA).
round2_screening_onlyNew <-
  subset(round2_screening, to_be_screened)

### Write the stage-2 screener packages for screeners a and b. Records
### carried over from the previous round keep their stage-1 status in
### the 'screening_stage1' field; screener judgements for this round go
### into fields named 'screener_<x>_status_r2'.
screenerPackages_round2 <-
  metabefor::write_screenerPackage(
    bibliographyDf = round2_screening_onlyNew,
    screeners = c("a", "b"),
    screenerFieldsPrefix = "screener_",
    screenerFieldsSuffix = "_status_r2",
    outputPath = screeningPath,
    prevRoundField = "screening_stage1",
    basename = "stage2_",
    duplicateField = "duplicate",
    silent=FALSE
  );

### Test
# test_screening2_pkgs <-
#   synthesisr::read_refs(
#     file.path(
#       screeningPath,
#       "stage2_a",
#       "stage2_a.bib"
#     )
#   );

### Report how many records remain to screen for the second search
### (ufs::cat0 concatenates its arguments without a separator).
ufs::cat0(
  "After the first round of screening, and after eliminating duplicate records, there are ",
  nrow(round2_screening_onlyNew),
  " records left to screen for the second search."
);
Writing screener spreadsheet stage 2

This spreadsheet shows an overview of the screened articles.

### Collect the screened .bib files of the second batch (one per screener)
filesToRead_secondBatch <- list.files(
  screeningPath,
  pattern = "^tiab_stage2_.*\\.bib$",
  recursive = TRUE,
  full.names = TRUE
);

### Read both screeners' files
screened_secondBatch_screenerA <- synthesisr::read_refs(filesToRead_secondBatch[1]);
screened_secondBatch_screenerB <- synthesisr::read_refs(filesToRead_secondBatch[2]);

### Adding screening judgement of screener B to file of first screener
screened_secondBatch_merged <- merge(
  screened_secondBatch_screenerA,
  screened_secondBatch_screenerB[, c("qurid", "screenerbstatus1")],
  by = "qurid",
  all = TRUE
);

### A record counts as included when either screener marked it 'incl'
includedByScreenerA_r2 <-
  grepl("incl", screened_secondBatch_merged$screener_a_status_r2,
        ignore.case=TRUE);
includedByScreenerB_r2 <-
  grepl("incl", screened_secondBatch_merged$screenerbstatus1,
        ignore.case=TRUE);
screened_secondBatch_merged$included <-
  ifelse(includedByScreenerA_r2 | includedByScreenerB_r2, "incl", "");

### Compact audit trail per record: "<statusA>|<statusB>><final>"
screened_secondBatch_merged$screening_stage2 <- paste0(
  screened_secondBatch_merged$screener_a_status_r2,
  "|",
  screened_secondBatch_merged$screenerbstatus1,
  ">",
  screened_secondBatch_merged$included
);

### Flag records that only screener B included (pattern 'inc', as in
### the stage-1 chunk)
screened_secondBatch_merged$included_by_B_not_by_A <-
  !grepl("inc", screened_secondBatch_merged$screener_a_status_r2,
         ignore.case = TRUE) &
  grepl("inc", screened_secondBatch_merged$screenerbstatus1,
        ignore.case = TRUE);

### Add any administration columns that are still missing (as empty
### strings), so that the export below can select all of them.
screened_secondBatch_merged[,
  setdiff(
    extraction_administration_columns_secondBatch,
    names(screened_secondBatch_merged)
  )] <- "";

### Export only the included records, with the administration columns,
### as the stage-2 screening overview spreadsheet.
openxlsx::write.xlsx(
  screened_secondBatch_merged[
    screened_secondBatch_merged$included == "incl",
    extraction_administration_columns_secondBatch
  ],
  file = file.path(screeningPath,
                   "screening_overview--stage2--generated.xlsx")
);

Extraction

Entities to extract

This is an overview of the hierarchy of entities to extract.

### Print the parsed extraction tree structure
print(fullObject$rxsStructure$parsedEntities$extractionScriptTree);

### Render the simplified extraction tree diagram in the report
DiagrammeR::render_graph(fullObject$rxsTreeDiagram_simple)

### Also export the same diagram as a PDF in the output directory
DiagrammeR::export_graph(
  fullObject$rxsTreeDiagram_simple,
  file.path(
    outputPath,
    "extraction-tree--simple.pdf"
  )
);
### Show the extraction instructions and the entity overview
cat(fullObject$rxsInstructions);
cat(fullObject$entityOverview_list);

Extraction script template

This is the extraction script generated based on the extraction script specification.

### Embed the generated extraction script template in the rendered
### report inside a scrollable <textarea>. (Presumably this chunk is
### rendered with results='asis' so the HTML passes through — confirm.)
cat("\n\n<pre><textarea rows='40' cols='124' style='font-family:monospace;font-size:11px;white-space:pre;'>",
    unlist(fullObject$rxsTemplate),
    "</textarea></pre>\n\n",
    sep="\n");

Analyses

Extraction scripts

Overview

These tabs show the extracted data per study.

# devtools::load_all("B:/Data/R/metabefor");

### Read studies
### Either (re)parse all extraction scripts from disk, or load the most
### recent automatically saved .rds snapshot, depending on the
### importExtractedStudiesFromRxsFiles switch set near the top of the
### file.
if (importExtractedStudiesFromRxsFiles) {

  studies <-
    metabefor::rxs_parseExtractionScripts(
      path = extractionScriptPath
    );

  ### Store workspace; first delete most old automatically saved
  ### workspaces
  oldfiles <- list.files(extractionScriptPath,
                         pattern='^autogenerated--rxsObject-preTreePrep--.*\\.rds$',
                         full.names=TRUE);
  
  ### Delete all but most recent one (the timestamp in the file name
  ### makes alphabetical order chronological)
  if (length(oldfiles) > 1) {
    unlink(head(sort(oldfiles), -1));
  }
  
  ### Housecleaning
  rm(oldfiles);
  
  ### Store new workspace
  currentDate <- Sys.time();
  extractedStudiesFilename <-
    format(currentDate,
           "autogenerated--rxsObject-preTreePrep--%Y-%m-%d--%H-%M.rds");
  ### Saving is best-effort: a failure is reported as a warning but does
  ### not abort the render.
  tryCatch({
    saveRDS(studies,
            file=file.path(extractionScriptPath,
                           extractedStudiesFilename));
  }, error = function(e) {
    warning(paste0("Encountered error when trying to save extracted studies to ",
                   extractedStudiesFilename, "!\n"));
  });
  
} else {

  ### Load most recently saved object
  
  fileList <- list.files(extractionScriptPath,
                         pattern='^autogenerated--rxsObject-preTreePrep--.*\\.rds$',
                         full.names=TRUE);
  
  if (length(fileList) > 0) {
    extractedStudiesFilename <- tail(sort(fileList), 1);
    studies <- readRDS(extractedStudiesFilename);
    rm(extractedStudiesFilename);
  } else {
    stop("No automatically stored extracted studies objects available!");
  }
  
  rm(fileList);

}

### Make a backup of the studies object so later (destructive) tree
### operations can be compared against the raw import.
raw <- metabefor::cloneStudiesObject(studies);

### Log the captured parsing/validation output of every extraction
### script under its own heading.
### Fix: cat() has no `collapse` argument — the unmatched name fell
### into `...` and was printed literally, and the output lines ended up
### space-separated. `sep = "\n"` is the correct way to get one line
### per element.
for (studyFileName in names(studies$rxsOutput)) {
 
  ufs::heading(studyFileName, headingLevel = 3);
  
  cat(
    studies$rxsOutput[[studyFileName]],
    sep="\n"
  );
   
}

Alnuman-Ghnimat–2019.rxs.Rmd

# Validation results Validation successful!

Ameye- Eziyi-Adekunle-Obasi-Amusa–2019.rxs.Rmd

# Validation results Validation successful!

Auchter-LePrell–2014.rxs.Rmd

# Validation results Validation successful!

Balanay-Kearney–2015.rxs.Rmd

# Validation results Validation successful!

Beach-Gilliver–2019.rxs.Rmd

# Validation results Validation successful!

Beach-Nielsen-Gilliver–2016.rxs.Rmd

# Validation results Validation successful!

Beach-Williams-Gilliver–2011.rxs.Rmd

# Validation results Validation successful!

Bogosch-House-Kudla–2005.rxs.Rmd

# Validation results Validation successful!

Callahan-Lass-Foster–2011.rxs.Rmd

# Validation results Validation successful!

Callahan-Lass-Foster-Poe-Steinberg-Duffe–2012.rxs.Rmd

# Validation results Validation successful!

Carter-Black–2017.rxs.Rmd

# Validation results Validation successful!

Cheng-DesRoches-Meunier-Eavey–2005.rxs.Rmd

# Validation results Validation successful!

Chesky-Pair-Lanford-Yoshimura–2009.rxs.Rmd

# Validation results Validation successful!

Crandell-Mills-Gauthier–2004.rxs.Rmd

# Validation results Validation successful!

Dandolini-Matheucci-Teixeira-Sharlach–2018.rxs.Rmd

# Validation results Validation successful!

Danhauer-Johnson-Byrd-DeGood-Meuel-Pecile-Koch–2009.rxs.Rmd

# Validation results Validation successful!

Danhauer-Johnson-Dunne-etal–2012.rxs.Rmd

# Validation results Validation successful!

DeBruijn_Spaans_Jansen_VantRiet_2016.rxs.Rmd

# Validation results Validation successful!

Degeest-Keppler-Corthals-Clays–2017.rxs.Rmd

# Validation results Validation successful!

Degeest-Maes-Leyssens-Keppler–2018.rxs.Rmd

# Validation results Validation successful!

DelGiacco-Serpanos–2015.rxs.Rmd

# Validation results Validation successful!

deLijster-vanderPloeg–2015.rxs.Rmd

# Validation results Validation successful!

Dell-Holmes–2012.rxs.Rmd

# Validation results Validation successful!

Diviani-Zanini-Amann-Chadha-Cieza-Rubinelli–2019.rxs.Rmd

# Validation results Validation successful!

Eilering_Zweet_2018.rxs.Rmd

# Validation results Validation successful!

Gilles-DeRidder-VanHal-Wouters-Punte-VandeHeyning–2012.rxs.Rmd

# Validation results Validation successful!

Gilles-Thuy-DeRycke-VandeHeyning–2014.rxs.Rmd

# Validation results Validation successful!

Gilles-VandeHeyning–2014.rxs.Rmd

# Validation results Validation successful!

Gilles-VanHal-DeRidder-Wouters-VandeHeyning–2013.rxs.Rmd

# Validation results Validation successful!

Gilliver-Carter-Macoun-Rosen-Williams–2012.rxs.Rmd

# Validation results Validation successful!

Gilliver_Beach_Williams_2013.rxs.Rmd

# Validation results Validation successful!

Gilliver_Beach_Williams_2015.rxs.Rmd

# Validation results Validation successful!

Gorter–2012.rxs.Rmd

# Validation results Validation successful!

Griest-Folmer-Martin–2007.rxs.Rmd

# Validation results Validation successful!

Gupta-Sharma-Singh-Goyal-Sao–2014.rxs.Rmd

# Validation results Validation successful!

Herrera-Lacerda-Lurdes-Rocha-Alcaras-Ribeiro–2016.rxs.Rmd

# Validation results Validation successful!

Hickson-Garson-etal–2007.rxs.Rmd

# Validation results Validation successful!

Holmes-Widen-Carver-White–2007.rxs.Rmd

# Validation results Validation successful!

Hoover-Krishnamurti–2010.rxs.Rmd

# Validation results Validation successful!

Hoppenbrouwer-Guérin-VanDoorslaer-VanLeeuwen-Desoete-Roelants–2018.rxs.Rmd

# Validation results Validation successful!

Hunter–2017.rxs.Rmd

# Validation results Validation successful!

Hunter–2018.rxs.Rmd

# Validation results Validation successful!

Johnson-Andrew-Walker-Morgan-Aldren–2014.rxs.Rmd

# Validation results Validation successful!

Kamphuis–2018.rxs.Rmd

# Validation results Validation successful!

Kelly-Boyd-Henehan–2015.rxs.Rmd

# Validation results Validation successful!

Keppler-Dhooge-Degeest-Vinck–2015.rxs.Rmd

# Validation results Validation successful!

Keppler-Dhooge-Vinck–2015.rxs.Rmd

# Validation results Validation successful!

Khan-Evans-Bielko-Rohlman–2018.rxs.Rmd

# Validation results Validation successful!

Lacerda-Soares-Goncalves-Lopes-Testoni–2013.rxs.Rmd

# Validation results Validation successful!

Lee-Han–2019.rxs.Rmd

# Validation results Validation successful!

Lee_Jeong_2021.rxs.Rmd

# Validation results Validation successful!

LourdesQuintanillaDieck-Artunduaga-Eavey–2009.rxs.Rmd

# Validation results Validation successful!

Marron-Marchiondo-etal–2015.rxs.Rmd

# Validation results Validation successful!

Martens-Perenboom-vanderPloeg–2006.rxs.Rmd

# Validation results Validation successful!

Matei_Broad_Goldbart_Ginsborg_2018.rxs.Rmd

# Validation results Validation successful!

Nielsen-Beach-Gilliver–2014.rxs.Rmd

# Validation results Validation successful!

OlsenWiden-Erlandsson–2004.rxs.Rmd

# Validation results Validation successful!

Olson-Gooding-Shikoh-Graf_2016.rxs.Rmd

# Validation results Validation successful!

Peters-Noijen–2019.rxs.Rmd

# Validation results Validation successful!

Portnuff–2011.rxs.Rmd

# Validation results Validation successful!

Portnuff_Fligor_Arehart_2011.rxs.Rmd

# Validation results Validation successful!

Pouwels–2017.rxs.Rmd

# Validation results Validation successful!

Rawool-CollignonWayne–2008.rxs.Rmd

# Validation results Validation successful!

Reddy-Nosa-Mafi-Welch–2021.rxs.Rmd

# Validation results Validation successful!

Reiness-Daugaard-Nielsen–2013.rxs.Rmd

# Validation results Validation successful!

Rijs-Meeuwse-Jurg-Bouman–2007.rxs.Rmd

# Validation results Validation successful!

Rosemberg-McCullagh-Nordstrom–2015.rxs.Rmd

# Validation results Validation successful!

Saunders-Dann-Griest-Frederick–2013.rxs.Rmd

# Validation results Validation successful!

Serpanos_Berg_Renne_2016.rxs.Rmd

# Validation results Validation successful!

Steen–2008.rxs.Rmd

# Validation results Validation successful!

Udoh-Adeyemo–2019.rxs.Rmd

# Validation results Validation successful!

vandenBosch-Degens–2017.rxs.Rmd

# Validation results Validation successful!

Vogel-Brug-Hosli-vdPloeg-Raat–2008.rxs.Rmd

# Validation results Validation successful!

Vogel-Brug-VanderPloeg-Hein Raat–2011.rxs.Rmd

# Validation results Validation successful!

Vogel-Brug-VanderPloeg-Raat–2010.rxs.Rmd

# Validation results Validation successful!

Vogel-Brug-VanderPloeg-Raat–2010b.rxs.Rmd

# Validation results Validation successful!

Vogel-Verschuure-VanderPloeg-Brug-Raat–2009.rxs.Rmd

# Validation results Validation successful!

Wang-etal–2021.rxs.Rmd

# Validation results Validation successful!

WarnerCzyz-Cain–2015.rxs.Rmd

# Validation results Validation successful!

Weichbold-Zorowka–2003.rxs.Rmd

# Validation results Validation successful!

Weichbold-Zorowka–2007.rxs.Rmd

# Validation results Validation successful!

Welch-Ma-Reddy–2019.rxs.Rmd

# Validation results Validation successful!

Welch_Reddy_Hand_Devine_2016.rxs.Rmd

# Validation results Validation successful!

West–2012.rxs.Rmd

# Validation results Validation successful!

Widén–2013.rxs.Rmd

# Validation results Validation successful!

Widén-Bohlin-Johansson–2011.rxs.Rmd

# Validation results Validation successful!

Widen-Holmes-Erlandsson–2006.rxs.Rmd

# Validation results Validation successful!

Widén-Holmes-Johnson-Bohlin-Erlandsson–2009.rxs.Rmd

# Validation results Validation successful!

Widen_Erlandsson_2007.rxs.Rmd

# Validation results Validation successful!

You-Kwak-Han–2020.rxs.Rmd

# Validation results Validation successful!

Zieltjens-deLijster-vanderPloeg–2014.rxs.Rmd

# Validation results Validation successful!

Zocoli-Morata-Marques-Corteletti–2009.rxs.Rmd

# Validation results Validation successful!

Extraction integrity checks

### Check whether every study tree uses unique entity (node) names;
### duplicate names would make value lookups within a tree ambiguous.
treesWithUniqueNames <-
  unlist(lapply(studies$rxsTrees, data.tree::AreNamesUnique));

if (any(!treesWithUniqueNames)) {

  ### Bug fix: inside a braced block only the *last* expression's value
  ### is auto-printed, so the overview table must be printed explicitly.
  print(table(treesWithUniqueNames));
  
  studiesWithDuplicateNames <- 
    names(studies$rxsTrees)[!treesWithUniqueNames];
  
  ### For every offending study, list the entity names occurring
  ### more than once in its tree.
  for (i in studiesWithDuplicateNames) {
    metabefor::cat0("\n\nStudy with duplicate names: ", i, "\n\n");
    allNames <- table(studies$rxsTrees[[i]]$Get('name'));
    dupNames <- which(allNames > 1);
    print(names(allNames)[dupNames]);
  }
} else {
  metabefor::cat0("\n\nAll studies have unique entity node names.\n\n");
}

Supplement values

# devtools::load_all("B:/Data/R/metabefor");

### See the manual page for `explode_vector_to_values` for more
### detail about how vectors are exploded into separate values:
###
###   ?explode_vector_to_values

### Supplement the study trees from every txs Google Sheet whose
### identifier was collected in `txsIDs`.
for (currentTxsId in txsIDs) {
  currentTxsUrl <-
    paste0("https://docs.google.com/spreadsheets/d/", currentTxsId);
  metabefor::supplement_studyTrees_from_txs(
    studies,
    currentTxsUrl,
    explode_vector_to_values = TRUE,
    silent = FALSE
  );
}

### Check correct supplementing
# studies$rxsTrees$`Alnuman-Ghnimat--2019.rxs.Rmd`$theory$value
# studies$rxsTrees$`Beach-Nielsen-Gilliver--2016.rxs.Rmd`$theory$value
# studies$rxsTrees$`Beach-Nielsen-Gilliver--2016.rxs.Rmd`$theory_HBM$value
# studies$rxsTrees$`Beach-Williams-Gilliver--2011.rxs.Rmd`$theory$value
### Add the information about each variable that was stored in the
### clustering entity holding the measurement information to the
### clustering entities with the univariate and association results.
###
### The first argument is our studies object (which we don't store
### although the function returns it, because the add the information
### to the data.tree object, which uses reference semantics (i.e. through
### the R6 object oriented programming package).
###
### The second argument is (inside the target entity node), the name
### holding the identifier of the source entity node (the node
### supplying the data). Note that that field is itself an entity
### as specified in the entity specification spreadsheet.
###
### The third argument is the prefix prepended to the newly stored
### fields in the target node.
###
### The fourth argument forces overwriting of values should they
### already exist in the target node.
###
### The fifth argument can be switched to FALSE to get detailed info.
###
### For more info:
### ?metabefor::supplement_data_from_list_inStudyTrees

# devtools::load_all("B:/Data/R/metabefor");

# studies <- metabefor::cloneStudiesObject(raw);


###-----------------------------------------------------------------------------
### univariate results
###-----------------------------------------------------------------------------

### Supplement univariate results with operationalization details:
### copy the fields of the entity referenced by `uni.variable` (found
### under 'methods') into the result entities (under 'results'),
### prefixed with "varSpec_". Existing values are left untouched.
metabefor::supplement_data_from_list_inStudyTrees(
  studies,
  sourceEntityNodeIdField_in_targetEntity = "uni.variable",
  prefix = "varSpec_",
  sourcePathString_regex = "methods",
  targetPathString_regex = "results",
  forceCopyingOfExistingValues = FALSE,
  silent = TRUE
);

### Same mechanism for the subsample details: the source entity is
### referenced by `uni.subsample`, and fields get prefix "subsample_".
metabefor::supplement_data_from_list_inStudyTrees(
  studies,
  sourceEntityNodeIdField_in_targetEntity = "uni.subsample",
  prefix = "subsample_",
  sourcePathString_regex = "methods",
  targetPathString_regex = "results",
  forceCopyingOfExistingValues = FALSE,
  silent = TRUE
);

### Then add entities from the study itself

### Collect the names of all entities that occur directly at the
### 'study' level in any study tree. Note that this list also
### contains container entities!
allEntitiesInStudy <-
  unique(
    unlist(
      lapply(
        studies$rxsTrees,
        function(studyTree) names(studyTree$children)
      )
    )
  );

### Compile the list of study-level entities to copy into the
### univariate results; the grep() calls pick up all single values
### that were exploded from vectors (hence the trailing underscore
### in the patterns).
entities_fromStudy_toSupplement <- c(
  "year.collected",
  "year.published",
  "name.author",
  "country",
  "theory_use",
  "pld_use",
  "grey_literature",
  grep("theory_", allEntitiesInStudy, value = TRUE),
  grep("sample_categories_", allEntitiesInStudy, value = TRUE)
);

### Copy every selected study-level entity into the univariate result
### entities (those having a `uni.variable` field), prefixed with
### "studyInfo_".
for (entityId in entities_fromStudy_toSupplement) {
  metabefor::cat0("\nStarting to supplement `", entityId, "`... ");
  metabefor::supplement_data_with_value_inStudyTrees(
    studies,
    sourceEntityNodeId = entityId,
    targetEntityNode_requiredField = "uni.variable",
    prefix = "studyInfo_",
    sourcePathString_regex = NULL,
    targetPathString_regex = "results/",
    forceCopyingOfExistingValues = FALSE,
    silent = TRUE
  );
  metabefor::cat0("Done with `", entityId, "`.\n");
}

### Likewise for a fixed set of entities from the methods section,
### prefixed with "studyMethods_".
methodsEntitiesToSupplement <-
  c("N", "samplingStrategy", "setting", "sample", "methodType");
for (entityId in methodsEntitiesToSupplement) {
  metabefor::cat0("\nStarting to supplement `", entityId, "`... ");
  metabefor::supplement_data_with_value_inStudyTrees(
    studies,
    sourceEntityNodeId = entityId,
    targetEntityNode_requiredField = "uni.variable",
    prefix = "studyMethods_",
    sourcePathString_regex = "methods/",
    targetPathString_regex = "results/",
    forceCopyingOfExistingValues = FALSE,
    silent = TRUE
  );
  metabefor::cat0("Done with `", entityId, "`.\n");
}

###-----------------------------------------------------------------------------
### Associations
###-----------------------------------------------------------------------------

### First details about the measurement

### Supplement association results with the operationalization
### details of the first and second variable of each association
### (prefixes "var1spec_" / "var2spec_").
### NOTE(review): forceCopyingOfExistingValues is TRUE here, unlike
### for the univariate results above (FALSE) -- presumably deliberate
### (e.g. to refresh earlier supplements); confirm this asymmetry.
metabefor::supplement_data_from_list_inStudyTrees(
  studies,
  sourceEntityNodeIdField_in_targetEntity="assoc.var1name",
  prefix="var1spec_",
  sourcePathString_regex = "methods",
  targetPathString_regex = "results",
  forceCopyingOfExistingValues = TRUE,
  silent=TRUE
);

metabefor::supplement_data_from_list_inStudyTrees(
  studies,
  sourceEntityNodeIdField_in_targetEntity="assoc.var2name",
  prefix="var2spec_",
  sourcePathString_regex = "methods",
  targetPathString_regex = "results",
  forceCopyingOfExistingValues = TRUE,
  silent=TRUE
);

### Then supplement association results with subsample details
### (here existing values are *not* overwritten, matching the
### univariate treatment).
metabefor::supplement_data_from_list_inStudyTrees(
  studies,
  sourceEntityNodeIdField_in_targetEntity="assoc.subsample",
  prefix="subsample_",
  sourcePathString_regex = "methods",
  targetPathString_regex = "results",
  forceCopyingOfExistingValues = FALSE,
  silent=TRUE
);

### Copy the selected study-level entities into the association
### result entities (those having an `assoc.var1name` field),
### prefixed with "studyInfo_".
for (entityId in entities_fromStudy_toSupplement) {
  metabefor::cat0("\nStarting to supplement `", entityId, "`... ");
  metabefor::supplement_data_with_value_inStudyTrees(
    studies,
    sourceEntityNodeId = entityId,
    targetEntityNode_requiredField = "assoc.var1name",
    prefix = "studyInfo_",
    sourcePathString_regex = NULL,
    targetPathString_regex = "results/",
    forceCopyingOfExistingValues = FALSE,
    silent = TRUE
  );
  metabefor::cat0("Done with `", entityId, "`.\n");
}

### And the same for the methods-section entities, prefixed with
### "studyMethods_".
assocMethodsEntities <-
  c("N", "samplingStrategy", "setting", "sample", "methodType");
for (entityId in assocMethodsEntities) {
  metabefor::cat0("\nStarting to supplement `", entityId, "`... ");
  metabefor::supplement_data_with_value_inStudyTrees(
    studies,
    sourceEntityNodeId = entityId,
    targetEntityNode_requiredField = "assoc.var1name",
    prefix = "studyMethods_",
    sourcePathString_regex = "methods/",
    targetPathString_regex = "results/",
    forceCopyingOfExistingValues = FALSE,
    silent = TRUE
  );
  metabefor::cat0("Done with `", entityId, "`.\n");
}

Aggregation Tree

# devtools::load_all("B:/Data/R/metabefor");

### Read the aggregation tree (the construct hierarchy used to
### aggregate operationalizations) from the Google Sheet at
### `aggregationTreeURL`.
aggregationTree <-
  metabefor::read_aggregationTree_from_gs(
    aggregationTreeURL
  );

#aggregationTree;
### Render the aggregation tree as a graph in the report.
metabefor::renderTreeAsGraph(aggregationTree);

### Attach the aggregation tree information to all study trees,
### matching on the construct identifier that was supplemented
### earlier into the field `varSpec_oper.constructIdentifier`.
metabefor::add_aggregationTree_information(
  studies = studies,
  aggregationTree = aggregationTree,
  fieldName = "varSpec_oper.constructIdentifier"
);
### Process all study trees
### Rescale a mean onto a 0-100 scale, given the minimum and maximum
### of the original response scale.
###
### Arguments:
###   mean, min, max: single values (possibly character, as extracted);
###                   coerced to numeric. NULL or non-coercible values
###                   yield NA.
### Returns: the rescaled mean (0-100), or NA when any input is
###   missing/non-numeric or the scale is degenerate.
standardizeMean <- function(mean, min, max) {

  ### Entities that were not extracted come through as NULL.
  if (is.null(mean) || is.null(min) || is.null(max)) {
    return(NA);
  }

  ### Coercion of non-numeric extractions yields NA (with a warning,
  ### which we deliberately let propagate so problems surface).
  mean <- as.numeric(mean);
  min <- as.numeric(min);
  max <- as.numeric(max);

  if (is.na(mean) || is.na(min) || is.na(max)) {
    return(NA);
  }

  ### Guard against a degenerate scale: max == min would divide by
  ### zero and produce Inf/NaN instead of a missing value.
  if (max == min) {
    return(NA);
  }

  return(
    100 * (mean - min) / (max - min)
  );
}

### Add a rescaled (0-100) mean, `uni.mean_rescaled`, to every
### univariate result entity (identified by having a `uni.name`
### field), computed from the extracted mean and the scale range.
metabefor::transform_in_every_clusteringEntity(
  studies,
  fun = standardizeMean,
  funArgs = c(
    mean = "uni.mean",
    min = "varSpec_scaleRange_min",
    max = "varSpec_scaleRange_max"
  ),
  newEntityName = "uni.mean_rescaled",
  requiredField_regex = "uni.name"
);

# studies$rxsTrees$`Balanay-Kearney--2015.rxs.Rmd`$results$YANS_1stfactor_score$value;


################################################################################
############# Compose Standardized Standard Deviations #########################

### Rescale a standard deviation proportionally to the rescaled mean:
### new_sd = (sd / (mean - min)) * new_mean.
###
### Arguments:
###   sd, mean, min: extracted values (coerced to numeric);
###   new_mean: the already rescaled mean (`uni.mean_rescaled`).
### Returns: the rescaled SD, or NA when any input is missing,
###   non-numeric, or the shifted mean is zero.
standardizeSD <- function(sd, mean, new_mean, min) {

  ### Bug fix: `min` is used below but was never checked for NULL;
  ### a NULL `min` made `mean` a zero-length numeric, and the
  ### subsequent `||` on a zero-length logical raised an error.
  if (is.null(mean) || is.null(sd) || is.null(new_mean) || is.null(min)) {
    return(NA);
  }

  ### Shift the mean to a zero-based scale before taking the ratio.
  mean <- as.numeric(mean) - as.numeric(min);
  sd <- as.numeric(sd);
  new_mean <- as.numeric(new_mean);

  if (is.na(mean) || is.na(sd) || is.na(new_mean)) {
    return(NA);
  }

  ### Guard against division by zero (mean equal to scale minimum).
  if (mean == 0) {
    return(NA);
  }

  return(
    (sd / mean) * new_mean
  );
}

### Add a rescaled standard deviation, `uni.sd_rescaled`, to every
### univariate result entity, based on the extracted SD, the mean,
### the rescaled mean, and the scale minimum.
metabefor::transform_in_every_clusteringEntity(
  studies,
  fun = standardizeSD,
  funArgs = c(
    mean = "uni.mean",
    sd = "uni.sd",
    new_mean = "uni.mean_rescaled",
    min = "varSpec_scaleRange_min"
  ),
  newEntityName = "uni.sd_rescaled",
  requiredField_regex = "uni.name"
);

Data frames

# devtools::load_all("B:/Data/R/metabefor");

# metabefor::get_singleValue_fromTree(
#   studies$rxsTrees$`Hoover-Krishnamurti--2010.rxs.Rmd`,
#   'volume_restriction_freq', returnDf = TRUE
# );
# 
# metabefor::get_singleValue_fromTree(
#   studies$rxsTrees$`Balanay-Kearney--2015.rxs.Rmd`,
#   'hpuse_concerts_asc', returnDf = TRUE
# );
# 
# univarDf <-
#   get_valueList_asDf_fromStudyTree(
#     studies$rxsTrees$`Hoover-Krishnamurti--2010.rxs.Rmd`,
#     requiredFields = "uni.variable",
#     silent=FALSE
#   );

# devtools::load_all("B:/Data/R/metabefor");

### Build the univariate results data frame from all study trees,
### capturing the verbose log so it does not clutter the document.
univarDfLog <- capture.output(
  univarDf <-
    metabefor::get_valueList_asDf(
      studies,
      requiredFields = "uni.variable",
      pathString_regex_select = "results",
      flattenVectorsInDf = TRUE,
      pathString_regex_explode = "results",
      fieldname_regex_alwaysFlatten = "varSpec_",
      silent = FALSE
    )
);

### GJ to add functionality to metabefor::get_valueList_asDf:
###   - specify columns to ignore when comparing nr of columns;
###   - store logs as attributes
###   - optionally print warnings only

# grep("WARNING", univarDfLog);
# 
# cat(univarDfLog[80:110], sep="\n")
# 
# setdiff(names(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$results$perception_sound_levels_nc$value), names(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$results$age_participants$value))

### Export the univariate data frame to Excel and to CSV
### (UTF-8, without row names).
writexl::write_xlsx(
  univarDf,
  path = file.path(dataPath, "univariate.xlsx")
);

### iconv(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$comments$value, from = "UTF-8", to = "UTF-8")

write.csv(
  univarDf,
  file.path(dataPath, "univariate.csv"),
  row.names = FALSE,
  fileEncoding = "UTF-8"
);

###-----------------------------------------------------------------------------
### Prepare TXS files
###-----------------------------------------------------------------------------

### These scripts are only necessary once, and so are placed in an external
### file commented out

# source(here::here("scripts", "prepare-txs-files.R"));
# devtools::load_all("B:/Data/R/metabefor");
### Build the associations data frame from all study trees, capturing
### the verbose log so it does not clutter the document.
assocDfLog <-
  capture.output(
    assocDf <-
      metabefor::get_valueList_asDf(
        studies,
        requiredFields = c("assoc.var1name", "assoc.var2name"),
        flattenVectorsInDf = TRUE,
        silent=FALSE
      )
  );

### Export the associations data frame to Excel.
writexl::write_xlsx(
  assocDf,
  file.path(dataPath, "associations.xlsx")
);

### iconv(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$comments$value, from = "UTF-8", to = "UTF-8")

### Consistency fix: the univariate CSV export writes without row
### names and with explicit UTF-8 encoding; do the same here so the
### two CSV files have the same shape and encoding.
write.csv(
  assocDf,
  file.path(dataPath, "associations.csv"),
  row.names = FALSE,
  fileEncoding = "UTF-8"
);
### Either save the freshly prepared studies object (plus keep the
### exported data frames written above), or load the most recently
### auto-saved object and re-read the data frames from disk.
if (runStudyTreePreparation) {
  
  ### Save to disk

  ### Store workspace; first delete most old automatically saved
  ### workspaces
  oldfiles <- list.files(
    extractionScriptPath,
    pattern='^autogenerated--rxsObject-postTreePrep--.*\\.rds$',
    full.names=TRUE
  );
  
  ### Delete all but most recent one; the filenames embed a
  ### zero-padded timestamp, so alphabetical sort order is
  ### chronological and the last element is the newest file.
  if (length(oldfiles) > 1) {
    unlink(head(sort(oldfiles), -1));
  }
  
  ### Housecleaning
  rm(oldfiles);
  
  ### Store new workspace under a timestamped filename.
  currentDate <- Sys.time();
  extractedStudiesFilename <-
    format(
      currentDate,
      "autogenerated--rxsObject-postTreePrep--%Y-%m-%d--%H-%M.rds"
    );
  ### Saving is best-effort: a failure only produces a warning so the
  ### rest of the document can still be rendered.
  tryCatch({
    saveRDS(studies,
            file=file.path(extractionScriptPath,
                           extractedStudiesFilename));
  }, error = function(e) {
    warning(
      paste0(
        "Encountered error when trying to save extracted studies to ",
        extractedStudiesFilename, "!\n"
      )
    );
  });
  
} else {

  ### Load most recently saved object
  
  fileList <- list.files(
    extractionScriptPath,
    pattern='^autogenerated--rxsObject-postTreePrep--.*\\.rds$',
    full.names=TRUE
  );
  
  ### tail(sort(...), 1) selects the newest file (timestamped names
  ### sort chronologically); error out if no auto-save exists.
  if (length(fileList) > 0) {
    extractedStudiesFilename <- tail(sort(fileList), 1);
    studies <- readRDS(extractedStudiesFilename);
    rm(extractedStudiesFilename);
  } else {
    stop("No automatically stored extracted studies objects available!");
  }
  
  rm(fileList);
  
  ### Re-read the exported data frames so `univarDf` and `assocDf`
  ### exist in this branch as well.
  univarDf <-
    as.data.frame(
      readxl::read_xlsx(
        path = file.path(dataPath, "univariate.xlsx")
      )
    );
  
  assocDf <-
    as.data.frame(
      readxl::read_xlsx(
        file.path(dataPath, "associations.xlsx")
      )
    );

}

Decisions and justifications

### Log the analysis decisions (with weighted justifications) using
### the `justifier` package, if it is installed; otherwise skip
### silently.
if ("justifier" %in% installed.packages()[, "Package"]) {
  
  ### Clean workspace so we can start storing out decisions.
  justifier::clean_workspace(force = TRUE, silent=FALSE);

  ###---------------------------------------------------------------------------
  
  ### Decision: pool multiple estimates per study/sample. Each
  ### justification carries a weight (positive = supporting,
  ### negative = opposing).
  justifier::log_decision(
    
    "When doing analyses that result in a situation where we have multiple estimates per study/sample, we decide to pool all estimates.",
    
    justification = c(
      justifier::justify(
        
        "Pooling estimates within each study means all estimates in the meta-analysis are independent.",
        
        weight = 1
        
      ),
      justifier::justify(
        
        "Pooling estimates within each study may mean that the pooled estimate is less valid than the best estimate in that study was. For example, if a study measures both Perceived Norms and Descriptive Norms, and we decide to aggregate these by pooling to run analyses on the level of Perceived Norms, the original Perceived Norms measure from that study would be more valid than the pooled estimate, which pulls the estimate towards Descriptive Norms (and so, away from Injunctive Norms).",
        
        weight = -.8
        
      )
    ),
    
    type = "data_preparation",
    
    alternatives = c(
      "Pool all estimates",
      "Selectively pool based on construct definitions",
      "Never pool, deal with dependence of estimates"
    )
    
  );

  ###---------------------------------------------------------------------------

  ### Show decisions and justifications (note: we may want to defer this to
  ### the end of the document)
  plot(justifier::workspace());
  
}
Your justifier workspace has been cleaned and is now empty.

Simple results

Studies per country

###-----------------------------------------------------------------------------
### Get a list of all countries
###-----------------------------------------------------------------------------

### NOTE: flattenVectorsInDf should be FALSE, but that throws an error;
### debug!

allCountries <-
  metabefor::get_singleValue(
    studies,
    "country",
    flattenVectorsInDf = TRUE#FALSE
  );

### Tally studies per country and turn the tally into a data frame of
### upper-cased ISO Alpha-2 codes with their frequencies.
countryTable <- table(allCountries$country);

countryDf <- data.frame(
  iso_a2 = toupper(names(countryTable)),
  freq = unname(unclass(countryTable))
);

###-----------------------------------------------------------------------------
### Add ISO Alpha-3 codes
###-----------------------------------------------------------------------------

### Get ISO Alpha-2 to ISO Alpha-3 country code translation table
### https://www.iban.com/country-codes
country_iso_codes <- utils::read.delim(
  file.path(dataPath, "country-iso-alpha2-alpha3.csv"),
  encoding = "UTF-8"
);

### Named lookup vector: names are Alpha-2 codes, values Alpha-3 codes.
iso_a3 <- stats::setNames(
  country_iso_codes$iso_a3,
  nm = country_iso_codes$iso_a2
);

### Add ISO Alpha-3 by looking up each Alpha-2 code.
countryDf$iso_a3 <- iso_a3[countryDf$iso_a2];

###-----------------------------------------------------------------------------
### Create and save plot
###-----------------------------------------------------------------------------

### Get world map (country polygons as an 'sf' data frame)
spdf_world <- rnaturalearth::ne_countries(returnclass = "sf");

### Join the per-country study frequencies onto the map; 'gu_a3' is
### the map's Alpha-3 code column. Countries without studies keep NA
### in `freq` (all.x = TRUE).
spdf_world_withUsers <-
  merge(x = spdf_world,
        y = countryDf,
        by.x = "gu_a3",
        by.y = "iso_a3",
        all.x = TRUE,
        all.y = FALSE);

### Choropleth of study counts: viridis fill on a log10 scale,
### countries without data shown in black.
worldMap <-
  ggplot2::ggplot(data = spdf_world_withUsers) +
    ggplot2::geom_sf(ggplot2::aes(fill = freq),
                     na.rm=TRUE) +
    ggplot2::scale_fill_viridis_c(
      name = "Studies",
      trans="log10",
      na.value="black"
    ) +
    ggplot2::theme_void() +
    ggplot2::theme(
      plot.margin = ggplot2::unit(c(.5,.5,.5,.5), units="cm")
    );

### Render the plot into the document and save it to `outputPath`.
ufs::knitAndSave(
  worldMap,
  path = outputPath,
  figCaption = "World map of studies",
  figWidth = 20,
  figHeight = 10);
World map of studies.

World map of studies.

Mean age

### Overview of the variables extracted for the biological-influences
### construct (this construct contains both age and gender results).
table(
  univarDf$uni.variable[
    univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj"]
  );

   age gender 
   123    214 
### Select the rows holding age results and average their means.
rows_about_age <-
  (univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj") &
      (univarDf$uni.variable == "age");

ageDf <- univarDf[rows_about_age, ];

mean(ageDf$uni.mean, na.rm=TRUE);
[1] 20.22113
### Alternatively: the same mean computed with inline subsetting
### instead of the intermediate data frame.

mean(
  univarDf[
    (univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj") &
      (univarDf$uni.variable == "age"),
    'uni.mean'
  ],
  na.rm=TRUE
);
[1] 20.22113
### Histogram of the per-result mean ages.
hist(
  univarDf[
    (univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj") &
      (univarDf$uni.variable == "age"),
    'uni.mean'
  ]
);

Sandbox

### Start with:
###   0.1 Run ufs::quietGitLabUpdate("r-packages/metabefor", quiet = FALSE);
###   0.2 Restart R

### 1) Run setup chunk to load paths
### 2) Run this to load studies:

### Parse the extraction scripts into a studies object, but only if
### it is not already present in the workspace.
if (!exists('studies')) {
  studies <- metabefor::rxs_parseExtractionScripts(
    path = extractionScriptPath
  );
}


### 3) Run the following chunks to supplement data and create data frames:
###   3.1) supplement-values-from-txs-specs
###   3.2) supplement-values-within-trees
###   3.3) produce-univar-df
###   3.4) produce-assoc-df
###
### Then you have uniVarDf and assocDf

#-----------------------------------------------------------------------------

### Change data frame to wide format (and back to long):
# ?tidyr::pivot_wider
# ?tidyr::pivot_longer


#############################################################################
# How can I find the row number of a particular value in column X ?
which(rownames(univarDf) == "1042") # "[1] 2 ".
# NOTE(review): grepl() coerces its arguments to character, so
# matching a number as a regex pattern is fragile ("." matches any
# character, and 86.80 prints as "86.8") -- verify matches manually.
which(grepl(86.80, univarDf$uni.percentage)) # "[1] 91".

##############################################################################
############### Prepare univariate data frame (general)#######################

### Combine names authors study and year of publication and add to univarDf:
univarDf$author_year <-
  paste0(univarDf$studyInfo_name.author,
         " (",
         univarDf$studyInfo_year.published,
         ")")

### 'author_year' as first column:
univarDf <- dplyr::relocate(univarDf, author_year, .before = uni.name)
# NOTE(review): options() changes a global setting and View() is
# interactive-only; both are fine in a sandbox but should not survive
# into the rendered document.
options(digits = 4)
View(univarDf)
library(tidyverse)
# Remove duplicate rows:
univarDf <- distinct(univarDf) # No fast way to do this; rows are distinct on the variable that 'splits' the row (when in metafor for certain variables a vector is used instead of a single value [e.g. for 'year of study' or 'country'], it causes the formation of a duplicate row).
# Remove empty columns:
univarDf <- Filter(function(x)!all(is.na(x)), univarDf)
# Remove study by Saunders et al. (2014) (reason: M age > 30 years):
univarDf <- univarDf[univarDf$studyId!="Saunders-Dann-Griest-Frederick--2013.rxs.Rmd", ]
# Add row numbers:
univarDf$row_number <- seq.int(nrow(univarDf))
univarDf <- relocate(univarDf, row_number, .before = author_year)
# Use the correct N for each result: prefer the result-level n, then
# the subsample N, then the study-level N (first non-missing wins).
univarDf <- univarDf %>% 
  mutate(
    N = coalesce(
      as.numeric(univarDf$uni.n),
      as.numeric(univarDf$subsample_subsample.N),
      as.numeric(univarDf$studyMethods_N)
    )
  )
# Relocate new N column
univarDf <- relocate(univarDf, N, .before = uni.variable)

View(univarDf) # df now contains 1356 rows and 79 columns.


##############################################################################
############ Prepare univariate data frame of BEHAVIOUR ######################

# Keep only rows about behaviour:
univBeh <- univarDf[univarDf$varSpec_oper.constructIdentifier=="behaviour_79n2w1bj", ]
View(univBeh) # df with 363 rows and 81 columns.
# Add row numbers:
univBeh$row_number <- seq.int(nrow(univBeh))
univBeh <- dplyr::relocate(univBeh, row_number, .before = author_year)
# Remove columns:
# NOTE(review): dropping columns by numeric position is fragile --
# any upstream change to the data frame's columns silently shifts
# which columns are removed; verify against current column order.
univBeh <- univBeh[ ,-c(8:10,17:20,22:23,27:32,35:39,41:61,63,66:69,75:76,79:81)]
# Relocate columns:
univBeh <- dplyr::relocate(univBeh, varSpec_hp_measure, .before = uni.subsample)
univBeh <- dplyr::relocate(univBeh, varSpec_hp_precise, .before = uni.subsample)
univBeh <- dplyr::relocate(univBeh, studyInfo_pld_use, .before = uni.subsample)
univBeh <- dplyr::relocate(univBeh, uni.mean_rescaled, .before = uni.percentage)

View(univBeh) # df now has 363 rows and 30 columns.


##############################################################################
############ Prepare univariate data frame HP-use ############################

# Remove studies (exclusively) about pld-use:
univHp <- univBeh[univBeh$studyInfo_pld_use!="yes, only pld use", ]
View(univHp)# df has 337 rows and 30 columns.

# Create separate data frames for the 3 types of hp-use measurement
# ("ever used", "general", "precise"/situation-specific):
univHpEver <- univHp[univHp$varSpec_hp_measure=="ever used", ]
univHpGen <- univHp[univHp$varSpec_hp_measure=="general", ]
univHpPrec <- univHp[univHp$varSpec_hp_measure=="precise", ]


# For homogeneous results, only 1 result per study. These results are AVERAGED. 

# When an average score is calculated based on homogeneous results in a study, and the N for the two (or more) results differ, then a WEIGHTED average is calculated (taking into account N, not the variance). This approach will be followed for all analyses. 

# When a weighted average was composed, it was placed in the row of its constituents, and the rows of the other(s) variable(s) was/were removed. Note that apart from the new average, the information in the other columns is not adjusted! So if e.g. the new N is needed, additional analyses are required. 

##############################################################################
###### 1)Ever HP used ########################################################

# Remove empty rows:
View(univHpEver)
univHpEver <- univHpEver[rowSums(is.na(univHpEver)) != ncol(univHpEver), ]
# Add row numbers:
univHpEver$row_number <- seq.int(nrow(univHpEver))
univHpEver <- dplyr::relocate(univHpEver, row_number, .before = author_year)
View(univHpEver) # 21 rows, 30 columns.

# Total: 7 studies, all nominal with 2 categories, except Kamphuis (2018) which has 3 categories (but only 1 is "yes, hp use"). All studies report 1 result, except Kamphuis (2018) which reports 3 results ("ever used" for 3 settings: festival, concert, dance party).

# NOTE(review): the row/column numbers below are positional and only
# valid for this exact snapshot of the data; re-verify them whenever
# the upstream data frames change.
univHpEver <- univHpEver[-c(1,4,5,7,8,10,11,13,14,17,19,20), ] # Remove the "no hp use" rows.

# Kamphuis (2018), compose average % for 3 results (2 options):

# 1. Weighted (for sample sizes) average:
univHpEver[3,13] <- with(univHpEver[c(3:5), ], sum(N * uni.percentage)/sum(N))
# 58.198% weighted average.

# 2. Average (unweighted):
# univHpEver[3,13] <- with(univHpEver[c(3:5), ], sum(uni.percentage)/3)
# 58.33% unweighted average.

univHpEver <- univHpEver[-c(4:5), ] # Remove old rows.
View(univHpEver) # df now has 7 rows and 30 columns.

mean(univHpEver$uni.percentage) # 40.93%  
# With all 7 studies equal weights, although N ranges from 43 to 1443. Note that participants differ in age and continent.
# The weighted average for Kamphuis (2018) was used.

# Basic bar plot:
plotunivHpEver <-ggplot(univHpEver, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotunivHpEver 


###############################################################################
#### 2)General use of HP ######################################################

View(univHpGen)
# Remove empty rows
univHpGen <- univHpGen[rowSums(is.na(univHpGen)) != ncol(univHpGen), ]
# Remove empty columns
univHpGen <- Filter(function(x)!all(is.na(x)), univHpGen)
univHpGen <- univHpGen[ ,-c(12)]
# Add row numbers
univHpGen$row_number <- seq.int(nrow(univHpGen))
univHpGen <- relocate(univHpGen, row_number, .before = author_year)
# Remove duplicate rows
univHpGen <- univHpGen[-c(16,18), ]
View(univHpGen) # df now has 43 rows and 24 columns.
# Relocate operationalisation of values
univHpGen <- relocate(univHpGen, varSpec_oper.values, .before = uni.percentage)
univHpGen <- relocate(univHpGen, varSpec_oper.labels, .before = uni.percentage)
# Count number of unique studies:
univHpGenCount <- univHpGen %>% group_by(author_year) %>% summarize(count=n())
View(univHpGenCount) # 18 studies (1-5 results each), only percentages. The results of 14 studies are dichotomous, 2 studies use 2 categories, and 2 studies use 5 categories. Therefore, all results will be transformed into dichotomous results, so results can be aggregated.

# Values are not operationalized in a uniform way (e.g. 1 is not always 'yes') and some results are dichototomous, while others are categorical: 1) Recode, so 2 is always 'yes'; 2)Construct dichotomous results (if not yet present): 'sometimes' counts as 'no', 'always' and 'frequently' as 'yes'; 3) For studies with multiple categories, join all the '2s'. 

## 1) Recode, so 2 is always 'yes':
# NOTE(review): the nested ifelse() calls below pass the numeric
# value itself as the `test` argument (e.g. `ifelse(x, 1, 99)`),
# which coerces it to logical (0 -> FALSE, non-zero -> TRUE); several
# calls even omit the `no` argument, which only works because no test
# value is ever FALSE. The recodings happen to work for this data but
# should be verified against the studies' codebooks.

# Add row numbers
univHpGen$row_number <- seq.int(nrow(univHpGen))
univHpGen <- relocate(univHpGen, row_number, .before = author_year)
View(univHpGen)
# DelGiacco (2015):
univHpGen[c(9:13),9] <- ifelse(univHpGen[c(9:13),9] > 3, 2, ifelse(univHpGen[c(9:13),9], 1, 99)) 
# Gilliver (2012):
univHpGen[c(22:23),9] <- ifelse(univHpGen[c(22:23),9] > 1, 1, ifelse(univHpGen[c(22:23),9], 2)) 
# Griest (2007):
univHpGen[c(24:26),9] <- ifelse(univHpGen[c(24:26),9] < 3, 1, ifelse(univHpGen[c(24:26),9], 2)) 
# Hoppenbrouwer (2018):
univHpGen[c(31:35),9] <- ifelse(univHpGen[c(31:35),9]== 1, 2, ifelse(univHpGen[c(31:35),9], 1)) 
# Martens (2006):
univHpGen[c(38:40),9] <- ifelse(univHpGen[c(38:40),9]== 1, 2, ifelse(univHpGen[c(38:40),9], 1)) 
# Pouwels:
univHpGen[41,9] <- 2
# Zieltjes (2014):
univHpGen[c(42:43),9] <- ifelse(univHpGen[c(42:43),9]== 1, 2, ifelse(univHpGen[c(42:43),9], 1)) 

## 2)Keep only the results for 'hp use = yes' (i.e. value 2):
univHpGen1 <- univHpGen[univHpGen$uni.value==2, ]
View(univHpGen1) # df has 19 rows and 24 columns.
# Add row numbers
univHpGen1$row_number <- seq.int(nrow(univHpGen1))
univHpGen1 <- relocate(univHpGen1, row_number, .before = author_year)
## 3) Join percentage (for study with multiple 'yes' [i.e.'2s']) and remove old row:
# Del Giacco (2015):
univHpGen1[6,12] <- sum(univHpGen1[c(5:6),12])
univHpGen1 <- univHpGen1[-5, ] # df has 18 rows and 24 columns.

# Calculate mean % of hp use:
mean(univHpGen1$uni.percentage) # 12.37% is the (unweighted) mean % of "general" hp use, N of studies ranges from 41 to 9458.

# Basic bar plot:
plotHpGen1 <-ggplot(univHpGen1, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotHpGen1 



###############################################################################
#### 3) Use of HP in specific situations ######################################

View(univHpPrec)
# Remove empty rows
univHpPrec <- univHpPrec[rowSums(is.na(univHpPrec)) != ncol(univHpPrec), ]
# The df now has 245 rows.
# Add row numbers
univHpPrec$row_number <- seq.int(nrow(univHpPrec))
univHpPrec <- relocate(univHpPrec, row_number, .before = author_year)
# Relocate columns
univHpPrec <- relocate(univHpPrec, varSpec_oper.values, .before = uni.percentage)
univHpPrec <- relocate(univHpPrec, varSpec_oper.labels, .before = uni.percentage)
univHpPrec <- relocate(univHpPrec, uni.value, .before = uni.percentage)
univHpPrec <- relocate(univHpPrec, varSpec_oper.datatype, .before = uni.value)

### Remove duplicate rows:

# Widen et al. (2006) [duplicate rows]:
univHpPrec <- univHpPrec[-c(214,216,220,222), ]
# Gilles et al. (2014)[duplicate rows]:
univHpPrec <- univHpPrec[-c(73,75,77,79,81,83,85,87,89,91), ] 
# Matei et al. (2018)[duplicate rows]:
univHpPrec <- univHpPrec[-c(124,127,130,133,136,139,142,145,148), ] 
View(univHpPrec) # The DF now has 222 rows.

## Count number of unique studies
univHpPrecCount <- univHpPrec %>% group_by(author_year) %>% summarize(count=n())
View(univHpPrecCount) # Total: 26 studies, nominal, ordinal, and numeric. Almost 2/3 of the results is dichotomous. Therefore, the remaining 1/3 of the results is converted in a dichotomous result, so the results of the studies can be aggregated. 
# Count number of results for each setting of hp use:
univHpPrecSettingC <- univHpPrec %>% group_by(varSpec_hp_precise) %>% summarize(count=n())
View(univHpPrecSettingC) # Total: 9 kind of settings (incl. 'other'), with results ranging from 3 to 48. 

### Df1:

### Recode, so 2 is always 'yes' ('sometimes'= 'no'; 'always' & 'frequently' = 'yes'):
# Per-study recode of the response codes so that code 2 means 'uses hp'.
# Column 15 is presumably `uni.value` after the relocations above — TODO confirm.
# NOTE(review): in the nested calls below, the inner ifelse uses the value
# itself as the condition (non-zero -> 1, zero -> 99, NA -> NA). If all codes
# are positive this maps every non-matching value to 1; verify that is intended.
univHpPrec1 <- univHpPrec
View(univHpPrec1)
# Auchter M.& Le Prell C.G. (2014):
univHpPrec1[c(1:5),15] <- ifelse(univHpPrec1[c(1:5),15] > 3, 2, ifelse(univHpPrec1[c(1:5),15], 1, 99)) 
# Balanay, 2015:
univHpPrec1[c(6:19),15] <- ifelse(univHpPrec1[c(6:19),15]== 1, 2, ifelse(univHpPrec1[c(6:19),15], 1, 99)) 
# Beach E., Nielsen L.B. & Gilliver M. (2016): numeric.
# Bogosch I.I., House R.A. & Kudla I. (2005):
univHpPrec1[c(24:26),15] <- ifelse(univHpPrec1[c(24:26),15]== 3, 2, ifelse(univHpPrec1[c(24:26),15], 1, 99)) 
# Callahan et al. (2014):
univHpPrec1[c(27:30),15] <- ifelse(univHpPrec1[c(27:30),15]== 1, 2, ifelse(univHpPrec1[c(27:30),15], 1, 99)) 
# Carter L. & Black D. (2017):
univHpPrec1[c(31:66),15] <- ifelse(univHpPrec1[c(31:66),15]== 3, 2, ifelse(univHpPrec1[c(31:66),15], 1, 99)) 
# de Lijster G.P.A. & van der Ploeg C.P.B. (2015):
univHpPrec1[c(67:68),15] <- ifelse(univHpPrec1[c(67:68),15]== 1, 2, ifelse(univHpPrec1[c(67:68),15], 1, 99)) 
# Eilering, M. & Zweet, D. (2018):
univHpPrec1[c(69:72),15] <- ifelse(univHpPrec1[c(69:72),15]== 1, 2, ifelse(univHpPrec1[c(69:72),15], 1, 99)) 
# Gilles et al. (2014):
univHpPrec1[c(73:82),15] <- ifelse(univHpPrec1[c(73:82),15] > 3, 2, ifelse(univHpPrec1[c(73:82),15], 1, 99)) 
# Gorter, A.F. (2012):
univHpPrec1[c(83:84),15] <- ifelse(univHpPrec1[c(83:84),15]== 1, 2, ifelse(univHpPrec1[c(83:84),15], 1, 99)) 
# Hickson et al. (2007): row 85-98, correct values (i.e. 2=yes).
# Holmes et al., (2007): row 99-118, correct values (i.e. 2=yes).
# Khan et al. (2018): row 119-121, numeric.
# Matei et al. (2018): row 122-139, correct values (i.e. 2=yes).
# Olsen et al. (2004): row 140-143, correct values (i.e. 2=yes).
# Olson et al. (2016):
univHpPrec1[c(144:147),15] <- ifelse(univHpPrec1[c(144:147),15]== 1, 2, ifelse(univHpPrec1[c(144:147),15], 1, 99)) 
# Peters et al. (2018): row 148-152, numeric.
# Rawool V.W. & Collignon-Wayne L.A. (2008):
univHpPrec1[c(154:155),15] <- ifelse(univHpPrec1[c(154:155),15]== 1, 2, ifelse(univHpPrec1[c(154:155),15], 1, 99)) 
# The response to the question used by Rawool (2008) ('I use noisy equipment (such as..) without HP') was reversed (strong disagreement now is regarded as 'hp use').
univHpPrec1[c(156:160),15] <- ifelse(univHpPrec1[c(156:160),15]==1, 2, ifelse(univHpPrec1[c(156:160),15], 1, 99)) 
# Vogel I., Brug J., Van der Ploeg C.P.B. & Raat H. (2010): correct values (i.e. 2=yes).
# Warner-Czyz A.D. & Cain S. (2015): 163-186
univHpPrec1[c(163:186),15] <- ifelse(univHpPrec1[c(163:186),15]== 3, 2, ifelse(univHpPrec1[c(163:186),15], 1, 99)) 
# Weichbold V.& Zorowka P. (2003): row 187-191, correct values (i.e. 2=yes).
# Weichbold V.& Zorowka P. (2007):      "       "   .
# Widén S, Bohlin M, Johansson I. (2011): numeric.
# Widén S.E. (2013): numeric.
# Widen S.E., Holmes A.E. & Erlandsson S.I. (2006)
univHpPrec1[c(195:202),15] <- ifelse(univHpPrec1[c(195:202),15]== 1, 2, ifelse(univHpPrec1[c(195:202),15], 1, 99)) 
# Widén S.E., Holmes A.E., Johnson T., Bohlin M., & Erlandsson (2009): row 203 - 220, correct values (i.e. 2=yes).
# Numeric studies:
# Rows with numeric results are flagged with code 2 so they survive the
# 'uni.value == 2' filter applied when building Df2 below.
univHpPrec1[c(20:23,119:121,148:153,192:194),15] <- 2
View(univHpPrec1)

### Df2:

### Keep only the results for 'hp use = yes' (i.e. value 2):
# Df2 keeps only the 'yes' rows, then merges split 'yes' categories
# (e.g. 'often' + 'always') and fills missing percentages from rescaled means.
# Column 16 is presumably `uni.percentage` and column 11 the rescaled mean
# after the earlier relocations — TODO confirm.
univHpPrec2 <- univHpPrec1[univHpPrec1$uni.value==2, ]
# Add row numbers
univHpPrec2$row_number <- seq.int(nrow(univHpPrec2))
univHpPrec2 <- relocate(univHpPrec2, row_number, .before = author_year)
# Remove empty row (that suddenly appeared...):
# NOTE(review): filtering with a logical vector containing NA yields NA rows;
# that is the likely origin of this 'empty row'. Consider which(...) instead.
univHpPrec2 <- univHpPrec2[-c(89), ]
View(univHpPrec2)# The DF now has 105 rows.

### Join percentages and rescaled mean scores:

# Auchter M.& Le Prell C.G. (2014), sum the the two '2s':
univHpPrec2[1,16] <- sum(univHpPrec2[c(1:2),16])
univHpPrec2 <- univHpPrec2[-2, ]
# Gilles et al. (2014), sum 'often' and 'always' (i.e. four '2s'):
univHpPrec2[31,16] <- sum(univHpPrec2[c(31:32),16])
univHpPrec2 <- univHpPrec2[-32, ]
univHpPrec2[32,16] <- sum(univHpPrec2[c(32:33),16])
univHpPrec2 <- univHpPrec2[-33, ] # The DF now has 102 rows.
# Add the results of the rescaled means to the column with the percentages:
univHpPrec2[c(9:12),16] <- univHpPrec2[c(9:12),11]
univHpPrec2[c(51:53),16] <- univHpPrec2[c(51:53),11]
univHpPrec2[c(67:72),16] <- univHpPrec2[c(67:72),11]
univHpPrec2[c(86:88),16] <- univHpPrec2[c(86:88),11]
View(univHpPrec2) # The column uni.percentage now has no 'NAs'.


### Df3:

# Df3 is the final per-setting data frame: homogeneous results within a study
# are averaged, subgroups are relabeled, and unusable results are dropped.
univHpPrec3 <- univHpPrec2
# Results that fall in the same category (e.g. "concert/festival") are AVERAGED, thus only 1 result per study! 
# If N for two homogeneous results differ, then for the (dichotomous) results a WEIGHTED average (taking into account N, not the variance) for that study is calculated. 

# Beach et al. (2016): calculate mean hp-use for concert & festival (n=51 for both subgroups. Therefore, no weights used.) (row 10 and 11):       
univHpPrec3[10,16] <- mean(univHpPrec3[c(10:11),16])
univHpPrec3 <- univHpPrec3[-11, ]
# Add row numbers:
univHpPrec3$row_number <- seq.int(nrow(univHpPrec3))
univHpPrec3 <- relocate(univHpPrec3, row_number, .before = author_year)
View(univHpPrec3) # The df now has 101 rows.

# Widen (2006) and Carter & Black(2017) are present respectively four and ten times, label the subgroups 'a' and 'b' to distinguish them in the plots:
# Column 2 is author_year after the relocate above.
univHpPrec3[c(15:20),2] <- "Carter L. & Black D. (2017) a"
univHpPrec3[c(21:26),2] <- "Carter L. & Black D. (2017) b"
univHpPrec3[c(88:89),2] <- "Widen S.E., Holmes A.E. & Erlandsson S.I. (2006) a"
univHpPrec3[c(90:91),2] <- "Widen S.E., Holmes A.E. & Erlandsson S.I. (2006) b"
# Widen (2006): a=USA, b=Sweden; Carter (2017): a=normal hearing, b=hearing impairment.

# Peters, G.J.Y & Noijen, J. (2019): Do not use results, as neither 'epw' nor 'epc' is similar to measures of 'hp use' in other studies.

univHpPrec3 <- univHpPrec3[-c(66:71), ]
View(univHpPrec3) # The df now has 95 rows.
# Do not use 'epc' (row 67,69,71) and use mean of behavior_concert_epw and behavior_festival_epw:    
# univHpPrec3 <- univHpPrec3[-c(67,69,71), ]
# univHpPrec3[67,11] <- mean(univHpPrec3[c(67,68),11]) 
# univHpPrec3 <- univHpPrec3[-68, ] # Concert is n=272, and festival is n=332. This difference is not taken into account.

# The df 'univHpPrec3' can now be used to calculate the average % of hp use for the specified different settings.


###############################################################################
### 3.1. Mean % of hp use at concerts/festivals ###############################

# Unweighted mean percentage of hp use in the 'concert/festival' setting.
mean(univHpPrec3[univHpPrec3$varSpec_hp_precise=="concert/festival","uni.percentage"]) # 18.2%. 

# or:
concertHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="concert/festival", ]
mean(concertHp$uni.percentage) # 18.2% (range 1.90% - 61.20%). Unweighted average. The N of the studies ranges from 49 - 130.000!
# Add row numbers:
concertHp$row_number <- seq.int(nrow(concertHp))
concertHp <- relocate(concertHp, row_number, .before = author_year)
View(concertHp) # Df with 17 rows/results from 15 studies.

## Count number of unique studies
concertHpCount <- concertHp %>% group_by(author_year) %>% summarize(count=n())
View(concertHpCount) # Total: 15 studies. Carter (2018) and Widen (2006) both have 2 subgroups and thus each two results.

# In Carter (2018) the groups consist of people with and without a hearing impairment, in Widen (2006) two similar studies have been performed in the US and in Sweden.
# As the researchers and the methods used were the same in both cases, using the two results from each study may decrease heterogeneity of the results in the review. On the other side, the two groups in each study are very different. In the case of Widen (2006), the results even moved in opposite directions. For the latter reason, I use BOTH (!) results of the 2 studies. 

# Basic bar plot:
plotConcertHp <-ggplot(concertHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotConcertHp 


###############################################################################
### 3.2. Mean % of hp use while making music ##################################

# Mean percentage of hp use while making music; studies contributing several
# results are first collapsed to one result each.
mean(univHpPrec3[univHpPrec3$varSpec_hp_precise=="making music","uni.percentage"]) # 13.71%, for 21 rows/results from 10 studies. 

# Some studies include multiple results. Use only an overall/average result from these studies:

makeMusicHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="making music", ]
# Add row numbers:
makeMusicHp$row_number <- seq.int(nrow(makeMusicHp))
makeMusicHp <- relocate(makeMusicHp, row_number, .before = author_year)
View(makeMusicHp)
# Callahan et al.(2011): calculate mean of "Using HPD during ensemble performances" (i.e. hpuse_music_freq) and "Using HPD when practicing or rehearsing" (i.e. hpuse_performance_freq):
makeMusicHp[3,16] <- mean(makeMusicHp[c(3:4),16])
makeMusicHp <- makeMusicHp[-4, ]
# Olson et al. (2014): calculate mean of "hpuse_rehearsal_freq and "hpuse_performance_freq":
makeMusicHp[17,16] <- mean(makeMusicHp[c(17:18),16])
makeMusicHp <- makeMusicHp[-18, ]
# Matei et al. (2018): reports numerous results, use only "hpuse_all_freq" and drop the other results.
makeMusicHp <- makeMusicHp[-c(8:15), ]
View(makeMusicHp) # Df now has 11 rows from 10 studies (Carter, 2017 has results for both subgroups).

# Mean % of hp use while making music (with adjusted df):
mean(makeMusicHp$uni.percentage) # 11.80% (range 0.0% - 35.0%). The N of the studies is not taken into account (i.e. no weighted mean). N ranges from 6 - 211. 
# Basic bar plot:
plotMakeMusicHp <-ggplot(makeMusicHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotMakeMusicHp 


###############################################################################
### 3.3. Mean % of hp use during club/disco visit #############################

# Mean percentage of hp use in the club/discotheque setting.
discoHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="club/discotheque", ]
# Add row numbers:
discoHp$row_number <- seq.int(nrow(discoHp))
discoHp <- relocate(discoHp, row_number, .before = author_year)
View(discoHp) # The df has 17 rows/results from 15 studies (Carter, 2017 and Widen, 2006 each have 2 results [as they have 2 subgroups]) .

# Mean % of hp use at disco/club:
mean(discoHp$uni.percentage) # 9.55% (range 0.00% - 37.50%). The N of the studies is not taken into account (i.e. no weighted mean). The N ranges from 51 - 1757. 

# Basic bar plot:
plotDiscoHp <-ggplot(discoHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotDiscoHp
# NOTE(review): dev.off() closes the current graphics device and errors when
# none is open — confirm this is intended in a non-interactive run.
dev.off()

###############################################################################
### 3.4. Mean % of hp use during use of power tool ############################

# Mean percentage of hp use while using power tools; within-study subgroup
# results are first combined into N-weighted averages.
toolsHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="use of power tool", ]
# Add row numbers:
toolsHp$row_number <- seq.int(nrow(toolsHp))
toolsHp <- relocate(toolsHp, row_number, .before = author_year)
View(toolsHp)
# Compose weighted averages for studies with >1 (homogeneous) result:
# Pattern: write the N-weighted average sum(N * pct) / sum(N) into the first
# of the study's rows, then drop the now-redundant second row. Each drop
# renumbers later rows, so the indices below are sequential, not absolute.

# Balanay J.A.G. & Kearney G.D. (2015):
toolsHp[1,16] <- with(toolsHp[c(1:2), ], sum(N * uni.percentage)/sum(N))# 31.757%
toolsHp <- toolsHp[-2, ] # Remove old row.
# Hickson et al. (2007):
toolsHp[4,16] <- with(toolsHp[c(4:5), ], sum(N * uni.percentage)/sum(N))
# 28.92%
toolsHp <- toolsHp[-5, ]
# Holmes et al. (2007):
toolsHp[5,16] <- with(toolsHp[c(5:6), ], sum(N * uni.percentage)/sum(N))
# 13.05%
toolsHp <- toolsHp[-6, ]
# Warner-Czyz et al. (2015):
toolsHp[7,16] <- with(toolsHp[c(7:8), ], sum(N * uni.percentage)/sum(N))
# 6.76% 
toolsHp <- toolsHp[-8, ]
# Widén et al. (2009):
toolsHp[8,16] <- with(toolsHp[c(8:9), ], sum(N * uni.percentage)/sum(N))# 21.91%
toolsHp <- toolsHp[-9, ]
View(toolsHp) # Df with 8 rows from 7 studies (Carter, 2017 two results)

# Mean % of hp use while using power tools:
mean(toolsHp$uni.percentage) # 21.69% (range 6.76% - 36.13%). The N of the studies is not taken into account (i.e. no weighted mean). Before (!) scores were averaged (for studies with >1 result), N ranged from 25 - 824.

# The % of hp use while using power tools on average is higher than during lawn mowing. In studies were these 2 behaviors were aggregated, the % may therefore be lower than in studies were only use of power tools was measured. After aggregation a true comparison, thus, is no longer possible. This may also apply to other parts of the review were results that are not completely homogeneous have been aggregated.

# Basic bar plot:
plotToolsHp <-ggplot(toolsHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotToolsHp
# NOTE(review): dev.off() errors when no graphics device is open — confirm.
dev.off()

###############################################################################
### 3.5. Mean % of hp use during hobby & sports ###############################

# Mean percentage of hp use during hobby/sports activities; within-study
# subgroup results are combined into N-weighted averages first.
hobbyHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="hobby and sports", ]
# Add row numbers:
hobbyHp$row_number <- seq.int(nrow(hobbyHp))
hobbyHp <- relocate(hobbyHp, row_number, .before = author_year)
View(hobbyHp) # df with 9 rows/results from 4 studies.

# Compose weighted averages for studies with >1 (homogeneous) result:
# Indices are sequential: each removal renumbers the remaining rows.

# Holmes et al. (2007):
hobbyHp[2,16] <- with(hobbyHp[c(2:4), ], sum(N * uni.percentage)/sum(N))#14.97%
hobbyHp <- hobbyHp[-c(3:4), ]
# Warner-Czyz et al. (2015):
hobbyHp[3,16] <- with(hobbyHp[c(3:4), ], sum(N * uni.percentage)/sum(N))#2.22%
hobbyHp <- hobbyHp[-4, ]
# Widén et al. (2009):
hobbyHp[4,16] <- with(hobbyHp[c(4:6), ], sum(N * uni.percentage)/sum(N))#9.37%
hobbyHp <- hobbyHp[-c(5:6), ]
View(hobbyHp) # df with 4 rows from 4 studies.

# Mean % of hp use during hobby/sports:
mean(hobbyHp$uni.percentage) #8.14% (range 2.22% - 14.97%). The N of the studies is not taken into account (i.e. no weighted mean). Before scores were averaged (for studies with >1 result), N ranged from 23 - 1285.

# The settings in this category are quite diverse: hp-use is highest for nascar (20-30%), while it is lowest in aerobics class (about 0%).

# Basic bar plot:
plotHobbyHp <-ggplot(hobbyHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotHobbyHp


###############################################################################
### 3.6. Mean % of hp use during shooting #####################################

# Mean percentage of hp use while shooting; no within-study aggregation needed.
shootingHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="shooting", ]
# Add row numbers:
shootingHp$row_number <- seq.int(nrow(shootingHp))
shootingHp <- relocate(shootingHp, row_number, .before = author_year)
View(shootingHp) # df with 7 rows/results from 6 studies (Carter, 2017 twice).

# Mean % of hp use while using firearms:
mean(shootingHp$uni.percentage) #52.70% (range 28.00% - 79.20%), for 7 rows/results from 6 studies. The N of the studies is not taken into account (i.e. no weighted mean). The N ranges from 11 - 697.

# Basic bar plot:
plotShootingHp <-ggplot(shootingHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotShootingHp


###############################################################################
### 3.7. Mean % of hp use during work #########################################

# Mean percentage of hp use in the 'at work' setting.
workHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="at work", ]
# Add row numbers:
workHp$row_number <- seq.int(nrow(workHp))
workHp <- relocate(workHp, row_number, .before = author_year)
# For Khan et al. (2018) use only the overall result:
workHp <- workHp[-c(4:5), ]
View(workHp) # df with 5 rows/results from 5 studies.
# Mean % of hp use at work:
mean(workHp$uni.percentage) #16.30% (range 10.90% - 29.11%). The N of the studies is not taken into account (i.e. no weighted mean). The N ranges from 50 - 192.

# Basic bar plot:
plotworkHp <-ggplot(workHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotworkHp
# NOTE(review): dev.off() errors when no graphics device is open — confirm.
dev.off()


###############################################################################
### 3.8. Mean % of hp use while at the pub ####################################
# Mean percentage of hp use in the 'pub' setting.
pubHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="pub", ]
# Add row numbers:
pubHp$row_number <- seq.int(nrow(pubHp))
pubHp <- relocate(pubHp, row_number, .before = author_year)
View(pubHp)# df with 3 rows/results from 2 studies. 

# Mean % of hp use at the pub:
mean(pubHp$uni.percentage) #4.83% (range 0.00% - 12.50%). The N of the studies is not taken into account (i.e. no weighted mean). N ranges from 51 - 178 (subgroups: 115 & 63).

# Basic bar plot:
plotPubHp <-ggplot(pubHp, aes(x=author_year, y=uni.percentage)) + geom_bar(stat="identity")+ coord_flip()
plotPubHp


###############################################################################
### 3.9. Plot of 'specific', 'ever', and 'general' hp use combined ############

## Make one data frame of 'hp precise', 'hp ever' and 'hp general':
# Stack the per-setting data frames (3.1-3.8) together with the 'ever' and
# 'general' hp-use frames into one long frame for a combined box plot.

# Hp precise (data 3.1 - 3.8.):
dfP1 <- concertHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP2 <- discoHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP3 <- makeMusicHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP4 <- toolsHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP5 <- hobbyHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP6 <- shootingHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP7 <- workHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP8 <- pubHp[ ,c("varSpec_hp_precise","uni.percentage")]
# Hp ever:
View(univHpEver)
# NOTE(review): dropping column 7 by position and renaming modifies
# univHpEver in place; rerunning this section then fails or drops more data.
univHpEver <- univHpEver[ ,-7]
univHpEver <- rename(univHpEver, varSpec_hp_precise = varSpec_hp_measure)
dfEv <- univHpEver[ ,c("varSpec_hp_precise","uni.percentage")]
View(dfEv)
# Hp general:
View(univHpGen1)
univHpGen1 <- rename(univHpGen1, varSpec_hp_precise = varSpec_hp_measure)
dfGen <- univHpGen1[ ,c("varSpec_hp_precise","uni.percentage")]
View(dfGen)
# Merge the dfs:
# full_join with no `by` joins on all shared columns (here both columns),
# which effectively appends the rows of each frame.
dfPrecise <- dfP1 %>% full_join(dfP2)
dfPrecise <- dfPrecise %>% full_join(dfP3)
dfPrecise <- dfPrecise %>% full_join(dfP4)
dfPrecise <- dfPrecise %>% full_join(dfP5)
dfPrecise <- dfPrecise %>% full_join(dfP6)
dfPrecise <- dfPrecise %>% full_join(dfP7)
dfPrecise <- dfPrecise %>% full_join(dfP8)
dfPrecise <- dfPrecise %>% full_join(dfEv)
dfPrecise <- dfPrecise %>% full_join(dfGen)
View(dfPrecise)
# Rename columns:
dfPrecise <- rename(dfPrecise, setting = varSpec_hp_precise)
dfPrecise <- rename(dfPrecise, percentage = uni.percentage)

## Basic box plot with dots:
precisePlot <- ggplot(dfPrecise, aes(x=setting, y=percentage)) + 
  geom_boxplot()+ geom_dotplot(binaxis='y', stackdir='center', dotsize=0.3) + coord_flip()
precisePlot
# NOTE(review): dev.off() errors when no graphics device is open — confirm.
dev.off()

# If 'hp use ever' and 'hp use general' are shown in the same plot with the 8 different categories/settings of 'hp precise', these two boxes should be at one extreme of the plot and in a different color. 



##############################################################################
####### Prepare univariate data frame of PSYCHOLOGICAL determinants ##########

## Drop demographic variables and behavior from data frame:
# Build univDet: the univariate results restricted to psychological
# determinants (biological influences and behaviour itself are removed).
univDet <- univarDf[univarDf$varSpec_oper.constructIdentifier!="inflBiological_79n2w1bj", ]
univDet <- univDet[univDet$varSpec_oper.constructIdentifier!="behaviour_79n2w1bj", ] 
# Add the row numbers:
univDet$row_number <- seq.int(nrow(univDet))
univDet <- dplyr::relocate(univDet, row_number, .before = author_year)
View(univDet)   # df with 659 rows and 79 columns.
# Drop subsamples scores "attitude_soundculture" from Chesky,2009
# NOTE(review): the hard-coded row indices below assume the row order produced
# above; each removal renumbers subsequent rows.
univDet <- univDet[-c(80:81), ] 
# Gilliver et al. (2012) contains duplicate rows. In addition, 'volume_both_other_score' should be dropped (not usable for review):
univDet <- univDet[-c(207, 210:212), ]                                  
#Drop duplicate rows from Widen (2006):
univDet <- univDet[-c(589,591,593,595), ] # Widen (2006) now has 4 rows/results (row 589 - 592)
View(univDet) # The df now has 649 rows and 79 columns.


###############################################################################
################ Psych. det. data frame for measures of central tendency ###### 
# Only MEAN, as no median has been extracted.
# Subset of univDet with a non-missing mean (column 11 — presumably uni.mean;
# TODO confirm column position).
univDetMean <- univDet[complete.cases(univDet[ ,11]), ] # Select results for means.
View(univDetMean)
# Relocate columns:
univDetMean <- relocate(univDetMean, uni.mean_rescaled, .after = uni.mean)
univDetMean <- relocate(univDetMean, uni.sd_rescaled, .after = uni.sd)
# Add row numbers:
univDetMean$row_number <- seq.int(nrow(univDetMean))
univDetMean <- dplyr::relocate(univDetMean, row_number, .before = author_year)
View(univDetMean)
# Gilliver et al. (2012): the mean standardized score on 'risk_perception_79n2fh4t' has to be reversed:
# NOTE(review): 21.21 is a manually computed reversed value — verify against
# the source data if the extraction sheet changes.
univDetMean[56,12] <- 21.21

View(univDetMean)# df with 168 rows and 79 columns

# Create COUNT overview of psych. det. with MEAN sorted by the parent UCID:
univDetMeanId <- univDetMean %>% group_by(univDetMean$varSpec_oper.constructIdentifier_aggr2)%>% summarise(n = n())
View(univDetMeanId) # 15 DCTs (+ NAs) with range 1 - 24.




##############################################################################
################ Psych. det. data frame for frequencies ######################

# Data is nominal, ordinal, and numeric. 
# Subset of univDet with a non-missing frequency/percentage (column 12 —
# presumably uni.percentage; TODO confirm column position).
univDetFreq <- univDet[complete.cases(univDet[ ,12]), ]
# Add row numbers:
univDetFreq$row_number <- seq.int(nrow(univDetFreq))
univDetFreq <- relocate(univDetFreq, row_number, .before = author_year)
View(univDetFreq) # df with 480 rows and 79 columns.

# Relocate columns:
univDetFreq <- relocate(univDetFreq, uni.percentage, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, varSpec_oper.constructIdentifier_aggr2, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, varSpec_oper.description, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, varSpec_oper.comment, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, studyInfo_pld_use, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, varSpec_oper.datatype, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, varSpec_oper.values, .before = uni.minimum)
univDetFreq <- relocate(univDetFreq, varSpec_oper.labels, .before = uni.minimum)
View(univDetFreq)

# COUNT overview of psych. det. for frequencies sorted by the parent UCID:
univDetFreqId <- univDetFreq %>% group_by(univDetFreq$varSpec_oper.constructIdentifier_aggr2)%>% summarise(n = n())
View(univDetFreqId) # 20 DCTs with range of rows 3 - 83. However, 1 item on a 5-points Likert scale count as 5 rows etc., and duplicate rows are included.

# COUNT overview of psych. det. for frequencies sorted by the number of categories used (for ordinal/nominal variables):
univDetFreqId2 <- univDetFreq %>% group_by(univDetFreq$varSpec_oper.values)%>% summarise(n = n())
View(univDetFreqId2) # '0' & '1' & '1' & '2' = 157 rows (=78 results)/ '1', '2' & '3' = 69 rows (=35 results)/ '1', '2', '3' & '4' = 75 rows (=37 results)/ '1', '2', '3', '4' & '5' = 154 rows (=77 results)/ 25 rows have no value (i.e. likely are numeric).

# As a substantial part of the results (>50% of total) have been measured using 3 or more categories, it does not make sense to dichotomize all results. Therefore, when 3 or more categories are used, the result is converted into a (rescaled) MEAN score (with SD) and ADDED to the df for means. The remaining part (i.e. the dichotomous results), will be aggregated separately.


###############################################################################
############ Aggregation of FREQUENCIES for equal DCTs ########################

### Set up:

# First select the results that will be used in the "analysis of means". The frequencies of these studies are used to calculate a mean and SD, these calculated values are added to the respective columns, the min. and max. value are added to the txs-spreadsheet, and subsequently a rescaled M+SD are calculated and added to the df. These rescaled values are used to calculate the weighted averages (using a random effects model) for each DCT (including the new and old studies that report a M+SD).

# Transform all variables that are not used in the analysis of means into dichotomous variables (if not already dichotomous) and, subsequently, aggregate all results (for the same DCT) that express agreement (i.e. "yes", "agree and completely agree", "very likely", etc.).

# The direction of the univariate results will be such that a high score (likely) results in a high level of h.p.b., and vice versa. For some constructs this interpretation is intuitive (e.g. for 'intention'), for other constructs provision of additional info will be needed (e.g. for 'descriptive norms').

# For all the results from Reiness (2018), Griest (2007), and Kamphuis (2018) weighted AVERAGES have to be computed first, as these studies report separate results for subgroups!

# For studies with multiple results in the same category (i.e. with the same DCT) (e.g. Beach et al, 2016), weighted AVERAGES will be used, unless the items are too heterogeneous and/or subcategories are created (e.g. taking into account a specific setting or social referent).


### Checking the DCTs (going from small to large number of results per DCT), to decide if/how data can be aggregated and if results are used as frequencies or means:

# Frequencies that can be transformed, are converted into means using this template:
# Reconstruct the individual scores from published answer-category proportions
# so a mean and SD can be computed: category i is repeated round(a[i]) times.
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234) # Answer frequencies for each of the categories (here 5 cat.).
a # Gives the n for each of the 5 groups.
# Idiomatic replacement for unlist(lapply(1:5, function(i) rep(i, round(a[i])))):
# rep() with a `times` vector repeats each category code by its group size.
b <- rep(seq_along(a), times = round(a))
b # Vector of the 125 values (124 after rounding the group sizes).
mean(b)
sd(b) 
# Min.=1, max.=5. (values are needed for rescaling).

# A high score (e.g. 5 on a 5-points scale) indicates a high presence of the specific construct (often high agreement with a statement), and vice versa. If scoring is in the opposite direction, scores have to be reversed: i.e. in the vector for the transformation, fill in the frequency of the highest value (e.g. 5) first, and the frequency of the lowest value (e.g. 1) last.

## 1. cues_79n2w1bj:
# Subset of frequency results for the 'cues' construct (one study only).
cues <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="cues_79n2w1bj", ] 
# Study/studies: 1 (Reiness) with 2 subgroups. 
# Type of data: nominal (2 cat).
# Nr. of results: 1.
# Steps to take: A weighted (for the N of the subgroups) average has to be calculated. 
# Decision: Only 1 result. Drop, or join with data of other construct (and relabel DCT)?
# Transfer to "Means" df: 0

## 2.injunctiveNorms_73dnt5zj:
# Subset of frequency results for the 'injunctive norms' construct.
injNorms <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="injunctiveNorms_73dnt5zj", ] 
View(injNorms)
# Study/studies: 2 (Pouwels & Reiness)
# Type of data: ordinal (5 cat.) & nominal (2 cat.)
# Nr. of results: 2.
# Steps to take: join "agree" and "totally agree" for Pouwels. Use answer "likely" from Reiness. Then join the % of the results from the two studies.
# Decision: Only 2 results. Join results with parent DCT ("attitude"), or drop data? Or could result be joined with results for "means" (see previous part)?
# Transfer to "Means" df: 1, Pouwels, L. (2017). Note: no inj norms in "Means" df! 

# Transform Pouwels, L. (2017):
# hp_if_friends_positive_freq:
# Reconstruct the 675 individual 5-point scores from the category proportions
# (category i repeated round(a[i]) times), then compute mean and SD.
a <- 675*c(0.55, 0.29, 0.087, 0.062, 0.011)
b <- rep(seq_along(a), times = round(a))
mean(b) # 1.693
sd(b) # 0.9416
# Min.=1, max.=5.


#3. threat_severity_79n2fh4r:
# Subset of frequency results for the 'threat severity' construct.
severity <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="threat_severity_79n2fh4r", ] 
View(severity)
# Study/studies: 2 (Griest & Hoppenbrouwer)
# Type of data: nominal (2 & 5 cat.)
# Nr. of results: 2.
# Steps to take: use results for 'disagree', as the statement is 'reversed'. Griest has 2 subgroups so results first have to be averaged!
# Decision: Only 2 results. Join or drop?
# Transfer to "Means" df: 1, Hoppenbrouwer (2018).

# Transform Hoppenbrouwer (2018):
# hearingloss_problem_freq: ("'I don't think losing part of my hearing would be a big problem"; 'agree', 'neutral', 'disagree', 'I don't know' & 'no reply'): 'agree'=1, ('neutral' + 'I don't know'=2), 'disagree'=3, 'no reply' =not used (so 5-point scale becomes 3-point scale and total is not 100%). High disagreement/high score=high severity, so direction is okay.
# Reconstruct individual 3-point scores from the category proportions.
a <- 1443*c(0.126, 0.212, 0.645)
b <- rep(seq_along(a), times = round(a))
mean(b) # 2.528
sd(b) # 0.7114  
# Min.=1, max.=3.


#4. emotion_79n2r0sy:
# Subset of frequency results for the 'emotion' construct.
emotion <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="emotion_79n2r0sy", ] 
View(emotion)
# Study/studies: 1 (Callahan, 2012)
# Type of data: ordinal (5 cat.)
# Nr. of results: 2
# Steps to take: join "agree" and "totally agree" for both results.
# Decision: Only 2 results. Both items measure "worry" (for hearing loss/tinnitus). Items can, therefore, be relabeled as "risk perception".
# Transfer to "Means" df: 1, Callahan (2012). There, relabel as "risk perception".

# Transform Callahan (2012), 2 results:
# Both results had to be reversed (i.e. filled in in reversed order in vector)!
# Reconstruct the individual 5-point scores from the (reversed) category
# proportions; `a` and `b` are reused for each result in turn.
# worried_hearingloss_freq:
a <- 123*c(0.084,0.072,0.264,0.392,0.184)
b <- rep(seq_along(a), times = round(a))
mean(b) # 3.533
sd(b) # 1.13 
# Min.=1, max.=5.

# worried_tinnitus_freq:
a <- 126*c(0.167,0.103,0.365,0.294,0.071)
b <- rep(seq_along(a), times = round(a))
mean(b) # 3.0
sd(b) # 1.166
# Min.=1, max.=5.



#5. instrumentalAttitude_73dnt5zb
# Subset of frequency results for the 'instrumental attitude' construct.
instrAtt <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="instrumentalAttitude_73dnt5zb", ] 
View(instrAtt)
# Study/studies: 3 
# Type of data: nominal (2,3 & 5 cat.)
# Nr. of results: 3
# Steps to take: use results for 'agree' and 'yes'. For Gorter (2012) use results for 'disagree', as statement is 'reversed'!
# Decision: Only 3 results. Join or drop?
# Transfer to "Means" df: 2, Gorter, A.F. (2012) & Hoppenbrouwer (2018).

# Transform Gorter, A.F. (2012):
# hpuse_unnecessary_ifhearinggood_freq:
# "'It is not necessary to wear hearing protection if your hearing is still good", with 1=agree, 3=disagree; so scoring does not have to be reversed (high disagreement/high score = high instr. attitude).
# Reconstruct the individual 3-point scores from the category proportions.
a <- 20000*c(0.54,0.19,0.27)
b <- rep(seq_along(a), times = round(a))
mean(b) # 1.73
sd(b) # 0.8586  
# Min.=1, max.=3.

# Transform Hoppenbrouwer (2018):
# hp_prevent_hearingloss_freq:
# "I am convinced that I can prevent hearing loss....". Scoring has to be reversed. After reversing: 'agree'=3, ('neutral' + 'I don't know'=2), 'disagree'=1, 'no reply' =not used (so 5-point scale becomes 3-point scale and total is not 100%). High agreement/high score=high instr. attitude.
a <- 1443*c(0.175,0.425,0.39)
b <- rep(seq_along(a), times = round(a))
mean(b) # 2.217
sd(b) # 0.7241 
# Min.=1, max.=3.


#6. motivationToComply_73dnt5zf
# Subset of frequency results for the 'motivation to comply' construct.
motComply <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="motivationToComply_73dnt5zf", ] 
View(motComply)
# Study/studies: 4
# Type of data: dichotomous
# Nr. of results: 4
# Steps to take: use the 'yes'. All 4 studies are about m.t.c. towards doctor, so that has to be mentioned (i.e. it is not a general m.t.c.)!
# Decision: If used, it is not about the importance of a certain psych. construct, but about the importance of the 'referent' (here: a doctor), so not on the same level as other constructs.
# Transfer to "Means" df: 0.


#7. attitude_73dnt5zc
# Subset of frequency results for the 'attitude' construct.
attitude <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="attitude_73dnt5zc", ] 
View(attitude)
# Study/studies: 3
# Type of data: ordinal (5 cat.)
# Nr. of results: 3
# Steps to take: use the answers that express agreement with h.p.b.
# Decision: Martens is about 'The importance of hearing well versus the importance of loud music', while the other 2 studies are about attitude towards the use of earplugs. Can answer 'Hearing more important than loud music' on this item be aggregated with positive attitudes towards earplug use (in this case 'totally agree' and 'Very important')?
# Transfer to "Means" df: 2, Pouwels, L. (2017) & Zieltjens (2014).

# Transform Pouwels, L. (2017):
# hpuse_important_freq:
# High score=Positive attitude towards wearing earplugs.
# Reconstruct the individual 5-point scores from the category proportions.
a <- 675*c(0.044,0.186,0.103,0.498,0.169)
b <- rep(seq_along(a), times = round(a))
mean(b) # 3.559
sd(b) # 1.107 
# Min.=1, max.=5.

# Transform Zieltjens (2014):
# opinion_hearing_protection_freq:
# "'What do you think about hearing protection?" 'Very important', 'A little bit important', 'Not important', 'I am not thinking about that at all'. The first 3 values are ordinal, but the 4th is not. Scores also have to be reversed.
# New order: 'Very important'=4, 'A little bit important'=3, 'I am not thinking about that at all'=2, 'Not important'=1. This is not completely correct, but it is the best possible option (now high score=pro-earplug attitude).
a <- 95*c(0.0737,0.3158,0.4421,0.1684)
b <- rep(seq_along(a), times = round(a))
mean(b) # 2.705
sd(b) # 0.8363
# Min.=1, max.=4.


#8. perceivedBehavioralControl_73dnt603
# All univariate frequency results coded as perceived behavioral control:
pbc <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "perceivedBehavioralControl_73dnt603",
]
View(pbc)
# Study/studies: 3
# Type of data: ordinal (4 cat.)
# Nr. of results: 3 (2 studies by same author, in all studies same instrument used)
# Steps to take: use the agreement answers.
# Decision: Only 3 results. As pbc is a 'parent' results can not be joined with other construct.
# Transfer to "Means" df: 3 studies, Danhauer (2009), Danhauer (2011), You (2020). Note: reorder categories first.

# Transform Danhauer (2009):
# hearingloss_prevented_freq:
# 'Do you think that hearing loss caused by noise can be prevented?''Yes', 'No', 'Maybe' & 'I don't know'. Join the latter two categories, and switch the order (then high score=high pbc).
# Reconstruct one score per respondent from the reported proportions:
a <- 553*c(0.034,0.153,0.823)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.78
sd(b)    # 0.4899
# Min.=1, max.=3.

# Transform Danhauer (2012):
# hearingloss_prevented_freq:
# Same question & answers: # 'Do you think that hearing loss caused by noise can be prevented?''Yes', 'No', 'Maybe' & 'I don't know'. Join the latter two categories, and switch the order (then high score=high pbc).
a <- 126*c(0.024,0.087,0.889)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.865
sd(b)    # 0.407
# Min.=1, max.=3.

# Transform You (2020):
# hearingloss_prevented_freq:
# Same question & answers as in the 2 studies by Danhauer; join the latter two categories and switch the order (then high score=high pbc).
a <- 1009*c(0.096,0.242,0.662)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.566
sd(b)    # 0.6621
# Min.=1, max.=3.


#9. perceivedNorms_73dnt5zq
# All univariate frequency results coded as perceived norms:
norms <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "perceivedNorms_73dnt5zq",
]
View(norms)
# Study/studies: 2
# Type of data: dichotomous & ordinal (5 cat.)
# Nr. of results: 3
# Steps to take: The dichotomous by Chung is not a clear case of "norms": better to drop it. The 2 items by Hoppenbrouwer are injunctive and descriptive norms, and should be placed in these categories.
# Decision: Relocate data Hoppenbrouwer. Do not use Chung.
# Transfer to "Means" df: 1 (2 results), Hoppenbrouwer (2018).

# Transform Hoppenbrouwer (2018) [1]:
# hp_friendsdo_freq: 'agree'=3, ('neutral' + 'I don't know'=2), 'disagree'=1, 'no reply'=not used. So the 5-point scale becomes a 3-point scale, the total is not 100%, and scores are reversed. Then high agreement/high score = high influence of norms on hearing protection.
a <- 1443*c(0.016,0.566,0.283)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.308
sd(b)    # 0.5004
# Min.=1, max.=3.

# Transform Hoppenbrouwer (2018) [2]:
# hp_friendsthink_freq: same recoding as the previous item.
a <- 1443*c(0.229,0.722,0.030)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.797
sd(b)    # 0.4718
# Min.=1, max.=3.


#10. inflEnvironment_79n2r0sy
# All univariate frequency results coded as influence of the environment:
inflEnvironment <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "inflEnvironment_79n2r0sy",
]
View(inflEnvironment)
# Study/studies: 6
# Type of data: 5x dichotomous, 1x ordinal (3 cat.)
# Nr. of results: 6
# Steps to take: Questions are quite heterogeneous and may (also) measure another construct.
# Decision: Possibly place items in other category (i.e. give other DCT), and drop items that can not be placed elsewhere.
# Transfer to "Means" df: 1, Martens (2006). Note: relabel (i.e. DCT) for Martens?

# Transform Martens (2006):
# volumerestriction_mp3_freq:
# 'Should there be a sound limiter on your walkman?'. Reverse the scoring. Then high score = pro-sound limiter (which DCT?).
a <- 501*c(0.725,0.165,0.11)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.385
sd(b)    # 0.6762
# Min.=1, max.=3.


#11. motivation_79n2fh4q
# All univariate frequency results coded as motivation:
motivation <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "motivation_79n2fh4q",
]
View(motivation)
# Study/studies: 4
# Type of data: nominal (2 cat.), ordinal (3 cat.) & numeric.
# Nr. of results: 6
# Steps to take: The 3 results from Hoover have to be averaged! The results from the other 2 studies are slightly different; too heterogeneous to combine in 1 DCT? The score (i.e. %) for Steen is in the opposite direction (i.e. amotivation)! This score can not simply be reversed, as it does not mean that the other participants are motivated. Drop item?
# Decision: too heterogeneous to combine in 1 DCT? Possibly relocate (some) items?
# Transfer to "Means" df: 1 study with 3 results [can be averaged], Hoover A. & Krishnamurti S. (2010).

# Transform Hoover A. & Krishnamurti S. (2010) [1]:
# For all 3 questions the original order can be maintained.
# 1. decrease_time_freq:
a <- 390*c(0.162,0.343,0.495)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.333
sd(b)    # 0.7394
# Min.=1, max.=3.

# Transform Hoover A. & Krishnamurti S. (2010) [2]:
# 2. turndown_volume_freq:
a <- 391*c(0.013,0.358,0.629)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.616
sd(b)    # 0.5125
# Min.=1, max.=3.

# Transform Hoover A. & Krishnamurti S. (2010) [3]:
# 3. buy_specialearphones_freq:
a <- 391*c(0.09,0.414,0.496)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.407
sd(b)    # 0.6491
# Min.=1, max.=3.


#12. locus_of_control_7ddfj4wn
# All univariate frequency results coded as locus of control:
loc <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "locus_of_control_7ddfj4wn",
]
View(loc)
# Study/studies: 4
# Type of data: nominal (1 & 3 cat.)
# Nr. of results: 4 (after results for Beach and Kamphuis have been averaged!)
# Steps to take: Do items express L.O.C.? Items appear to measure the same construct, but that may not be L.O.C., or even a psych. construct.
# Decision: Use these outcomes in the discussion, but not presented as a frequency and/or plot?
# Transfer to "Means" df: 0 (all nominal).


#13. intention_73dnt604
# All univariate frequency results coded as intention:
intention <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "intention_73dnt604",
]
View(intention)
# Study/studies: 8
# Type of data: nominal & ordinal (cat. 1 -5)
# Nr. of results: 8 (after results for Beach and Griest have been averaged! Both 2 results)
# Steps to take: all items are in the same direction: i.e. agreement means high intention to engage in h.p.b. (which are quite diverse here), so the % for 'yes', 'agree', etc. can be used to compute an overall outcome.
# Decision: The univariate data for this construct can be used (as it is).
# Transfer to "Means" df: 3, Danhauer (2009), You(2020), Martens (2006).

# Transform Danhauer (2009):
# volumelimit_software_freq: Reverse scores and make 1 cat. of 'Maybe' & 'I don't know'.
a <- 551*c(0.165, 0.375, 0.459)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.294
sd(b)    # 0.7341
# Min.=1, max.=3.

# Transform You(2020):
# volumelimit_software_freq: Reverse scores and make 1 cat. of 'Maybe' & 'I don't know'.
a <- 1009*c(0.181, 0.26, 0.558)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.377
sd(b)    # 0.7737
# Min.=1, max.=3.

# Transform Martens (2006):
# intention_hpuse_freq: scores can be used as they are (5 categories).
a <- 453*c(0.411,0.397,0.172,0.015,0.004)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.806
sd(b)    # 0.805
# Min.=1, max.=5.


#14. descriptiveNorms_73dnt5zp
# All univariate frequency results coded as descriptive norms:
descriptiveNorms <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "descriptiveNorms_73dnt5zp",
]
View(descriptiveNorms)
# Study/studies: 6
# Type of data: mostly dichotomous, but also ordinal (5 cat.)
# Nr. of results: 9. The 'reference' may be either 'friends' or '(pop) musicians'; these results should be aggregated separately! So here it is justified that studies report more than >1 result.
# Steps to take: create two subgroups and join responses showing that descriptive norms stimulate h.p.b.
# Decision: Results are split over 2 groups ('friends' and 'musicians'), so results per group are very few. It is not possible to combine these groups. The question then is, like with previous DCTs: is it meaningful to report a summary of fewer than 5 results? What is the minimum number of results needed for such a summary?
# Transfer to "Means" df: 1, Pouwels, L. (2017).

# Transform Pouwels, L. (2017):
# hp_if_friends_wear_freq: high score = high influence of descriptive norms on earplug use.
a <- 675*c(0.559, 0.283, 0.075, 0.075, 0.009)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.695
sd(b)    # 0.9609
# Min.=1, max.=5.


#15. attitude_soundculture_7c082lmy
# All univariate frequency results coded as attitude towards the sound culture:
attitude_soundculture <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "attitude_soundculture_7c082lmy",
]
View(attitude_soundculture)
# Study/studies: 5
# Type of data: nominal & ordinal (3 & 4 cat.)
# Nr. of results: 8. Here again, it seems justified to accept more than 1 result per study, as attitude towards level of sound is related to the kind of event (concert, festival, etc.)
# Steps to take: Create subgroups for different kinds of events and aggregate the % of people in each group that find the sound level too high.
# Decision: By splitting up the results, the groups become very small again. However, aggregating all results is not an option, as it will result in a loss of information.
# Transfer to "Means" df: 5 studies, Beach E.F. & Gilliver M. (2019), Gorter, A.F. (2012), Kamphuis, L. (2018), Martens (2006), Zieltjens (2014). Note: all measures should point in the same direction (not the case right now)! If aggregated, information related to the specific context is lost.

# Transform Beach E.F. & Gilliver M. (2019) [2 subgroups; can later be aggregated]:
# 1. perception_sound_levels_nc (nightclub):
# 'Not as loud as you'd like', 'Just right', 'Loud, but tolerable' & 'Louder than you'd like': reverse the scoring, so a high score = positive attitude towards loud noise (just like the YANS).
a <- 555*c(0.26,0.597,0.128,0.014)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.897
sd(b)    # 0.6603
# Min.=1, max.=4.

# Transform Beach E.F. & Gilliver M. (2019) [2 subgroups; can later be aggregated]:
# 2. perception_sound_levels_lm (live music):
# See previous item (same 4 response categories, scoring reversed).
a <- 378*c(0.172,0.626,0.183,0.019)
# BUG FIX: the original code expanded only lapply(1:3, ...), silently dropping
# the 4th category ('Louder than you'd like', 1.9%) even though 'a' holds 4
# proportions; the previous, identical item correctly uses 4 categories.
b <- rep(seq_along(a), round(a))
mean(b)  # 2.048 (previously reported as 2.011, computed from 3 categories only)
sd(b)    # 0.654
# Min.=1, max.=4.

# Transform Gorter, A.F. (2012):
# attitude_volume_freq:
# Present order: 'sound volume acceptable, no burden'(=1), "sound volume acceptable, but burden" (=2),"sound volume loud, no burden" (=3),"sound volume loud, but burden" (=4).
# New order: "sound volume loud, but burden" (=1), 'sound volume acceptable, but burden'(=2), "sound volume loud, no burden" (=3), 'sound volume acceptable, no burden'(=4). High value = positive attitude towards loud noise.
# Is this acceptable? The scale is not really ordinal... Values for new order:
a <- 130000*c(0.18,0.29,0.35,0.18)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.53
sd(b)    # 0.9844
# Min.=1, max.=4.

# Transform Kamphuis, L. (2018) [3 subgroups; results can later be aggregated]:
# 1. attitude_volume_freq_f (festivals):
# Values are in the correct order (high value = positive attitude towards loud music).
a <- 1086*c(0.33,0.64,0.03)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.701
sd(b)    # 0.5203
# Min.=1, max.=3.

# Transform Kamphuis, L. (2018) [3 subgroups; results can later be aggregated]:
# 2. attitude_volume_freq_c (concerts):
# Values are in the correct order (high value = positive attitude towards loud music).
a <- 1326*c(0.34,0.63,0.02)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.677
sd(b)    # 0.5099
# Min.=1, max.=3.

# Transform Kamphuis, L. (2018) [3 subgroups; results can later be aggregated]:
# 3. attitude_volume_freq_dp (dance party):
# Values are in the correct order (high value = positive attitude towards loud music).
a <- 412*c(0.36,0.55,0.09)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.731
sd(b)    # 0.6143
# Min.=1, max.=3.

# Transform Martens (2006):
# volume_nightlife_lower_freq: values are in the correct order (high value = positive attitude towards loud music).
a <- 503*c(0.05,0.342,0.608)
b <- rep(seq_along(a), round(a))
mean(b)  # 2.559
sd(b)    # 0.5888
# Min.=1, max.=3.

# Transform Zieltjens (2014):
# volume_nightlife_freq: values are in the correct order (high value = positive attitude towards loud music).
# The original scale is a 5-point scale. However, I convert it to a 3-point scale as the distinction between the categories is not clear; with 3 categories no artificial differences are created (e.g. a score of 2 in the old order was not really different from a score 3): 2&3 and 4&5 are joined.
a <- 64*c(0.125,0.8594,0.0156)
b <- rep(seq_along(a), round(a))
mean(b)  # 1.891
sd(b)    # 0.3615
# Min.=1, max.=3.



#16. autonomy_belief_73dnt5zt
# 'autonomy_belief' = perceived barriers to earplug use (see notes below).
autonomy_belief <- univDetFreq[univDetFreq$varSpec_oper.constructIdentifier_aggr2=="autonomy_belief_73dnt5zt", ] 
View(autonomy_belief)
# Relocate column (put the barrier category next to the construct identifier):
autonomy_belief <- relocate(autonomy_belief, varSpec_barrier_category, .before = varSpec_oper.constructIdentifier_aggr2)
# Remove empty rows (rows in which every column is NA):
autonomy_belief <- autonomy_belief[rowSums(is.na(autonomy_belief)) != ncol(autonomy_belief), ]
# Add row numbers:
autonomy_belief$row_number <- seq.int(nrow(autonomy_belief))
autonomy_belief <- relocate(autonomy_belief, row_number, .before = author_year)
# Remove column:
# NOTE(review): the column is dropped by position (10); this is only valid for
# the current column order after the relocations above -- confirm whenever the
# upstream extraction changes.
autonomy_belief <- autonomy_belief[ ,-10]
# Remove duplicate rows:
# NOTE(review): rows are removed by position; these indices depend on the exact
# state of the data frame at this point -- re-check after any upstream change.
autonomy_belief <- autonomy_belief[-c(5,7,19:20,22:23), ]
# Adjust incorrect category labels Pouwels (2017):
# NOTE(review): cell positions [20:22, 9] are also hard-coded.
autonomy_belief[20,9] <- "forget to bring/wear"
autonomy_belief[21,9] <- "other"    
autonomy_belief[22,9] <- "uncomfortable"
View(autonomy_belief)
# Study/studies: 9 
# Type of data: numeric, ordinal, nominal (2-5 cat.)
# Nr. of results: 12
# Steps to take: 'autonomy_belief' stands for 'perceived barriers to use earplugs'. Different barriers are identified. Based on the txs-specification every answer has been placed in a category. Subsequently the average frequency a barrier is named as being a reason not to wear earplugs can be reported. However, the number of results for the barrier categories are very unequal. The 1 result from Chung (2005) likely can not be used, as it does not fit in any of the categories.
# Decision: These results can be used, but probably should be presented separately from the results for the other DCTs (i.e. not in one plot), as the format here is different (not a single average frequency, but a number of (average) frequencies for the 'barrier' categories).
# Transfer to "Means" df: 4 studies could be transferred. However, all items relate to one specific barrier. In the "means df" most scores are an average of the answers to multiple questions measuring different barriers (and, thus, reflecting the overall perceived presence of barriers). Aggregation thus would result in combining single and mean scores. In addition, the information is lost which barriers are perceived to be the most important. Would using the outcomes for an aggregated mean and also treating them categorically (i.e. reporting the frequency each particular barrier is mentioned) be an option? 

# Placeholder template (study name and values still to be filled in):
# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# [COPY]


#17. risk_perception_79n2fh4t
# All univariate frequency results coded as risk perception:
risk_perception <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "risk_perception_79n2fh4t",
]
View(risk_perception)
# Study/studies: 18
# Type of data: numeric, ordinal, nominal (2-5 cat.)
# Nr. of results: 22
# Steps to take: Some studies report >1 result (slightly different questions, but same DCT). If no subcategories are constructed, these results have to be averaged so there is 1 result per study.
# Decision: For this, and possibly other DCTs, an option could be to compose one overall frequency/score, plus the average frequency for the main sub-themes/questions (if clear themes can be identified).
# Transfer to "Means" df: 9 studies, Gorter, A.F. (2012)(2 results), Rawool V.W. & Collignon-Wayne L.A. (2008),You S.,Kwak C. & Han W. (2020), Beach E.F. & Gilliver M. (2019), Danhauer (2009), Danhauer (2012)(2 results), You S.,Kwak C. & Han W. (2020), Carter L. & Black D. (2017).
# Lee D. & Han W. (2019) now is dichotomous, but could be reextracted and added to the "means df" (item originally was ordinal, but was dichotomized to facilitate comparison with same item in MTV-survey [which was dichotomous]).

# Placeholder templates (study names and values still to be filled in):

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.


#18. knowledge_79n2fh4b
# All univariate frequency results coded as knowledge:
knowledge <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "knowledge_79n2fh4b",
]
View(knowledge)
# Study/studies: 19
# Type of data: numeric, ordinal, nominal (2-5 cat.)
# Nr. of results: Some studies report >1 result.
# Steps to take: as said before, an option could be to compose one overall frequency/score, plus the average frequency for two or more main themes, e.g. a knowledge theme that is often reported, is whether participants know what produces the ringing sound in the ears after exposure to loud sounds.
# Decision: Not the knowledge level in itself, but whether knowledge can be a factor that influences h.p.b. is of importance here. In that light the questions should be evaluated.
# Transfer to "Means" df: 5 studies, Danhauer (2009)(2 results), Danhauer (2012)(2 results), Hoppenbrouwer (2018), You (2020), Gorter (2012).

# Placeholder templates (study names and values still to be filled in):

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.



#19. threat_susceptibility_79n2fh4s
# All univariate frequency results coded as threat susceptibility:
threat_susceptibility <- univDetFreq[
  univDetFreq$varSpec_oper.constructIdentifier_aggr2 == "threat_susceptibility_79n2fh4s",
]
View(threat_susceptibility)
# Study/studies: 19
# Type of data: numeric, ordinal, nominal (2-5 cat.)
# Nr. of results: Some studies report >1 result. For these studies weighted averages have to be computed.
# Steps to take: The questions are fairly homogeneous, so data can quite easily be aggregated.
# Decision: Include.
# Transfer to "Means" df: 10 studies, Callahan (2012), Danhauer (2012), DelGiacco (2015), Gorter, A.F. (2012)(2 results), Herrera (2016), Rawool (2008) (2 results), Steen, L. (2008), Bogosch I.I., House R.A. & Kudla I. (2005), Hoover A. & Krishnamurti S. (2010), Johnson (2014). Note: the 'NA' in the latter should be value '3'.

# Placeholder templates (study names and values still to be filled in):

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.

# Transform XXXX (XX):
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234)
# a
b <- rep(seq_along(a), round(a))
# b
mean(b)
sd(b)
# Min.=X, max.=X.



##############################################################################
########## Calculate M+SD from data (if not reported) ########################

# Worked example: Callahan et al. (2012), "worried_hearingloss_freq", n=123.
View(univDetFreq)
a <- 123*c(0.184,0.392,0.264,0.072,0.084) # % of participants per answer category (5 categories).
a # The n for each of the 5 groups.
b <- rep(seq_along(a), round(a)) # One score per respondent: category i repeated round(a[i]) times.
b # Vector of the 123 values.
mean(b) # Mean and SD can now be calculated.
sd(b)


# Calculate M+SD for Callahan et al. (2012), variable 'exposure_damage_freq', n=125, frequencies reported for the 5 answer options (5-point Likert scale):
View(threat_susceptibility)
a <- 125*c(0.008, 0.121, 0.226, 0.411, 0.234) # Answer frequencies for each of the 5 categories.
a # The n for each of the 5 groups.
# sum(c(1.000, 15.125, 28.250, 51.375, 29.250)) # 125 (check if n's sum up to N)
b <- rep(seq_along(a), round(a))
b # Vector of the 125 values.
mean(b) # 3.741935
sd(b)   # 0.9783923
# The latter 2 values can now be used in the "means df".
# To calculate rescaled M+SD, the min. & max. possible scores are needed. In this case (as a 5-point scale is used), the min.=1, and the max.=5.






############################################################################
################ Aggregation of MEANS for equal DCTs #######################

# Can results of constructs with equal DCTs in 'univDetMean' be aggregated?

# 1. Some means are frequencies. These results sometimes can be aggregated with the rescaled means (these are on the same scale, i.e. from 0 - 100). If not, they can be aggregated with 'frequencies' in the other df.
# 2. For 'perceivedNorms_73dnt5zq' the results for Gilliver (2012) can NOT be aggregated with the other studies (too heterogeneous).
# 3. For 'risk_perception_79n2fh4t' the results for Gilliver (2012) of variable 'risk_volume' should be REVERSED.
# 4. For 'attitude_soundculture_7c082lmy' in Zocoli (2009) two versions have been extracted (i.e. a 7- and 8-item version of YANS Factor 1), use ONLY the 8-item result. In Chesky (2009) use ONLY the result of the total sample, not of the subgroups.
# 5. Take into consideration the hearing protective behavior and setting that has been studied, e.g. 'risk perception' with regard to a pop concert and pld-use is not the same. This means that for some DCTs SEPARATE RESULTS have to be aggregated, depending on the behavior that has been studied. For some determinants, the behavior does not matter, e.g. 'risk severity' (i.e. how severe one perceives hearing problems to be) is independent from the behavior.

# Taking these five points into consideration, the results of constructs with the same DCT can be aggregated.



#############################################################################
################## Standardized scores of DCTs ##############################

### Use the standardized scores of variables with equal DCTs and which refer to comparable behavior and/or settings in 'univDetMean' to build plots:

# Relocate the column containing the DCTs:
univDetMean <- relocate(univDetMean, varSpec_oper.constructIdentifier_aggr2, .before = uni.subsample)
# Relocate the column containing info whether the DCT is linked to PLD-use: 
univDetMean <- relocate(univDetMean, studyInfo_pld_use, .before = uni.subsample)
# For convenience, relocate 'uni_mean' and 'uni.mean_rescaled':
univDetMean <- relocate(univDetMean, uni.mean_rescaled, .before = uni.subsample)
univDetMean <- relocate(univDetMean, uni.mean, .before = uni.subsample)
View(univDetMean)

# Drop the DCTs which (after aggregation) have less than 4 measurements (see 'univDetMeanId'):
# The "# -k" trailing comments record how many rows each filter removes.
univDetMean <- univDetMean[univDetMean$varSpec_oper.constructIdentifier_aggr2!="prototype_79n2fh4t", ] # -1
univDetMean <- univDetMean[univDetMean$varSpec_oper.constructIdentifier_aggr2!="habit_79n2r0sz", ] # -2
univDetMean <- univDetMean[univDetMean$varSpec_oper.constructIdentifier_aggr2!="knowledge_79n2fh4b", ] # -2
univDetMean <- univDetMean[univDetMean$varSpec_oper.constructIdentifier_aggr2!="motivation_79n2fh4q", ] # -2
# Drop variables without DCT:
# NOTE(review): column 5 is addressed by position; it is only the intended
# column after the exact relocation sequence above -- confirm if that changes.
univDetMean <- univDetMean[complete.cases(univDetMean[ ,5]), ] # -4
View(univDetMean)
# These steps have reduced the number of rows (i.e. results) from 168 to 156.



##############################################################################
################### Create separate groups for PLD & Noise ###################

### Make two separate groups of univDetMean-studies that either measure (only) hearing protective behavior (h.p.b.) when using a PLD or when exposed to loud noise (in leisure and/or work settings):

# See 'txs-specification--dct-pld' for correct classification. This spreadsheet has not been imported into the script (yet).

### 1) Group PLD-USE (h.p.b. mainly consists of volume restriction):
# IMPROVED: %in% replaces the long chain of '==' comparisons joined by '|'.
# It is clearer, and it yields FALSE (instead of NA) for a missing studyId,
# so no all-NA rows can slip into the subset.
pld_study_ids <- c(
  "DeBruijn_Spaans_Jansen_VantRiet_2016.rxs.Rmd",
  "Diviani-Zanini-Amann-Chadha-Cieza-Rubinelli--2019.rxs.Rmd",
  "Gilliver-Carter-Macoun-Rosen-Williams--2012.rxs.Rmd",
  "Lee_Jeong_2021.rxs.Rmd",
  "Portnuff_Fligor_Arehart_2011.rxs.Rmd",
  "Portnuff--2011.rxs.Rmd",
  "Vogel-Brug-VanderPloeg-Hein Raat--2011.rxs.Rmd"
)
rows_about_pld <- univDetMean$studyId %in% pld_study_ids
univDetMeanPld <- univDetMean[rows_about_pld, ]
# Add the correct row numbers:
univDetMeanPld$row_number <- seq.int(nrow(univDetMeanPld))
univDetMeanPld <- relocate(univDetMeanPld, row_number, .before = author_year)
# Exclude the results that are not specifically linked to pld-use (these results are transferred to 'univDetMeanNoise'):
# NOTE(review): rows are dropped by position; re-check after any upstream change.
univDetMeanPld <- univDetMeanPld[-c(2,25,27), ]
View(univDetMeanPld)


### 2) Group LOUD ENVIRONMENTAL NOISE (h.p.b. here mainly consists of wearing earplugs or preventing exposure, both in leisure settings [e.g. concerts] as during work): 
# Exclude results that are explicitly linked to pld-use: 
# NOTE(review): rows are dropped by position; these indices are only valid for
# the current row order of 'univDetMean' -- re-check after any upstream change.
univDetMeanNoise <- univDetMean[-c(5,27:31,52,80:84,88:97,116,118,120:122), ]
# Add the correct row numbers:
univDetMeanNoise$row_number <- seq.int(nrow(univDetMeanNoise))
univDetMeanNoise <- relocate(univDetMeanNoise, row_number, .before = author_year)
View(univDetMeanNoise)


### Create COUNT overview of psych. determinants for the 2 subgroups: 

# Count the number of results per DCT in each subgroup.
# NOTE(review): grouping on the extracted vector (df$col inside group_by())
# works but produces an awkward column name in the result; grouping on the
# bare column name (or using dplyr::count()) would be cleaner, though it
# changes the output column name -- left as-is to keep the output identical.
# Noise:
univDetMeanNoiseId <- univDetMeanNoise %>% group_by(univDetMeanNoise$varSpec_oper.constructIdentifier_aggr2)%>% summarise(n = n())
View(univDetMeanNoiseId)
# PLD:
univDetMeanPldId <- univDetMeanPld %>% group_by(univDetMeanPld$varSpec_oper.constructIdentifier_aggr2)%>% summarise(n = n())
View(univDetMeanPldId)

# Applying the criterion of a minimum of 4 measurements:

# univDetMeanNoise: 9 DCTs remain (range 9-24 measurements/DCT); perceivedBehavioralControl_73dnt603 and risk_perception_79n2fh4t are dropped.
univDetMeanNoise <- univDetMeanNoise[univDetMeanNoise$varSpec_oper.constructIdentifier_aggr2 != "perceivedBehavioralControl_73dnt603", ] # -2
univDetMeanNoise <- univDetMeanNoise[univDetMeanNoise$varSpec_oper.constructIdentifier_aggr2 != "risk_perception_79n2fh4t", ] # -3
View(univDetMeanNoise)

# univDetMeanPld: 5 DCTs remain (range 4-6 measurements/DCT); risk_perception_79n2fh4t and intention_73dnt604 are dropped.
univDetMeanPld <- univDetMeanPld[univDetMeanPld$varSpec_oper.constructIdentifier_aggr2 != "intention_73dnt604", ] # -2
univDetMeanPld <- univDetMeanPld[univDetMeanPld$varSpec_oper.constructIdentifier_aggr2 != "risk_perception_79n2fh4t", ] # -1
View(univDetMeanPld)


##############################################################################
################### Plots for MEANS Psych. Det. ##############################

# Make (separate) plots for 'univDetMean', 'univDetMeanNoise', and 'univDetMeanPld': 


### BOX PLOT with the (final) 'univDetMean' dataset: 

#Load fonts (https://fonts.google.com/) and colors:
library(showtext)
# Register the Google font; showtext_auto() must follow so subsequent plots
# actually render with it.
font_add_google("Montserrat", "montserrat") 
showtext_auto()
library(dutchmasters)
library(RColorBrewer)
library(paletteer) 
library(ggsci)          # The palet 'default_igv' has > 12 colors.
# Printing this object lists all discrete paletteer palettes (interactive aid):
palettes_d              # For more palettes
# Opens a graphics device showing every RColorBrewer palette:
display.brewer.all()

# First plot: box plot of the rescaled mean per DCT, overlaid with the
# individual study values (black dots) and the per-DCT mean (red point).
# Fix: stat_summary()'s 'fun.y' argument was deprecated in ggplot2 3.3.0;
# 'fun' is the current name. Also reflowed the single-line call for
# readability (no other changes).
univDetPlot <- ggplot(univDetMean,
                      aes(x = varSpec_oper.constructIdentifier_aggr2,
                          y = uni.mean_rescaled,
                          fill = varSpec_oper.constructIdentifier_aggr2)) +
  geom_boxplot() +
  geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.4, fill = "black") +
  stat_summary(fun = mean, geom = "point", shape = 16, size = 4, colour = "red") +
  labs(title = "Standardized score by DCT",
       x = "DCT",
       y = "Standardized score (0-100)") +
  theme(plot.title = element_text(hjust = 0, vjust = 3, size = 12, face = "plain")) +
  theme(text = element_text(family = "roboto")) +
  scale_fill_paletteer_d("ggsci::default_igv", direction = -1) +
  guides(fill = "none") +
  theme_classic() +
  coord_flip()

univDetPlot

# Improved version of same plot: suppresses the boxplot outliers (the raw
# values are shown as dots anyway) and labels each construct with its n.
# Fix: replaced the reassignable shorthand 'F' by 'FALSE' in geom_boxplot().
univDetPlot2 <- ggplot(univDetMean, aes(x=varSpec_oper.constructIdentifier_aggr2, y=uni.mean_rescaled, fill=varSpec_oper.constructIdentifier_aggr2)) + geom_boxplot(varwidth = FALSE, outlier.shape = NA) + geom_dotplot(binaxis='y', stackdir='center', dotsize=0.2, stackratio = 1.5, fill="black") + stat_summary(fun=mean, geom="point", shape=16, size=3, colour="red") + labs(title="Standardized score by psychological construct", x= "Construct name", y = "Standardized score (0-100)") + scale_x_discrete(labels=c("threat_susceptibility_79n2fh4s" = "Threat susceptibility (n=18)", "threat_severity_79n2fh4r" = "Threat severity (n=13)","risk_perception_79n2fh4t" = "Risk perception (n=4)", "perceivedNorms_73dnt5zq" = "Perceived norms (n=13)","perceivedBehavioralControl_73dnt603" = "Perceived Behavioral Control (n=7)","intention_73dnt604" = "Intention (n=12)","instrumentalAttitude_73dnt5zb" = "Instrumental attitude (n=20)","capacity_73dnt602" = "Capacity (n=9)","autonomy_belief_73dnt5zt" = "Autonomy belief (n=18)","attitude_noise_7c08258d" = "Attitude towards noise in general (n=20)","attitude_soundculture_7c082lmy" = "Attitude towards noise in youth culture (n=24)")) + scale_fill_dutchmasters(palette = "milkmaid") + guides(fill = "none") + theme_bw() + coord_flip()

# Apply title placement and the Montserrat font registered above:
univDetPlot2 <- univDetPlot2 + theme(plot.title = element_text(hjust = 0.4, vjust = 2, size = 12, face = "plain"))+ theme(text = element_text(family = "montserrat"))
univDetPlot2
# Code to possibly add for scaled points (note that results are not weighted
# for sample size and only N of total sample has been used!).
# Fix: this fragment was left as bare code; a free-standing '+ ...' expression
# with a trailing '+' is a parse/evaluation hazard when the script is sourced
# (it silently consumes the next expression or errors). Kept as a comment
# until it is actually appended to a plot object:
# + geom_point(aes(size = studyMethods_N)) +
#   scale_size_area("Sample size", breaks = c(100, 250, 500, 1000, 2000))

  
### BOX PLOT for 'univDetMeanNoise' (i.e. studies about h.p.b. and environmental noise):
# Same layout as univDetPlot2, restricted to the noise subgroup; the (n=...)
# labels are hard-coded and must be kept in sync with the count overviews above.
univDetPlotNoise <- ggplot(univDetMeanNoise, aes(x=varSpec_oper.constructIdentifier_aggr2, y=uni.mean_rescaled, fill=varSpec_oper.constructIdentifier_aggr2)) + geom_boxplot (color="black") + geom_dotplot(binaxis='y', stackdir='center', dotsize=0.2, fill="black") + stat_summary(fun=mean, geom="point", shape=16, size=3, colour="red") + labs(title="Standardized score by psychological construct for loud noise settings", x= "Construct name", y = "Standardized score (0-100)") + scale_x_discrete(labels=c("threat_susceptibility_79n2fh4s" = "Threat susceptibility (n=13)", "threat_severity_79n2fh4r" = "Threat severity (n=9)", "perceivedNorms_73dnt5zq" = "Perceived norms (n=11)", "intention_73dnt604" = "Intention (n=10)","instrumentalAttitude_73dnt5zb" = "Instrumental attitude (n=15)","capacity_73dnt602" = "Capacity (n=9)","autonomy_belief_73dnt5zt" = "Autonomy belief (n=13)","attitude_noise_7c08258d" = "Attitude towards noise in general (n=20)","attitude_soundculture_7c082lmy" = "Attitude towards noise in youth culture (n=24)")) + scale_fill_dutchmasters(palette = "milkmaid") + guides(fill = "none") + theme_bw() + coord_flip()

# Title placement and Montserrat font (registered earlier with showtext):
univDetPlotNoise <- univDetPlotNoise + theme(plot.title = element_text(hjust = 0.4, vjust = 2, size = 12, face = "plain"))+ theme(text = element_text(family = "montserrat"))
univDetPlotNoise



### BOX PLOT for 'univDetMeanPld' (i.e. studies about h.p.b. and pld-use):
# Same layout as the noise plot, restricted to the PLD subgroup; the (n=...)
# labels are hard-coded and must match the PLD count overview above.
univDetPlotPld <- ggplot(univDetMeanPld, aes(x=varSpec_oper.constructIdentifier_aggr2, y=uni.mean_rescaled, fill=varSpec_oper.constructIdentifier_aggr2)) + geom_boxplot (color="black") + geom_dotplot(binaxis='y', stackdir='center', dotsize=0.2, fill="black") + stat_summary(fun=mean, geom="point", shape=16, size=3, colour="red") + labs(title="Standardized score by psychological construct for PLD use", x= "Construct name", y = "Standardized score (0-100)") + scale_x_discrete(labels=c("threat_susceptibility_79n2fh4s" = "Threat susceptibility (n=5)", "threat_severity_79n2fh4r" = "Threat severity (n=4)", "perceivedBehavioralControl_73dnt603" = "Perceived Behavioral Control (n=5)", "instrumentalAttitude_73dnt5zb" = "Instrumental attitude (n=5)", "autonomy_belief_73dnt5zt" = "Autonomy belief (n=5)")) + scale_fill_dutchmasters(palette = "milkmaid") + guides(fill = "none") + theme_bw() + coord_flip()

# Title placement and Montserrat font (registered earlier with showtext):
univDetPlotPld <- univDetPlotPld + theme(plot.title = element_text(hjust = 0.4, vjust = 2, size = 12, face = "plain"))+ theme(text = element_text(family = "montserrat"))
univDetPlotPld



############################################################################

### Notes: 

# Reverse YANS score? Now, for all DCTs a high score (potentially) stimulates h.p.b. (e.g. high score on instrumental attitude, or threat severity). However, for the YANS it is the opposite (high score = pro-noise). For people who know the YANS, reversing the scores could however cause confusion.
# Add N for sum of total number of participants for each construct (i.e. sum of participants per study), or mention N in text?

# In the previous box plots NO WEIGHTS have been assigned to studies (!): i.e. studies with small and large samples contribute equally, and the variance within studies is not taken into account.

# The subgroups 'Noise' and 'PLD' can also be JOINED (to prevent small groups). In that case, the responses on the items/questions are used to refer to GENERAL hearing protective behaviours and/or general risks of loud noise (i.e. not specifically to PLD-use or noisy settings such as concerts)? If this is the approach, the "univDetMean" data frame is used for the analyses (and "univDetMeanPld" and "univDetMeanNoise" are ignored). 

# If results from the subgroups 'Noise' and 'PLD' are JOINED, for studies with multiple homogeneous results (e.g. Vogel et al. have measured the same constructs with regard to noise in discotheques and pld-use, using the same sample, for different publications), AVERAGE scores have to be calculated to prevent inclusion of correlated results! 

# Outliers have been checked and there is no reason to correct the data or exclude studies.

# What does each score tell us, e.g. score of 50 on 'autonomy belief' (a.k.a. 'barrier')?

#############################################################################


### YANS

##############################################################################
######### Compare scores of studies on the 1st Factor of the YANS ############
names(univarDf)

# Which variables feed into the 'attitude towards sound culture' construct?
table(univarDf$uni.variable[univarDf$varSpec_oper.constructIdentifier == "attitude_soundculture_7c082lmy"])

# Logical mask: rows for this construct measured with one of the three
# comparable YANS-F1 variable codings.
rows_about_att_soundculture <-
        (univarDf$varSpec_oper.constructIdentifier == "attitude_soundculture_7c082lmy") &
        (univarDf$uni.variable == "attitude_noise"|univarDf$uni.variable == "attitude_soundculture"|univarDf$uni.variable == "attitude_soundculture_8items")

att_soundcultureDf <- univarDf[rows_about_att_soundculture, ]
View(att_soundcultureDf)
## Dropping studies (with incomparable data) from data frame:
att_soundcultureDf <- att_soundcultureDf[att_soundcultureDf$studyId!="OlsenWiden-Erlandsson--2004.rxs.Rmd", ]
att_soundcultureDf <- att_soundcultureDf[att_soundcultureDf$studyId!="Widén-Bohlin-Johansson--2011.rxs.Rmd", ]
att_soundcultureDf <- att_soundcultureDf[att_soundcultureDf$studyId!="Widén--2013.rxs.Rmd", ]
# Add row numbers:
att_soundcultureDf$row_number <- seq.int(nrow(att_soundcultureDf))
att_soundcultureDf <- relocate(att_soundcultureDf, row_number, .before = author_year)
View(att_soundcultureDf)
# Remove duplicate rows Widen et al. (2006), subgroups Chesky (2009), and 7-item score Zocoli,2009:
# NOTE(review): positions are hard-coded against the row numbering created
# just above; re-check after any upstream change.
att_soundcultureDf <- att_soundcultureDf[-c(2,3,19,21,24), ]
# Widen (2006) is present twice, label the subgroups to distinguish them in plots:
# NOTE(review): direct cell assignment assumes column 2 is author_year and that
# rows 17/18 are the two Widen (2006) subgroups — verify after upstream edits.
att_soundcultureDf[17,2] <- "Widen S.E., Holmes A.E. & Erlandsson S.I. (2006) a"
att_soundcultureDf[18,2] <- "Widen S.E., Holmes A.E. & Erlandsson S.I. (2006) b"
# a=USA, b=Sweden.


#############################################################################
######### Weighted scores (inverse variance method) for YANS F1  ############
library(metafor)
# Make data frame smaller (for convenience):
# NOTE(review): column positions are hard-coded; re-check after any change to
# the column layout of 'att_soundcultureDf'.
att_soundcultureDf2 <- att_soundcultureDf[ ,-c(5,7:9,14,17:25,27:71,74:83)]
# Add row numbers:
att_soundcultureDf2$row_number <- seq.int(nrow(att_soundcultureDf2))
att_soundcultureDf2 <- relocate(att_soundcultureDf2, row_number, .before = author_year)
View(att_soundcultureDf2)
# escalc function (not necessary here):
att_sc <- escalc(measure="MN", mi=uni.mean, sdi=uni.sd, ni=N, data=att_soundcultureDf2)
att_sc
# What does 'escalc' do with measure="MN"?
getAnywhere(escalc)
# Fix: the snippet below is pasted from the body of metafor::escalc purely for
# reference; executing it at the top level fails ('measure', 'mi', 'sdi', 'ni'
# are not defined in this scope), so it is kept as a comment:
# if (measure == "MN") {
#     yi <- mi
#     vi <- sdi^2/ni
# }
# Studies with NAs are omitted from model. For plot, exclude 'NA' for now:
att_soundcultureDf2 <- att_soundcultureDf2[-c(7), ] 
View(att_soundcultureDf2)

# Random effects model (REML, weighted, i.e. the default settings):
att_sc_fit_RE <- rma(measure="MN", mi=uni.mean, sdi=uni.sd, ni=N, data=att_soundcultureDf2)  # For plot, add "slab=(author_year)".
# Fix: call the S3 generic summary() instead of the method summary.rma()
# directly; dispatch reaches the same method, and the generic is the
# documented interface.
summary(att_sc_fit_RE) # Weighted average = 2.9566 (95% CI 2.7075 - 3.2057)

weights(att_sc_fit_RE) # Equal weights for all studies (about 5% contribution each to overall average).
# In the fixed effects model, the differences in weights are very large (2 studies make up > 95% of weighted average!).

mean(att_soundcultureDf2$uni.mean) # Average = 2.957.
# In the R.E. model, the estimate thus is EQUAL to this calculated mean! Therefore, there is NO added benefit in using the R.E. model. 

options(scipen = 999) # off / options(scipen = 0) # on

# Basic forest plot R.E. Model:
# Fix: spelled out TRUE instead of the reassignable shorthand T.
forest(att_sc_fit_RE, showweights=TRUE, header=TRUE, xlim=c(1.5,4.5), at=seq(1.75,4.25,by=0.25), refline=2.957, xlab="Mean Score on YANS Factor 1")
# Shrink/bolden the annotation font, add a "Weight" column header, then
# restore the previous par() settings:
op <- par(cex=.59, font=2)
text(c(4.0), 20.0, c("Weight"))

par(op)

# The current author names are too long for the plot, so I did not use them for now.

##############################################################################
####### Plots YANS F1 (unweighted) with data frame 'att_soundcultureDf' ######

# YANS F1 plot: one horizontal bar per study showing the mean score, with
# error bars spanning +/- 1 SD around it.
YF1 <- ggplot(att_soundcultureDf,
              aes(x = uni.mean, y = author_year, fill = author_year)) +
  geom_bar(stat = "identity", color = "blue") +
  geom_errorbar(aes(xmin = uni.mean - uni.sd, xmax = uni.mean + uni.sd),
                width = .4, position = position_dodge(.9)) +
  labs(title = "Attitude towards noise, YANS Factor 1",
       x = "Mean score and standard deviation",
       y = element_blank()) +
  theme(plot.title = element_text(hjust = 0, vjust = 3, size = 12, face = "plain")) +
  theme(axis.title.x = element_text(hjust = 0.1, size = 10)) +
  theme(text = element_text(family = "roboto")) +
  scale_fill_paletteer_d("ggsci::default_igv", direction = -1) +
  guides(fill = "none")

YF1
# Save last plot in wd:
ggsave("plot.png", width = 15, height = 15) 
# Changes the font of all text lines in the figure:
YF1 + theme(text = element_text(family = "dosis"))
# Unload showtext:
detach("package:showtext", unload = TRUE)
# Remove all plots:
dev.off()
### Possible adjustments of plot:
# Y-axis from A-Z (not Z-A).
# add '+ theme_minimal()' to script.
# use better looking pallet.


###########################################################################
##################### Compare YANS Overall scores #########################

View(univarDf) # DF with author_year as first column (see previous part)

# Which variables feed into the overall 'attitude towards noise' construct?
table(univarDf$uni.variable[univarDf$varSpec_oper.constructIdentifier == "attitude_noise_7c08258d"])

rows_about_att_noise <-
        (univarDf$varSpec_oper.constructIdentifier == "attitude_noise_7c08258d")

att_noiseDf <- univarDf[rows_about_att_noise, ]
# View(att_noiseDf)

## Dropping studies (with incomparable data) from data frame:
att_noiseDf <- att_noiseDf[att_noiseDf$studyId!="OlsenWiden-Erlandsson--2004.rxs.Rmd", ]
att_noiseDf <- att_noiseDf[att_noiseDf$studyId!="Widén-Bohlin-Johansson--2011.rxs.Rmd", ]
att_noiseDf <- att_noiseDf[att_noiseDf$studyId!="Widén--2013.rxs.Rmd", ]
View(att_noiseDf)

## Using only one score from three studies with multiple scores:
# NOTE(review): positions are hard-coded; re-check after upstream changes.
# att_noiseDf <- att_noiseDf[-8, ]      #Drop duplicate row Hickson,2007
att_noiseDf <- att_noiseDf[-c(17,19), ] #Widen (2006) drop 2 duplicate rows
View(att_noiseDf)

# Widen (2006) is present twice, label the subgroups 'a' and 'b'.
# NOTE(review): edit() opens an interactive data editor, so this step is
# manual and NOT reproducible when the script is sourced non-interactively;
# consider replacing it with explicit assignments as done for
# 'att_soundcultureDf' above.
att_noiseDf <- edit(att_noiseDf)
# a=USA, b=Sweden

## Now 'att_noiseDf' is ready for plotting:

# YANS OVERALL plot: one horizontal bar per study (mean), with +/- 1 SD
# error bars.
yans_overall <- ggplot(att_noiseDf, aes(x=uni.mean, y=author_year, fill=author_year)) + geom_bar(stat="identity", color="blue") +
geom_errorbar(aes(xmin=uni.mean-uni.sd, xmax=uni.mean+uni.sd),    width=.4,position=position_dodge(.9))+ labs(title="Attitude towards noise, YANS Overall Score (19 items)", x= "Mean score and standard deviation", y = element_blank()) +
theme(plot.title = element_text(hjust = 0, vjust = 3, size = 12, face = "plain")) + theme(axis.title.x = element_text(hjust = 0.1, size=10)) + theme(text = element_text(family = "roboto")) + scale_fill_paletteer_d("ggsci::default_igv", direction = 1) + guides(fill = "none")

yans_overall
##############################################################################
## N.B. After 'sample_categories', and possible other 'txs' variables, have been CORRECTED and vector is no longer split (i.e. redundant rows/observations created in data frame), the script has to be adjusted. If not, rows are removed that should not be removed.
##############################################################################


### BAHPHL


##############################################################################
############### Compare scores on the 7 BAHPHL Subscales #####################

View(univarDf) # DF with author_year as first column (see previous code).

# Select the 8 studies that have used the (unmodified) BAHPHL.
# Fix: replaced the long single-line chain of '==' comparisons joined with '|'
# by a named character vector and '%in%', which is easier to maintain and
# (unlike '==') never yields NA rows if 'studyId' contained missing values.
bahphlStudyIds <- c(
  "Degeest-Keppler-Corthals-Clays--2017.rxs.Rmd",
  "Degeest-Maes-Leyssens-Keppler--2018.rxs.Rmd",
  "Gilles-VandeHeyning--2014.rxs.Rmd",
  "Gilles-VanHal-DeRidder-Wouters-VandeHeyning--2013.rxs.Rmd",
  "Keppler-Dhooge-Degeest-Vinck--2015.rxs.Rmd",
  "Keppler-Dhooge-Vinck--2015.rxs.Rmd",
  "Reddy-Nosa-Mafi-Welch--2021.rxs.Rmd",
  "Udoh-Adeyemo--2019.rxs.Rmd"
)
uniBAHPHL <- univarDf[univarDf$studyId %in% bahphlStudyIds, ]
View(uniBAHPHL)

# From the 8 studies, select data from the 7 BAHPHL subscales (this step can be skipped):
bahphlSubscaleIds <- c(
  "intention_73dnt604",
  "threat_susceptibility_79n2fh4s",
  "threat_severity_79n2fh4r",
  "instrumentalAttitude_73dnt5zb",
  "autonomy_conditionPresence_73dnt5zr",
  "perceivedNorms_73dnt5zq",
  "capacity_73dnt602"
)
uBah <- uniBAHPHL[uniBAHPHL$varSpec_oper.constructIdentifier %in% bahphlSubscaleIds, ]
View(uBah)

#### Compare the M+SD on the 7 BAHPHL SUBSCALES with a PLOT:

### 1)INTENTION:
uBah_intention <- uniBAHPHL[uniBAHPHL$varSpec_oper.constructIdentifier=="intention_73dnt604", ]
View(uBah_intention)

# BAHPHL INTENTION plot (RColorBrewer palette): one horizontal bar per study
# (mean score), with +/- 1 SD error bars.
intentionPlot <- ggplot(uBah_intention,
                        aes(x = uni.mean, y = author_year, fill = author_year)) +
  geom_bar(stat = "identity", color = "blue") +
  geom_errorbar(aes(xmin = uni.mean - uni.sd, xmax = uni.mean + uni.sd),
                width = .4, position = position_dodge(.9)) +
  labs(title = "Intention BAHPHL subscale",
       x = "Mean score and standard deviation",
       y = element_blank()) +
  theme(plot.title = element_text(hjust = 0, vjust = 3, size = 12, face = "plain")) +
  theme(axis.title.x = element_text(hjust = 0.1, size = 10)) +
  theme(text = element_text(family = "roboto")) +
  scale_fill_brewer(palette="Set2") +
  guides(fill = "none")

intentionPlot
# Same plot, with a Paletteer palette (overwrites the previous object):
intentionPlot <- ggplot(uBah_intention,
                        aes(x = uni.mean, y = author_year, fill = author_year)) +
  geom_bar(stat = "identity", color = "blue") +
  geom_errorbar(aes(xmin = uni.mean - uni.sd, xmax = uni.mean + uni.sd),
                width = .4, position = position_dodge(.9)) +
  labs(title = "Intention BAHPHL subscale",
       x = "Mean score and standard deviation",
       y = element_blank()) +
  theme(plot.title = element_text(hjust = 0, vjust = 3, size = 12, face = "plain")) +
  theme(axis.title.x = element_text(hjust = 0.1, size = 10)) +
  theme(text = element_text(family = "roboto")) +
  scale_fill_paletteer_d("ggsci::default_igv", direction = -1) +
  guides(fill = "none")

intentionPlot

## 2) THREAT SUSCEPTIBILITY:

# Subset the BAHPHL studies to the threat-susceptibility subscale rows:
uBah_susceptibility <- uniBAHPHL[uniBAHPHL$varSpec_oper.constructIdentifier=="threat_susceptibility_79n2fh4s", ]
View(uBah_susceptibility)

# BAHPHL threat susceptibility plot: one horizontal bar per study (mean),
# with +/- 1 SD error bars.
susceptibilityPlot <- ggplot(uBah_susceptibility, aes(x=uni.mean, y=author_year, fill=author_year)) + geom_bar(stat="identity", color="blue") +
geom_errorbar(aes(xmin=uni.mean-uni.sd, xmax=uni.mean+uni.sd),    width=.4,position=position_dodge(.9))+ labs(title="Threat susceptibility BAHPHL subscale", x= "Mean score and standard deviation", y = element_blank()) +
theme(plot.title = element_text(hjust = 0, vjust = 3, size = 12, face = "plain")) + theme(axis.title.x = element_text(hjust = 0.1, size=10)) + theme(text = element_text(family = "roboto")) + scale_fill_brewer(palette="Set2") + guides(fill = "none")

susceptibilityPlot

### 3) THREAT SEVERITY:

# Etc.






###############################################################################
############################ Make 'TABLE 1' for review ########################

# In Table 1 the following variables are presented in columns (from left to right): 1.Study (names of authors), 2. Year of publication, 3. Country, 4. Number of participants, 5. Description participants (categories), 6. Age of participants, 7. Gender, 8. Leisure/work/both, 9. Hearing-protective behavior (categories), 10. Psychological determinants assessed, 11. Theoretical framework, 12. Grey literature (yes/no). If the table becomes too large, some items will have to be presented elsewhere.

table1 <- univarDf
View(table1)
# Fix: relocate() previously took 'univarDf' as its input, silently discarding
# the 'table1' copy made above (harmless today because the two are identical
# at this point, but a latent bug); operate on 'table1' instead.
table1 <- relocate(table1, studyMethods_N, .before = N)

# Keep only the age rows for the Table-1 age column:
t1_age <- table1[table1$uni.variable=="age", ]
# Add row numbers:
t1_age$row_number <- seq.int(nrow(t1_age))
t1_age <- relocate(t1_age, row_number, .before = author_year)
# Remove duplicate rows:
# NOTE(review): positions are hard-coded against the row numbering created
# just above; re-check after any upstream change.
t1_age <- t1_age[-c(22,29,31,35,41,66:73,98,101,105:106), ]
View(t1_age) # The df now has 105 rows.

# For some studies no mean age is reported but only min. & max., or the frequencies for age categories are reported (these studies still occupy multiple rows), and for some studies no age is reported at all. 

# Still to finish.......





########################################################################

Extracted constructs

Aspects per UCID

# Pull the single 'methodType' value out of every study's extraction tree:
allMethods <-
  metabefor::get_singleValue(
    studies,
    "methodType"
  );

# Ids of the studies coded as qualitative (presumably 'Id' is the study
# identifier column returned by get_singleValue — verify against metabefor):
qualitativeStudies <-
  allMethods[allMethods$methodType == "qualitative", "Id"];
# For each qualitative study, walk its rxs tree, collect every entity under
# the "variables" node, and stack everything into one data frame with a
# 'studyId' column identifying the source study.
allAspects <-
  metabefor::rbind_df_list(
    lapply(
      qualitativeStudies,
      function(studyId) {
        
        # Children of the "variables" node in this study's extraction tree:
        allVars <-
          data.tree::FindNode(
            studies$rxsTrees[[studyId]],
            "variables"
          )$children
        
        # Extract each child's "value" attribute:
        allVars <-
          data.tree::Get(
            allVars,
            "value"
          );
        
        # Coerce every value to a one-row-per-entry data frame:
        varsList <-
          lapply(
            allVars,
            as.data.frame
          );
        
        varsDf <-
          metabefor::rbind_df_list(varsList);
        
        # Tag every row with the study it came from:
        varsDf$studyId <- rep(studyId, nrow(varsDf));
  
        return(
          varsDf
        );
      }
    )
  );

# Unified construct identifier (ucid): prefer the aspect-level identifier,
# falling back to the operationalization-level one when it is missing.
allAspects$ucid <-
  ifelse(
    is.na(allAspects$aspect.constructIdentifier),
    allAspects$oper.constructIdentifier,
    allAspects$aspect.constructIdentifier
  );

# Aspect text: use the description when present and non-empty, otherwise the
# raw content.
# Fix: '||' is scalar-only (and errors on length > 1 inputs as of R 4.3);
# inside a vectorized ifelse() the elementwise '|' is required.
allAspects$aspectContent <-
  ifelse(
    is.na(allAspects$asp.description) |
      (nchar(trimws(allAspects$asp.description)) == 0),
    allAspects$asp.content,
    allAspects$asp.description
  );

# Render one markdown section per unique construct identifier (UCID), listing
# every extracted aspect as a bullet followed by the study it came from.
for (current_ucid in sort(unique(allAspects$ucid))) {
  
  # Rows belonging to the current UCID:
  ucidRows <- allAspects$ucid == current_ucid;
  
  # Section header:
  cat("\n\n#### ", current_ucid, "\n\n", sep="");
  
  # One bullet per aspect, with the source study shown in gray:
  bulletLines <-
    paste0(
      "\n- ",
      allAspects[ucidRows, "aspectContent"],
      " <span style='color: gray'>`",
      allAspects[ucidRows, "studyId"],
      "`</span>\n"
    );
  
  cat(bulletLines);
}