This file is part of the repository hosted at https://gitlab.com/extending-the-earcheck/living-review, and its rendered version is hosted by GitLab Pages at https://extending-the-earcheck.gitlab.io/living-review. The Google Sheet holding the extraction script template is available from https://docs.google.com/spreadsheets/d/1duDKLMmhel_5fBPhF_H-0Dbic896-5eCef9bBCSJMco.
if (!requireNamespace("ufs", quietly = TRUE) ||
    (utils::packageVersion("ufs") < "0.4")) {
  stop("You need to have at least version 0.4 of the `ufs` package installed; ",
       "install it with:\n\ninstall.packages('ufs');");
}
# devtools::load_all("B:/Data/R/metabefor");
### Get dev version of metabefor
ufs::quietGitLabUpdate("r-packages/metabefor", quiet = FALSE);
## Skipping install of 'metabefor' from a gitlab remote, the SHA1 (4f414c33) has not changed since last install.
## Use `force = TRUE` to force installation
### Get version of revtools with bugfix
# ufs::quietRemotesInstall("matherion/revtools@bibtex-importing-bugfix",
# func = "install_github", quiet = FALSE);
# ufs::quietRemotesInstall("matherion/synthesisr@bibtex-import-bugfix",
# func = "install_github", quiet = FALSE);
ufs::quietRemotesInstall("rmetaverse/synthesisr",
func = "install_github", quiet = FALSE);
## Skipping install of 'synthesisr' from a github remote, the SHA1 (c406bc9f) has not changed since last install.
## Use `force = TRUE` to force installation
### Get additional packages
ufs::checkPkgs('here'); ### To easily access files
### using 'relative paths'
ufs::checkPkgs("googlesheets4"); ### To import data from google
### sheets in metabefor
ufs::checkPkgs("synthesisr"); ### To import RIS files
ufs::checkPkgs("stringdist"); ### To compute string distances
###-----------------------------------------------------------------------------
### Settings
###-----------------------------------------------------------------------------
### By default show R code and don't comment output
knitr::opts_chunk$set(echo = TRUE);
knitr::opts_chunk$set(comment = NA);
runScreeningChunks <- FALSE;
runExtractionChunks <- FALSE;
importExtractedStudiesFromRxsFiles <- FALSE;
runStudyTreePreparation <- FALSE;
###-----------------------------------------------------------------------------
### Paths
###-----------------------------------------------------------------------------
### Set base path
basePath <- here::here();
### Set path for query hit exports
queryHitExportPath <- file.path(basePath, "search");
### Set path for screening
screeningPath <- file.path(basePath, "screening");
### Set path for any rxs specifications
rxsSpecificationsPath <-
file.path(basePath, "rxs-specifications");
### Set path for extraction script template
extractionScriptTemplatePath <-
file.path(basePath, "extraction");
extractionScriptPath <-
file.path(basePath, "extraction", "Extracted studies");
### Set path for intermediate output
outputPath <- file.path(basePath, "output");
### Set path for data files
dataPath <- file.path(basePath, "data");
###-----------------------------------------------------------------------------
### Extraction Script and Aggregation Tree Google sheets URLs
###-----------------------------------------------------------------------------
sheetsURL <- paste0("https://docs.google.com/spreadsheets/d/",
"1duDKLMmhel_5fBPhF_H-0Dbic896-5eCef9bBCSJMco");
aggregationTreeURL <- paste0("https://docs.google.com/spreadsheets/d/",
"1P5IZekDxAiW3B_TLz392wd68alv4HFF5eTnoPwpGs_Q");
txsIDs <-
c("12WldMPYFkJIU2VaYTg0Ysj-kiSe32eUY25Dghcnochs", ### theories
"18DQ0tPqqsiSIrrXJkAZ4MNsH27i2fSX588XwdUam8uo", ### hp-measure
"1XOi1bgmewvVeorQcNXNZHGVwi_PEQxRO2l_6vOzJst8", ### hp-precise
"1bpcXEXQYJcmilX4F_ZlJ5RsO2B-Ekkz0B2mVnygUEso", ### pld-use
"1DQZtdLYhYNa6CISxOrWpyvkEsarHDr9LaeuAyGir4d4", ### barrier
"1wg4PqFnZ2-L6MLYvOPOD4pUy_JbmcA5wlIxDeHwzf-U", ### participant-categories
"1KysisDj5bG9j8F4uIU_7C_jIBhA3KeUvKlOCGNHVBCM", ### grey-literature
"1FSxP6rdAtJckLrSfIShDV0kkU99edPbQclBTTqebg2I", ### scale ranges
NULL);
###-----------------------------------------------------------------------------
### Extraction administration
###-----------------------------------------------------------------------------
extraction_administration_columns_all <-
c("qurid", "doi", "title", "year", "author",
"included", "fulltext", "extractor", "exclude_check",
"exclude_final", "finished", "comments");
extraction_administration_columns_firstBatch <-
c(extraction_administration_columns_all,
"screening_stage1", "included_by_B_not_by_A",
"screenerastatus", "screenerbstatus1",
"screeneraconfidence", "screenerbconfidence");
extraction_administration_columns_secondBatch <-
c(extraction_administration_columns_all,
"screening_stage2", "included_by_B_not_by_A",
"screener_a_status_r2", "screenerbstatus1",
"screeneraconfidence", "screenerbconfidence");
###-----------------------------------------------------------------------------
### Import rxs specifications
###-----------------------------------------------------------------------------
### devtools::load_all("B:/Data/R/metabefor");
options(metabefor.debug = TRUE);
fullObject <-
metabefor:::rxs_fromSpecifications(
gs_url = sheetsURL,
entitiesFilename = file.path(rxsSpecificationsPath,
"entities-local-copy.csv"),
valueTemplatesFilename = file.path(rxsSpecificationsPath,
"valueTemplates-local-copy.csv"),
localBackup = list(
entities = file.path(rxsSpecificationsPath,
"entities-local-copy.csv"),
valueTemplates = file.path(rxsSpecificationsPath,
"valueTemplates-local-copy.csv"),
definitions = NULL
),
outputFile = file.path(
extractionScriptTemplatePath,
"extractionScriptTemplate.rxs.Rmd"
),
instructionHeadingLevel = 4,
returnFullObject = TRUE
);
✔ Reading from "rxs-template-EarAct".
✔ Range 'entities'.
✔ Reading from "rxs-template-EarAct".
✔ Range 'valueTemplates'.
✔ Reading from "rxs-template-EarAct".
✔ Range 'definitions'.
✔ Reading from "rxs-template-EarAct".
✔ Range 'instructions'.
Successfully read the extraction script specifications from Google sheets.
Stored local backup of entities to 'C:/Users/tdeze/Documents/Earcheck/living-review/rxs-specifications/entities-local-copy.csv'.
Stored local backup of value templates to 'C:/Users/tdeze/Documents/Earcheck/living-review/rxs-specifications/valueTemplates-local-copy.csv'.
rxs_parseEntities read an entity spreadsheet with the following columns: 'identifier', 'parent', 'title', 'description', 'valueTemplate', 'validValues', 'default', 'examples', 'entityRef', 'fieldRef', 'owner', 'list', 'collapsing', 'repeating', 'recurring', 'recursing' & 'identifying'.
Parsed extraction script specifications into extraction script template.
Created diagrams representing the extraction tree.
Successfully wrote extraction script template to 'C:/Users/tdeze/Documents/Earcheck/living-review/extraction/extractionScriptTemplate.rxs.Rmd'.
Here the search results are imported, deduplicated, and prepared for the screeners.
originFile_col <- "origin_file";
originDatabase_col <- "origin_database";
originInterface_col <- "origin_interface";
originDate_col <- "origin_date";
### Get all subdirectories; search hits are placed in alphabetically
### ordered subdirectories.
bibliographyHitDirs <-
list.dirs(
queryHitExportPath,
full.names = FALSE,
recursive = FALSE
);
### Sort alphabetically
bibliographyHitDirs <-
sort(bibliographyHitDirs);
### Get all files in each subdirectory
bibliographyHitFiles <-
lapply(
file.path(queryHitExportPath, bibliographyHitDirs),
list.files,
full.names = TRUE,
pattern = "\\.ris$",
ignore.case = TRUE,
recursive = TRUE
);
bibliographyHitFiles <-
lapply(
bibliographyHitFiles,
grep,
pattern = "_preprocessed",
ignore.case = TRUE,
invert = TRUE,
value = TRUE
);
names(bibliographyHitFiles) <- bibliographyHitDirs;
### Get a list of subdirectories with one file or more
bibliographyHitFiles <-
bibliographyHitFiles[
unlist(
lapply(
bibliographyHitFiles,
length
)
) > 0
];
### Process the files in these subdirectories, reading all files
### separately to keep track of every entry's origin
bibliographyHits <-
lapply(
names(bibliographyHitFiles),
function(currentFileList) {
cat("\nStarting to read directory '", currentFileList, "':\n", sep="");
return(
lapply(
bibliographyHitFiles[[currentFileList]],
function(currentFile) {
cat("\nStarting to read file '", currentFile, "':\n", sep="");
cat("\nPreprocessing file.\n");
newFileName <- paste0(
tools::file_path_sans_ext(currentFile),
"_preprocessed_for_synthesisr.",
tools::file_ext(currentFile)
);
fileAsText <-
readLines(currentFile);
# fileAsText <-
# gsub("^T1", "TI", fileAsText);
# ### Remove all lines that don't start with a RIS tag
# fileAsText <-
# fileAsText[grepl("^\\s*$|^[A-Z][A-Z0-9]\\s{2}-\\s", fileAsText)];
# ### Remove C1 lines (weird stuff Ebsco Embase exports added in)
# fileAsText <-
# fileAsText[!grepl("^C1\\s{2}-\\s", fileAsText)];
### Get all lines that start with TY and ER; depends on whether
### the file is a pubmed export
if (any(grepl("PMID-", fileAsText))) {
TY_regex <- "PMID-";
ER_regex <- "SO -"
### Actually, no further processing required; pubmed isn't
### where the problem is
} else {
TY_regex <- "TY -";
ER_regex <- "ER -"
TY_lines <- grep(TY_regex, fileAsText);
ER_lines <- grep(ER_regex, fileAsText);
if (length(TY_lines) != length(ER_lines)) {
stop("Something seems to have gone wrong; I found ",
length(TY_lines), " lines that start a RIS record ",
"but ", length(ER_lines), " lines that end one.");
}
### Get all indices in between
recordlines <-
do.call(
c,
mapply(
seq,
TY_lines,
ER_lines,
SIMPLIFY = FALSE
)
);
fileAsText <- fileAsText[recordlines];
}
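### Illustration (hypothetical line numbers, not from an actual export):
### with TY_lines = c(1, 10) and ER_lines = c(8, 20),
### mapply(seq, TY_lines, ER_lines, SIMPLIFY = FALSE) yields list(1:8, 10:20),
### and do.call(c, ...) concatenates these into the indices of all lines
### belonging to a RIS record, dropping anything outside the TY/ER pairs.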
writeLines(
fileAsText,
newFileName
);
cat("Importing file.\n");
res <-
#revtools::read_bibliography(
synthesisr::read_refs(
newFileName
);
res[, originFile_col] <-
basename(currentFile);
return(res);
}
)
)
}
);
names(bibliographyHits) <- names(bibliographyHitFiles);
### Process all lists of dataframes in each directory
bibHitDfs <-
lapply(
bibliographyHits,
metabefor::rbind_df_list
);
names(bibHitDfs) <- names(bibliographyHits);
###-----------------------------------------------------------------------------
### Set the DOIs
###-----------------------------------------------------------------------------
for (i in names(bibHitDfs)) {
if (!('doi' %in% names(bibHitDfs[[i]]))) {
bibHitDfs[[i]]$doi <- NA;
}
### Extract DOIs from 'article_id' column
if ('article_id' %in% names(bibHitDfs[[i]])) {
bibHitDfs[[i]]$doi <-
ifelse(
is.na(bibHitDfs[[i]]$doi),
metabefor::extract_doi(
bibHitDfs[[i]]$article_id
),
bibHitDfs[[i]]$doi
);
}
### Potentially add from 'DO' column
if ('DO' %in% names(bibHitDfs[[i]])) {
bibHitDfs[[i]]$doi <-
ifelse(
is.na(bibHitDfs[[i]]$doi),
bibHitDfs[[i]]$DO,
bibHitDfs[[i]]$doi
);
}
}
###-----------------------------------------------------------------------------
### Internal deduplication
###-----------------------------------------------------------------------------
detailedDuplicateInfo <- list();
for (i in names(bibHitDfs)) {
cat("\n\n**Processing ", i, "**\n\n", sep="");
detailedDuplicateInfo[[i]] <-
list(
internal_res = metabefor::duplicate_sources(
bibHitDfs[[i]],
silent = FALSE,
returnRawStringDistances = TRUE
)
);
detailedDuplicateInfo[[i]]$details_internal <-
attr(detailedDuplicateInfo[[i]]$internal_res, "duplicateInfo");
bibHitDfs[[i]]$internalDuplicates <-
detailedDuplicateInfo[[i]]$internal_res;
}
###-----------------------------------------------------------------------------
### Add unique identifiers to every record for future reference
###-----------------------------------------------------------------------------
### Create quasi-unique record identifiers (qurids) for all records;
### always starting from the same date.
recordsPerSearchBatch <-
unlist(
lapply(
bibHitDfs,
nrow
)
);
totalRecords <- sum(recordsPerSearchBatch);
recordsPerSearchBatch_lastIndices <-
cumsum(recordsPerSearchBatch);
recordsPerSearchBatch_firstIndices <-
c(1, head(recordsPerSearchBatch_lastIndices, -1) + 1);
quridIndices <-
mapply(
c,
recordsPerSearchBatch_firstIndices,
recordsPerSearchBatch_lastIndices,
SIMPLIFY = FALSE
);
names(quridIndices) <- names(recordsPerSearchBatch);
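### Illustration (hypothetical numbers): with two search batches of 100 and 40
### records, recordsPerSearchBatch_firstIndices is c(1, 101),
### recordsPerSearchBatch_lastIndices is c(100, 140), and quridIndices becomes
### list(c(1, 100), c(101, 140)): the first and last QURID index per batch.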
### Generate the required number of QURIDs
totalQURIDs <-
metabefor::generate_qurids(
totalRecords,
origin=as.POSIXct("2020-11-19 18:00:00")
);
for (i in names(bibHitDfs)) {
cat("\n\n**Generating QURIDS for ", i, "**\n\n", sep="");
if (is.null(bibHitDfs[[i]]$qurid)) {
bibHitDfs[[i]]$qurid <-
totalQURIDs[quridIndices[[i]][1]:quridIndices[[i]][2]];
} else {
bibHitDfs[[i]]$qurid <-
ifelse(is.na(bibHitDfs[[i]]$qurid) |
(nchar(bibHitDfs[[i]]$qurid) == 0),
totalQURIDs[quridIndices[[i]][1]:quridIndices[[i]][2]],
bibHitDfs[[i]]$qurid);
}
### Also rename "ID" field to something else; this is a reserved
### field name for JabRef (at least for version 2.11, which we'll
### use for the screening)
if ("ID" %in% names(bibHitDfs[[i]])) {
names(bibHitDfs[[i]])[names(bibHitDfs[[i]]) == "ID"] <-
"old_ID_field";
}
}
ufs::cat0(
"A total of ",
nrow(bibHitDfs[[1]]),
" bibliographic entries have been read from ",
length(unique(bibHitDfs[[1]][, originFile_col])),
" files.",
"\n\n",
"Out of those ",
nrow(bibHitDfs[[1]]),
" entries, a DOI is available for ",
sum(!is.na(bibHitDfs[[1]]$doi)),
" entries (so ",
sum(is.na(bibHitDfs[[1]]$doi)),
" do not have a DOI)."
);
###-----------------------------------------------------------------------------
### Process first search batch
### Note that these are sorted by batch
###-----------------------------------------------------------------------------
screenerPackages <-
metabefor::write_screenerPackage(
bibliographyDf = bibHitDfs[[1]],
outputPath = screeningPath,
basename = "stage1_",
duplicateField = "duplicate"
);
### Potentially, to screen with revtools:
# revtools::screen_titles(bibHitDf[[1]]);
This spreadsheet shows an overview of the screened articles.
filesToRead_firstBatch <-
list.files(
screeningPath,
pattern = "^tiab_stage1_.*\\.bib$",
recursive = TRUE,
full.names = TRUE
);
screened_firstBatch_screenerA <-
synthesisr::read_refs(
filesToRead_firstBatch[1]
);
screened_firstBatch_screenerB <-
synthesisr::read_refs(
filesToRead_firstBatch[2]
);
screened_firstBatch_merged <-
merge(
screened_firstBatch_screenerA[
,
grep("screenerbstatus",
names(screened_firstBatch_screenerA),
invert = TRUE)],
screened_firstBatch_screenerB[
,
c("qurid", "screenerbstatus1")
],
by = "qurid",
all = TRUE
);
screened_firstBatch_merged$included <-
ifelse(grepl("incl", screened_firstBatch_merged$screenerastatus, ignore.case=TRUE) |
grepl("incl", screened_firstBatch_merged$screenerbstatus1, ignore.case=TRUE),
"incl",
"");
screened_firstBatch_merged$screening_stage1 <-
paste0(
screened_firstBatch_merged$screenerastatus,
"|",
screened_firstBatch_merged$screenerbstatus1,
">",
screened_firstBatch_merged$included
);
screened_firstBatch_merged$included_by_B_not_by_A <-
!grepl("inc", screened_firstBatch_merged$screenerastatus,
ignore.case = TRUE) &
grepl("inc", screened_firstBatch_merged$screenerbstatus1,
ignore.case = TRUE);
screened_firstBatch_merged[,
setdiff(
extraction_administration_columns_firstBatch,
names(screened_firstBatch_merged)
)] <- "";
openxlsx::write.xlsx(
screened_firstBatch_merged[
screened_firstBatch_merged$included == "incl",
extraction_administration_columns_firstBatch],
file = file.path(screeningPath,
"screening_overview--stage1--generated.xlsx")
);
###-----------------------------------------------------------------------------
### Process second search batch
###-----------------------------------------------------------------------------
secondBatchDuplicates <-
metabefor::duplicate_sources(
screened_firstBatch,
bibHitDfs[[2]],
silent = FALSE
);
secondBatchDuplicateInfo <-
attr(secondBatchDuplicates, "duplicateInfo");
bibHitDfs[[2]]$duplicate <-
secondBatchDuplicates;
###-----------------------------------------------------------------------------
### Merge search hits and write screener package
###-----------------------------------------------------------------------------
round2_screening <-
metabefor::rbind_dfs(
screened_firstBatch,
bibHitDfs[[2]]
);
hasNoContents <- function(x) {
isNA <- is.na(x);
hasChars <- nchar(x) > 0;
return(ifelse(isNA, TRUE, !hasChars));
}
hasContents <- function(x) {
isNA <- is.na(x);
hasChars <- nchar(x) > 0;
return(ifelse(isNA, FALSE, hasChars));
}
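### Quick check of these helpers: hasContents(c(NA, "", "x")) returns
### c(FALSE, FALSE, TRUE), and hasNoContents() returns the complement,
### c(TRUE, TRUE, FALSE).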
round2_screening$to_be_screened <-
hasNoContents(round2_screening$duplicate) &
hasNoContents(round2_screening$screening_stage1);
### Strip duplicates and records that were already screened in round 1
round2_screening_onlyNew <-
round2_screening[
round2_screening$to_be_screened,
]
screenerPackages_round2 <-
metabefor::write_screenerPackage(
bibliographyDf = round2_screening_onlyNew,
screeners = c("a", "b"),
screenerFieldsPrefix = "screener_",
screenerFieldsSuffix = "_status_r2",
outputPath = screeningPath,
prevRoundField = "screening_stage1",
basename = "stage2_",
duplicateField = "duplicate",
silent=FALSE
);
### Test
# test_screening2_pkgs <-
# synthesisr::read_refs(
# file.path(
# screeningPath,
# "stage2_a",
# "stage2_a.bib"
# )
# );
ufs::cat0(
"After the first round of screening, and after eliminating duplicate records, there are ",
nrow(round2_screening_onlyNew),
" records left to screen for the second search."
);
This spreadsheet shows an overview of the screened articles.
filesToRead_secondBatch <-
list.files(
screeningPath,
pattern = "^tiab_stage2_.*\\.bib$",
recursive = TRUE,
full.names = TRUE
);
screened_secondBatch_screenerA <-
synthesisr::read_refs(
filesToRead_secondBatch[1]
);
screened_secondBatch_screenerB <-
synthesisr::read_refs(
filesToRead_secondBatch[2]
);
### Adding screening judgement of screener B to file of first screener
screened_secondBatch_merged <-
merge(
screened_secondBatch_screenerA,
screened_secondBatch_screenerB[ , c("qurid", "screenerbstatus1")],
by = "qurid",
all = TRUE
);
screened_secondBatch_merged$included <-
ifelse(grepl("incl", screened_secondBatch_merged$screener_a_status_r2,
ignore.case=TRUE) |
grepl("incl", screened_secondBatch_merged$screenerbstatus1,
ignore.case=TRUE),
"incl",
"");
screened_secondBatch_merged$screening_stage2 <-
paste0(
screened_secondBatch_merged$screener_a_status_r2,
"|",
screened_secondBatch_merged$screenerbstatus1,
">",
screened_secondBatch_merged$included
);
screened_secondBatch_merged$included_by_B_not_by_A <-
!grepl("inc", screened_secondBatch_merged$screener_a_status_r2,
ignore.case = TRUE) &
grepl("inc", screened_secondBatch_merged$screenerbstatus1,
ignore.case = TRUE);
screened_secondBatch_merged[,
setdiff(
extraction_administration_columns_secondBatch,
names(screened_secondBatch_merged)
)] <- "";
openxlsx::write.xlsx(
screened_secondBatch_merged[
screened_secondBatch_merged$included == "incl",
extraction_administration_columns_secondBatch
],
file = file.path(screeningPath,
"screening_overview--stage2--generated.xlsx")
);
This is an overview of the hierarchy of entities to extract.
print(fullObject$rxsStructure$parsedEntities$extractionScriptTree);
DiagrammeR::render_graph(fullObject$rxsTreeDiagram_simple)
DiagrammeR::export_graph(
fullObject$rxsTreeDiagram_simple,
file.path(
outputPath,
"extraction-tree--simple.pdf"
)
);
cat(fullObject$rxsInstructions);
cat(fullObject$entityOverview_list);
This is the extraction script generated based on the extraction script specification.
cat("\n\n<pre><textarea rows='40' cols='124' style='font-family:monospace;font-size:11px;white-space:pre;'>",
unlist(fullObject$rxsTemplate),
"</textarea></pre>\n\n",
sep="\n");
These tabs show the extracted data per study.
# devtools::load_all("B:/Data/R/metabefor");
### Read studies
if (importExtractedStudiesFromRxsFiles) {
studies <-
metabefor::rxs_parseExtractionScripts(
path = extractionScriptPath
);
### Store workspace; first delete the old automatically
### saved workspaces
oldfiles <- list.files(extractionScriptPath,
pattern='^autogenerated--rxsObject-preTreePrep--.*\\.rds$',
full.names=TRUE);
### Delete all but most recent one
if (length(oldfiles) > 1) {
unlink(head(sort(oldfiles), -1));
}
### Housecleaning
rm(oldfiles);
### Store new workspace
currentDate <- Sys.time();
extractedStudiesFilename <-
format(currentDate,
"autogenerated--rxsObject-preTreePrep--%Y-%m-%d--%H-%M.rds");
tryCatch({
saveRDS(studies,
file=file.path(extractionScriptPath,
extractedStudiesFilename));
}, error = function(e) {
warning(paste0("Encountered error when trying to save extracted studies to ",
extractedStudiesFilename, "!\n"));
});
} else {
### Load most recently saved object
fileList <- list.files(extractionScriptPath,
pattern='^autogenerated--rxsObject-preTreePrep--.*\\.rds$',
full.names=TRUE);
if (length(fileList) > 0) {
extractedStudiesFilename <- tail(sort(fileList), 1);
studies <- readRDS(extractedStudiesFilename);
rm(extractedStudiesFilename);
} else {
stop("No automatically stored extracted studies objects available!");
}
rm(fileList);
}
### Make backup of studies object
raw <- metabefor::cloneStudiesObject(studies);
### Log the output
for (studyFileName in names(studies$rxsOutput)) {
ufs::heading(studyFileName, headingLevel = 3);
cat(
studies$rxsOutput[[studyFileName]],
sep="\n"
);
}
# Validation results Validation successful! (repeated once for each extracted study)
treesWithUniqueNames <-
unlist(lapply(studies$rxsTrees, data.tree::AreNamesUnique));
if (any(!treesWithUniqueNames)) {
table(treesWithUniqueNames);
studiesWithDuplicateNames <-
names(studies$rxsTrees)[!treesWithUniqueNames];
for (i in studiesWithDuplicateNames) {
metabefor::cat0("\n\nStudy with duplicate names: ", i, "\n\n");
allNames <- table(studies$rxsTrees[[i]]$Get('name'));
dupNames <- which(allNames > 1);
print(names(allNames)[dupNames]);
}
} else {
metabefor::cat0("\n\nAll studies have unique entity node names.\n\n");
}
# devtools::load_all("B:/Data/R/metabefor");
### See the manual page for `explode_vector_to_values` for more
### detail about how vectors are exploded into separate values:
###
### ?explode_vector_to_values
for (txs_gSheetId in txsIDs) {
metabefor::supplement_studyTrees_from_txs(
studies,
paste0("https://docs.google.com/spreadsheets/d/", txs_gSheetId),
explode_vector_to_values = TRUE,
silent = FALSE
);
}
### Check correct supplementing
# studies$rxsTrees$`Alnuman-Ghnimat--2019.rxs.Rmd`$theory$value
# studies$rxsTrees$`Beach-Nielsen-Gilliver--2016.rxs.Rmd`$theory$value
# studies$rxsTrees$`Beach-Nielsen-Gilliver--2016.rxs.Rmd`$theory_HBM$value
# studies$rxsTrees$`Beach-Williams-Gilliver--2011.rxs.Rmd`$theory$value
### Add the information about each variable that was stored in the
### clustering entity holding the measurement information to the
### clustering entities with the univariate and association results.
###
### The first argument is our studies object. We don't store the function's
### return value, because the function adds the information directly to the
### data.tree objects, which use reference semantics (via the R6
### object-oriented programming package).
###
### The second argument is the name of the field (inside the target entity
### node) that holds the identifier of the source entity node (the node
### supplying the data). Note that that field is itself an entity, as
### specified in the entity specification spreadsheet.
###
### The third argument is the prefix prepended to the newly stored
### fields in the target node.
###
### The fourth argument forces overwriting of values should they
### already exist in the target node.
###
### The fifth argument can be switched to FALSE to get detailed info.
###
### For more info:
### ?metabefor::supplement_data_from_list_inStudyTrees
# devtools::load_all("B:/Data/R/metabefor");
# studies <- metabefor::cloneStudiesObject(raw);
###-----------------------------------------------------------------------------
### univariate results
###-----------------------------------------------------------------------------
### Supplement univariate results with operationalization details
metabefor::supplement_data_from_list_inStudyTrees(
studies,
sourceEntityNodeIdField_in_targetEntity="uni.variable",
prefix="varSpec_",
sourcePathString_regex = "methods",
targetPathString_regex = "results",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
### Supplement univariate results with subsample details
metabefor::supplement_data_from_list_inStudyTrees(
studies,
sourceEntityNodeIdField_in_targetEntity="uni.subsample",
prefix="subsample_",
sourcePathString_regex = "methods",
targetPathString_regex = "results",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
### Then add entities from the study itself
### Conveniently check which entities exist at the 'study' level -
### note that this list also contains container entities!
allEntitiesInStudy <-
unique(
unlist(
lapply(
studies$rxsTrees,
function(node) {
return(names(node$children));
}
)
)
);
### Compile a list of what we want to add to the univariate
### results in the study tree; use 'grep' to get all variables
### for vectors that were exploded into single values
entities_fromStudy_toSupplement <-
c("year.collected",
"year.published",
"name.author",
"country",
"theory_use",
"pld_use",
"grey_literature",
grep("theory_", allEntitiesInStudy, value=TRUE),
grep("sample_categories_", allEntitiesInStudy, value=TRUE)
);
### Loop through the entities to supplement and add them
for (sourceEntityId_to_Supplement in entities_fromStudy_toSupplement) {
metabefor::cat0("\nStarting to supplement `", sourceEntityId_to_Supplement, "`... ");
metabefor::supplement_data_with_value_inStudyTrees(
studies,
sourceEntityNodeId = sourceEntityId_to_Supplement,
targetEntityNode_requiredField = "uni.variable",
prefix="studyInfo_",
sourcePathString_regex = NULL,
targetPathString_regex = "results/",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
metabefor::cat0("Done with `", sourceEntityId_to_Supplement, "`.\n");
}
### And from the methods
for (sourceEntityId_to_Supplement in c("N",
"samplingStrategy",
"setting",
"sample",
"methodType")) {
metabefor::cat0("\nStarting to supplement `", sourceEntityId_to_Supplement, "`... ");
metabefor::supplement_data_with_value_inStudyTrees(
studies,
sourceEntityNodeId = sourceEntityId_to_Supplement,
targetEntityNode_requiredField = "uni.variable",
prefix="studyMethods_",
sourcePathString_regex = "methods/",
targetPathString_regex = "results/",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
metabefor::cat0("Done with `", sourceEntityId_to_Supplement, "`.\n");
}
###-----------------------------------------------------------------------------
### Associations
###-----------------------------------------------------------------------------
### First details about the measurement
metabefor::supplement_data_from_list_inStudyTrees(
studies,
sourceEntityNodeIdField_in_targetEntity="assoc.var1name",
prefix="var1spec_",
sourcePathString_regex = "methods",
targetPathString_regex = "results",
forceCopyingOfExistingValues = TRUE,
silent=TRUE
);
metabefor::supplement_data_from_list_inStudyTrees(
studies,
sourceEntityNodeIdField_in_targetEntity="assoc.var2name",
prefix="var2spec_",
sourcePathString_regex = "methods",
targetPathString_regex = "results",
forceCopyingOfExistingValues = TRUE,
silent=TRUE
);
### Then supplement association results with subsample details
metabefor::supplement_data_from_list_inStudyTrees(
studies,
sourceEntityNodeIdField_in_targetEntity="assoc.subsample",
prefix="subsample_",
sourcePathString_regex = "methods",
targetPathString_regex = "results",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
### Then add the entities from the study itself
for (sourceEntityId_to_Supplement in entities_fromStudy_toSupplement) {
metabefor::cat0("\nStarting to supplement `", sourceEntityId_to_Supplement, "`... ");
metabefor::supplement_data_with_value_inStudyTrees(
studies,
sourceEntityNodeId = sourceEntityId_to_Supplement,
targetEntityNode_requiredField = "assoc.var1name",
prefix="studyInfo_",
sourcePathString_regex = NULL,
targetPathString_regex = "results/",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
metabefor::cat0("Done with `", sourceEntityId_to_Supplement, "`.\n");
}
### And from the methods
for (sourceEntityId_to_Supplement in c("N",
"samplingStrategy",
"setting",
"sample",
"methodType")) {
metabefor::cat0("\nStarting to supplement `", sourceEntityId_to_Supplement, "`... ");
metabefor::supplement_data_with_value_inStudyTrees(
studies,
sourceEntityNodeId = sourceEntityId_to_Supplement,
targetEntityNode_requiredField = "assoc.var1name",
prefix="studyMethods_",
sourcePathString_regex = "methods/",
targetPathString_regex = "results/",
forceCopyingOfExistingValues = FALSE,
silent=TRUE
);
metabefor::cat0("Done with `", sourceEntityId_to_Supplement, "`.\n");
}
# devtools::load_all("B:/Data/R/metabefor");
aggregationTree <-
metabefor::read_aggregationTree_from_gs(
aggregationTreeURL
);
#aggregationTree;
metabefor::renderTreeAsGraph(aggregationTree);
metabefor::add_aggregationTree_information(
studies = studies,
aggregationTree = aggregationTree,
fieldName = "varSpec_oper.constructIdentifier"
);
### Process all study trees
standardizeMean <- function(mean, min, max) {
if (is.null(mean) || is.null(min) || is.null(max)) {
return(NA);
}
mean <- as.numeric(mean);
min <- as.numeric(min);
max <- as.numeric(max);
# tryCatch(mean <- as.numeric(mean),
# warning = function(w) {
# warning("Encountered warning ('",
# w$message, "') when trying to coerce ",
# "the mean to numeric from value '",
# mean, "'.");
# invokeRestart(muffleWarning=TRUE);
# });
# tryCatch(min <- as.numeric(min),
# warning = function(w) {
# warning("Encountered warning ('",
# w$message, "') when trying to coerce ",
# "the min to numeric from value '",
# min, "'.");
# invokeRestart(muffleWarning=TRUE);
# });
# tryCatch(max <- as.numeric(max),
# warning = function(w) {
# warning("Encountered warning ('",
# w$message, "') when trying to coerce ",
# "the max to numeric from value '",
# max, "'.");
# invokeRestart(muffleWarning=TRUE);
# });
if (is.na(mean) || is.na(min) || is.na(max)) {
return(NA);
}
return(
100 * (mean - min) / (max - min)
);
}
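### Quick sanity check of the rescaling (illustrative values, not from a study):
### a mean of 3.5 on a scale running from 1 to 5 becomes
### standardizeMean(3.5, 1, 5) = 100 * (3.5 - 1) / (5 - 1) = 62.5
### on the 0-100 metric.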
metabefor::transform_in_every_clusteringEntity(
studies,
fun = standardizeMean,
funArgs = c(mean = "uni.mean",
min = "varSpec_scaleRange_min",
max = "varSpec_scaleRange_max"),
newEntityName = "uni.mean_rescaled",
requiredField_regex = "uni.name"
);
# studies$rxsTrees$`Balanay-Kearney--2015.rxs.Rmd`$results$YANS_1stfactor_score$value;
################################################################################
############# Compose Standardized Standard Deviations #########################
standardizeSD <- function(sd, mean, new_mean, min) {
if (is.null(mean) || is.null(sd) || is.null(new_mean) || is.null(min)) {
return(NA);
}
mean <- as.numeric(mean) - as.numeric(min);
sd <- as.numeric(sd);
new_mean <- as.numeric(new_mean);
if (is.na(mean) || is.na(sd) || is.na(new_mean)) {
return(NA);
}
return(
(sd / mean) * new_mean
);
}
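### Quick sanity check (illustrative values, not from a study):
### standardizeSD(sd = 1, mean = 3.5, new_mean = 62.5, min = 1) first shifts the
### mean to 3.5 - 1 = 2.5 and then returns (1 / 2.5) * 62.5 = 25 on the
### rescaled metric.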
metabefor::transform_in_every_clusteringEntity(
studies,
fun = standardizeSD,
funArgs = c(mean = "uni.mean",
sd = "uni.sd",
new_mean = "uni.mean_rescaled",
min = "varSpec_scaleRange_min"),
newEntityName = "uni.sd_rescaled",
requiredField_regex = "uni.name"
);
# devtools::load_all("B:/Data/R/metabefor");
# metabefor::get_singleValue_fromTree(
# studies$rxsTrees$`Hoover-Krishnamurti--2010.rxs.Rmd`,
# 'volume_restriction_freq', returnDf = TRUE
# );
#
# metabefor::get_singleValue_fromTree(
# studies$rxsTrees$`Balanay-Kearney--2015.rxs.Rmd`,
# 'hpuse_concerts_asc', returnDf = TRUE
# );
#
# univarDf <-
# get_valueList_asDf_fromStudyTree(
# studies$rxsTrees$`Hoover-Krishnamurti--2010.rxs.Rmd`,
# requiredFields = "uni.variable",
# silent=FALSE
# );
# devtools::load_all("B:/Data/R/metabefor");
univarDfLog <-
capture.output(
univarDf <-
metabefor::get_valueList_asDf(
studies,
requiredFields = "uni.variable",
pathString_regex_select = "results",
flattenVectorsInDf = TRUE,
pathString_regex_explode = "results",
fieldname_regex_alwaysFlatten = "varSpec_",
silent=FALSE
)
);
### GJ to add functionality to metabefor::get_valueList_asDf:
### - specify columns to ignore when comparing nr of columns;
### - store logs as attributes
### - optionally print warnings only
# grep("WARNING", univarDfLog);
#
# cat(univarDfLog[80:110], sep="\n")
#
# setdiff(names(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$results$perception_sound_levels_nc$value), names(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$results$age_participants$value))
writexl::write_xlsx(
univarDf,
path = file.path(dataPath, "univariate.xlsx")
);
### iconv(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$comments$value, from = "UTF-8", to = "UTF-8")
write.csv(
univarDf,
file.path(dataPath, "univariate.csv"),
row.names = FALSE,
fileEncoding = "UTF-8"
);
###-----------------------------------------------------------------------------
### Prepare TXS files
###-----------------------------------------------------------------------------
### These scripts are only necessary once, and so are placed in an external
### file; the call sourcing that file is kept commented out.
# source(here::here("scripts", "prepare-txs-files.R"));
# devtools::load_all("B:/Data/R/metabefor");
assocDfLog <-
capture.output(
assocDf <-
metabefor::get_valueList_asDf(
studies,
requiredFields = c("assoc.var1name", "assoc.var2name"),
flattenVectorsInDf = TRUE,
silent=FALSE
)
);
writexl::write_xlsx(
assocDf,
file.path(dataPath, "associations.xlsx")
);
### iconv(studies$rxsTrees$`Beach-Gilliver--2019.rxs.Rmd`$comments$value, from = "UTF-8", to = "UTF-8")
write.csv(
assocDf,
file.path(dataPath, "associations.csv")
);
if (runStudyTreePreparation) {
### Save to disk
### Store workspace; first delete the old automatically
### saved workspaces
oldfiles <- list.files(
extractionScriptPath,
pattern='^autogenerated--rxsObject-postTreePrep--.*\\.rds$',
full.names=TRUE
);
### Delete all but most recent one
if (length(oldfiles) > 1) {
unlink(head(sort(oldfiles), -1));
}
### Housecleaning
rm(oldfiles);
### Store new workspace
currentDate <- Sys.time();
extractedStudiesFilename <-
format(
currentDate,
"autogenerated--rxsObject-postTreePrep--%Y-%m-%d--%H-%M.rds"
);
tryCatch({
saveRDS(studies,
file=file.path(extractionScriptPath,
extractedStudiesFilename));
}, error = function(e) {
warning(
paste0(
"Encountered error when trying to save extracted studies to ",
extractedStudiesFilename, "!\n"
)
);
});
} else {
### Load most recently saved object
fileList <- list.files(
extractionScriptPath,
pattern='^autogenerated--rxsObject-postTreePrep--.*\\.rds$',
full.names=TRUE
);
if (length(fileList) > 0) {
extractedStudiesFilename <- tail(sort(fileList), 1);
studies <- readRDS(extractedStudiesFilename);
rm(extractedStudiesFilename);
} else {
stop("No automatically stored extracted studies objects available!");
}
rm(fileList);
univarDf <-
as.data.frame(
readxl::read_xlsx(
path = file.path(dataPath, "univariate.xlsx")
)
);
assocDf <-
as.data.frame(
readxl::read_xlsx(
file.path(dataPath, "associations.xlsx")
)
);
}
if ("justifier" %in% installed.packages()[, "Package"]) {
### Clean workspace so we can start storing our decisions.
justifier::clean_workspace(force = TRUE, silent=FALSE);
###---------------------------------------------------------------------------
justifier::log_decision(
"When doing analyses that result in a situation where we have multiple estimates per study/sample, we decide to pool all estimates.",
justification = c(
justifier::justify(
"Pooling estimates within each study means all estimates in the meta-analysis are independent.",
weight = 1
),
justifier::justify(
"Pooling estimates within each study may mean that the pooled estimate is less valid than the best estimate in that study was. For example, if a study measures both Perceived Norms and Descriptive Norms, and we decide to aggregate these by pooling to run analyses on the level of Perceived Norms, the original Perceived Norms measure from that study would be more valid than the pooled estimate, which pulls the estimate towards Descriptive Norms (and so, away from Injunctive Norms).",
weight = -.8
)
),
type = "data_preparation",
alternatives = c(
"Pool all estimates",
"Selectively pool based on construct definitions",
"Never pool, deal with dependence of estimates"
)
);
###---------------------------------------------------------------------------
### Show decisions and justifications (note: we may want to defer this to
### the end of the document)
plot(justifier::workspace());
}
Your justifier workspace has been cleaned and is now empty.
###-----------------------------------------------------------------------------
### Get a list of all countries
###-----------------------------------------------------------------------------
### NOTE: flattenVectorsInDf should be FALSE, but that throws an error;
### debug!
allCountries <-
metabefor::get_singleValue(
studies,
"country",
flattenVectorsInDf = TRUE#FALSE
);
countryTable <-
table(allCountries$country);
countryDf <-
data.frame(iso_a2 = toupper(names(countryTable)),
freq = unname(unclass(countryTable)));
###-----------------------------------------------------------------------------
### Add ISO Alpha-3 codes
###-----------------------------------------------------------------------------
### Get ISO Alpha-2 to ISO Alpha-3 country code translation table
### https://www.iban.com/country-codes
country_iso_codes <-
utils::read.delim(file.path(dataPath,
"country-iso-alpha2-alpha3.csv"),
encoding = "UTF-8");
iso_a3 <-
stats::setNames(country_iso_codes$iso_a3,
nm = country_iso_codes$iso_a2);
### Add ISO Alpha-3
countryDf$iso_a3 <- iso_a3[countryDf$iso_a2];
###-----------------------------------------------------------------------------
### Create and save plot
###-----------------------------------------------------------------------------
### Get world map
spdf_world <- rnaturalearth::ne_countries(returnclass = "sf");
spdf_world_withUsers <-
merge(x = spdf_world,
y = countryDf,
by.x = "gu_a3",
by.y = "iso_a3",
all.x = TRUE,
all.y = FALSE);
worldMap <-
ggplot2::ggplot(data = spdf_world_withUsers) +
ggplot2::geom_sf(ggplot2::aes(fill = freq),
na.rm=TRUE) +
ggplot2::scale_fill_viridis_c(
name = "Studies",
trans="log10",
na.value="black"
) +
ggplot2::theme_void() +
ggplot2::theme(
plot.margin = ggplot2::unit(c(.5,.5,.5,.5), units="cm")
);
ufs::knitAndSave(
worldMap,
path = outputPath,
figCaption = "World map of studies",
figWidth = 20,
figHeight = 10);
World map of studies.
table(
univarDf$uni.variable[
univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj"]
);
age gender
123 214
rows_about_age <-
(univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj") &
(univarDf$uni.variable == "age");
ageDf <- univarDf[rows_about_age, ];
mean(ageDf$uni.mean, na.rm=TRUE);
[1] 20.22113
### Alternatively:
mean(
univarDf[
(univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj") &
(univarDf$uni.variable == "age"),
'uni.mean'
],
na.rm=TRUE
);
[1] 20.22113
hist(
univarDf[
(univarDf$varSpec_oper.constructIdentifier == "inflBiological_79n2w1bj") &
(univarDf$uni.variable == "age"),
'uni.mean'
]
);
### Start with:
### 0.1 Run ufs::quietGitLabUpdate("r-packages/metabefor", quiet = FALSE);
### 0.2 Restart R
### 1) Run setup chunk to load paths
### 2) Run this to load studies:
if (!exists('studies')) {
studies <-
metabefor::rxs_parseExtractionScripts(
path = extractionScriptPath
);
}
### 3) Run the following chunks to supplement data and create data frames:
### 3.1) supplement-values-from-txs-specs
### 3.2) supplement-values-within-trees
### 3.3) produce-univar-df
### 3.4) produce-assoc-df
###
### Then you have univarDf and assocDf
#-----------------------------------------------------------------------------
### Change data frame to wide format (and back to long):
# ?tidyr::pivot_wider
# ?tidyr::pivot_longer
#############################################################################
# How can I find the row number of a particular value in column X?
which(rownames(univarDf) == "1042") # "[1] 2 ".
which(grepl(86.80, univarDf$uni.percentage)) # "[1] 91".
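# Note that grepl() treats 86.80 as a text pattern ("86.8", with '.' matching any
# character), so it can also match e.g. 286.80; a tolerance-based lookup is safer:
# which(abs(as.numeric(univarDf$uni.percentage) - 86.80) < .005)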
##############################################################################
############### Prepare univariate data frame (general)#######################
### Combine the study's author names and year of publication and add to univarDf:
univarDf$author_year <-
paste0(univarDf$studyInfo_name.author,
" (",
univarDf$studyInfo_year.published,
")")
### 'author_year' as first column:
univarDf <- dplyr::relocate(univarDf, author_year, .before = uni.name)
options(digits = 4)
View(univarDf)
library(tidyverse)
# Remove duplicate rows. There is no fast way to do this: rows are distinct on the variable that 'splits' the row (when, in metabefor, a vector is used for certain variables instead of a single value [e.g. for 'year of study' or 'country'], this causes the formation of a duplicate row).
univarDf <- distinct(univarDf)
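# A possible alternative (sketch; 'country' is just an assumed example of an
# exploded column causing such near-duplicate rows, adjust as needed):
# univarDf <- univarDf[!duplicated(univarDf[, setdiff(names(univarDf), "country")]), ]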
# Remove empty columns:
univarDf <- Filter(function(x)!all(is.na(x)), univarDf)
# Remove study by Saunders et al. (2014) (reason: M age > 30 years):
univarDf <- univarDf[univarDf$studyId!="Saunders-Dann-Griest-Frederick--2013.rxs.Rmd", ]
# Add row numbers:
univarDf$row_number <- seq.int(nrow(univarDf))
univarDf <- relocate(univarDf, row_number, .before = author_year)
# Use the correct N for each result:
univarDf <- univarDf %>%
mutate(
N = coalesce(
as.numeric(univarDf$uni.n),
as.numeric(univarDf$subsample_subsample.N),
as.numeric(univarDf$studyMethods_N)
)
)
# Relocate new N column
univarDf <- relocate(univarDf, N, .before = uni.variable)
View(univarDf) # df now contains 1356 rows and 79 columns.
##############################################################################
############ Prepare univariate data frame of BEHAVIOUR ######################
# Keep only rows about behaviour:
univBeh <- univarDf[univarDf$varSpec_oper.constructIdentifier=="behaviour_79n2w1bj", ]
View(univBeh) # df with 363 rows and 81 columns.
# Add row numbers:
univBeh$row_number <- seq.int(nrow(univBeh))
univBeh <- dplyr::relocate(univBeh, row_number, .before = author_year)
# Remove columns:
univBeh <- univBeh[ ,-c(8:10,17:20,22:23,27:32,35:39,41:61,63,66:69,75:76,79:81)]
# Relocate columns:
univBeh <- dplyr::relocate(univBeh, varSpec_hp_measure, .before = uni.subsample)
univBeh <- dplyr::relocate(univBeh, varSpec_hp_precise, .before = uni.subsample)
univBeh <- dplyr::relocate(univBeh, studyInfo_pld_use, .before = uni.subsample)
univBeh <- dplyr::relocate(univBeh, uni.mean_rescaled, .before = uni.percentage)
View(univBeh) # df now has 363 rows and 30 columns.
##############################################################################
############ Prepare univariate data frame HP-use ############################
# Remove studies (exclusively) about pld-use:
univHp <- univBeh[univBeh$studyInfo_pld_use!="yes, only pld use", ]
View(univHp)# df has 337 rows and 30 columns.
# Create separate data frames for 3 types of hp use:
univHpEver <- univHp[univHp$varSpec_hp_measure=="ever used", ]
univHpGen <- univHp[univHp$varSpec_hp_measure=="general", ]
univHpPrec <- univHp[univHp$varSpec_hp_measure=="precise", ]
# For homogeneous results, only 1 result per study is kept; these results are AVERAGED.
# When an average score is calculated from homogeneous results within a study and the Ns of the two (or more) results differ, a WEIGHTED average is calculated (weighting by N, not by the variance). This approach is followed for all analyses.
# When a weighted average is composed, it is placed in the row of one of its constituents, and the rows of the other constituent(s) are removed. Note that, apart from the new average, the information in the other columns is not adjusted! So if, e.g., the new N is needed, additional computations are required.
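# For reference, base R's weighted.mean() computes the same quantity:
# weighted.mean(x, w = N) equals sum(N * x) / sum(N) for complete cases, which is
# the formula used for e.g. Kamphuis (2018) below.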
##############################################################################
###### 1)Ever HP used ########################################################
# Remove empty rows:
View(univHpEver)
univHpEver <- univHpEver[rowSums(is.na(univHpEver)) != ncol(univHpEver), ]
# Add row numbers:
univHpEver$row_number <- seq.int(nrow(univHpEver))
univHpEver <- dplyr::relocate(univHpEver, row_number, .before = author_year)
View(univHpEver) # 21 rows, 30 columns.
# Total: 7 studies, all nominal with 2 categories, except Kamphuis (2018) which has 3 categories (but only 1 is "yes, hp use"). All studies report 1 result, except Kamphuis (2018) which reports 3 results ("ever used" for 3 settings: festival, concert, dance party).
univHpEver <- univHpEver[-c(1,4,5,7,8,10,11,13,14,17,19,20), ] # Remove the "no hp use" rows.
# Kamphuis (2018), compose average % for 3 results (2 options):
# 1. Weighted (for sample sizes) average:
univHpEver[3,13] <- with(univHpEver[c(3:5), ], sum(N * uni.percentage)/sum(N))
# 58.198% weighted average.
# 2. Average (unweighted):
# univHpEver[3,13] <- with(univHpEver[c(3:5), ], sum(uni.percentage)/3)
# 58.33% unweighted average.
univHpEver <- univHpEver[-c(4:5), ] # Remove old rows.
View(univHpEver) # df now has 7 rows and 30 columns.
mean(univHpEver$uni.percentage) # 40.93%
# All 7 studies are given equal weight, although N ranges from 43 to 1443. Note that participants differ in age and continent.
# The weighted average for Kamphuis (2018) was used.
# Basic bar plot:
plotunivHpEver <- ggplot(univHpEver, aes(x = author_year, y = uni.percentage)) +
  geom_bar(stat = "identity") +
  coord_flip()
plotunivHpEver
###############################################################################
#### 2)General use of HP ######################################################
View(univHpGen)
# Remove empty rows
univHpGen <- univHpGen[rowSums(is.na(univHpGen)) != ncol(univHpGen), ]
# Remove empty columns
univHpGen <- Filter(function(x)!all(is.na(x)), univHpGen)
univHpGen <- univHpGen[ ,-c(12)]
# Add row numbers
univHpGen$row_number <- seq.int(nrow(univHpGen))
univHpGen <- relocate(univHpGen, row_number, .before = author_year)
# Remove duplicate rows
univHpGen <- univHpGen[-c(16,18), ]
View(univHpGen) # df now has 43 rows and 24 columns.
# Relocate operationalisation of values
univHpGen <- relocate(univHpGen, varSpec_oper.values, .before = uni.percentage)
univHpGen <- relocate(univHpGen, varSpec_oper.labels, .before = uni.percentage)
# Count number of unique studies:
univHpGenCount <- univHpGen %>% group_by(author_year) %>% summarize(count=n())
View(univHpGenCount) # 18 studies (1-5 results each), only percentages. The results of 14 studies are dichotomous, 2 studies use 2 categories, and 2 studies use 5 categories. Therefore, all results will be transformed into dichotomous results, so results can be aggregated.
# Values are not operationalized in a uniform way (e.g. 1 is not always 'yes') and some results are dichotomous, while others are categorical: 1) Recode, so 2 is always 'yes'; 2) Construct dichotomous results (if not yet present): 'sometimes' counts as 'no', 'always' and 'frequently' as 'yes'; 3) For studies with multiple categories, join all the '2s'.
## 1) Recode, so 2 is always 'yes':
# Add row numbers
univHpGen$row_number <- seq.int(nrow(univHpGen))
univHpGen <- relocate(univHpGen, row_number, .before = author_year)
View(univHpGen)
# DelGiacco (2015):
univHpGen[c(9:13),9] <- ifelse(univHpGen[c(9:13),9] > 3, 2, ifelse(univHpGen[c(9:13),9], 1, 99))
# Gilliver (2012):
univHpGen[c(22:23),9] <- ifelse(univHpGen[c(22:23),9] > 1, 1, ifelse(univHpGen[c(22:23),9], 2))
# Griest (2007):
univHpGen[c(24:26),9] <- ifelse(univHpGen[c(24:26),9] < 3, 1, ifelse(univHpGen[c(24:26),9], 2))
# Hoppenbrouwer (2018):
univHpGen[c(31:35),9] <- ifelse(univHpGen[c(31:35),9]== 1, 2, ifelse(univHpGen[c(31:35),9], 1))
# Martens (2006):
univHpGen[c(38:40),9] <- ifelse(univHpGen[c(38:40),9]== 1, 2, ifelse(univHpGen[c(38:40),9], 1))
# Pouwels:
univHpGen[41,9] <- 2
# Zieltjes (2014):
univHpGen[c(42:43),9] <- ifelse(univHpGen[c(42:43),9]== 1, 2, ifelse(univHpGen[c(42:43),9], 1))
## 2) Keep only the results for 'hp use = yes' (i.e. value 2):
univHpGen1 <- univHpGen[univHpGen$uni.value==2, ]
View(univHpGen1) # df has 19 rows and 24 columns.
# Add row numbers
univHpGen1$row_number <- seq.int(nrow(univHpGen1))
univHpGen1 <- relocate(univHpGen1, row_number, .before = author_year)
## 3) Join percentage (for study with multiple 'yes' [i.e.'2s']) and remove old row:
# Del Giacco (2015):
univHpGen1[6,12] <- sum(univHpGen1[c(5:6),12])
univHpGen1 <- univHpGen1[-5, ] # df has 18 rows and 24 columns.
# Calculate mean % of hp use:
mean(univHpGen1$uni.percentage) # 12.37% is the (unweighted) mean % of "general" hp use, N of studies ranges from 41 to 9458.
# Basic bar plot:
plotHpGen1 <- ggplot(univHpGen1, aes(x = author_year, y = uni.percentage)) +
  geom_bar(stat = "identity") +
  coord_flip()
plotHpGen1
###############################################################################
#### 3) Use of HP in specific situations ######################################
View(univHpPrec)
# Remove empty rows
univHpPrec <- univHpPrec[rowSums(is.na(univHpPrec)) != ncol(univHpPrec), ]
# The df now has 245 rows.
# Add row numbers
univHpPrec$row_number <- seq.int(nrow(univHpPrec))
univHpPrec <- relocate(univHpPrec, row_number, .before = author_year)
# Relocate columns
univHpPrec <- relocate(univHpPrec, varSpec_oper.values, .before = uni.percentage)
univHpPrec <- relocate(univHpPrec, varSpec_oper.labels, .before = uni.percentage)
univHpPrec <- relocate(univHpPrec, uni.value, .before = uni.percentage)
univHpPrec <- relocate(univHpPrec, varSpec_oper.datatype, .before = uni.value)
### Remove duplicate rows:
# Widen et al. (2006) [duplicate rows]:
univHpPrec <- univHpPrec[-c(214,216,220,222), ]
# Gilles et al. (2014)[duplicate rows]:
univHpPrec <- univHpPrec[-c(73,75,77,79,81,83,85,87,89,91), ]
# Matei et al. (2018)[duplicate rows]:
univHpPrec <- univHpPrec[-c(124,127,130,133,136,139,142,145,148), ]
View(univHpPrec) # The DF now has 222 rows.
## Count number of unique studies
univHpPrecCount <- univHpPrec %>% group_by(author_year) %>% summarize(count=n())
View(univHpPrecCount) # Total: 26 studies, with nominal, ordinal, and numeric results. Almost 2/3 of the results are dichotomous. Therefore, the remaining 1/3 of the results are converted into dichotomous results, so the results of the studies can be aggregated.
# Count number of results for each setting of hp use:
univHpPrecSettingC <- univHpPrec %>% group_by(varSpec_hp_precise) %>% summarize(count=n())
View(univHpPrecSettingC) # Total: 9 kinds of settings (incl. 'other'), with the number of results per setting ranging from 3 to 48.
### Df1:
### Recode, so 2 is always 'yes' ('sometimes'= 'no'; 'always' & 'frequently' = 'yes'):
univHpPrec1 <- univHpPrec
View(univHpPrec1)
# Auchter M.& Le Prell C.G. (2014):
univHpPrec1[c(1:5),15] <- ifelse(univHpPrec1[c(1:5),15] > 3, 2, ifelse(univHpPrec1[c(1:5),15], 1, 99))
# Balanay, 2015:
univHpPrec1[c(6:19),15] <- ifelse(univHpPrec1[c(6:19),15]== 1, 2, ifelse(univHpPrec1[c(6:19),15], 1, 99))
# Beach E., Nielsen L.B. & Gilliver M. (2016): numeric.
# Bogosch I.I., House R.A. & Kudla I. (2005):
univHpPrec1[c(24:26),15] <- ifelse(univHpPrec1[c(24:26),15]== 3, 2, ifelse(univHpPrec1[c(24:26),15], 1, 99))
# Callahan et al. (2014):
univHpPrec1[c(27:30),15] <- ifelse(univHpPrec1[c(27:30),15]== 1, 2, ifelse(univHpPrec1[c(27:30),15], 1, 99))
# Carter L. & Black D. (2017):
univHpPrec1[c(31:66),15] <- ifelse(univHpPrec1[c(31:66),15]== 3, 2, ifelse(univHpPrec1[c(31:66),15], 1, 99))
# de Lijster G.P.A. & van der Ploeg C.P.B. (2015):
univHpPrec1[c(67:68),15] <- ifelse(univHpPrec1[c(67:68),15]== 1, 2, ifelse(univHpPrec1[c(67:68),15], 1, 99))
# Eilering, M. & Zweet, D. (2018):
univHpPrec1[c(69:72),15] <- ifelse(univHpPrec1[c(69:72),15]== 1, 2, ifelse(univHpPrec1[c(69:72),15], 1, 99))
# Gilles et al. (2014):
univHpPrec1[c(73:82),15] <- ifelse(univHpPrec1[c(73:82),15] > 3, 2, ifelse(univHpPrec1[c(73:82),15], 1, 99))
# Gorter, A.F. (2012):
univHpPrec1[c(83:84),15] <- ifelse(univHpPrec1[c(83:84),15]== 1, 2, ifelse(univHpPrec1[c(83:84),15], 1, 99))
# Hickson et al. (2007): row 85-98, correct values (i.e. 2=yes).
# Holmes et al., (2007): row 99-118, correct values (i.e. 2=yes).
# Khan et al. (2018): row 119-121, numeric.
# Matei et al. (2018): row 122-139, correct values (i.e. 2=yes).
# Olsen et al. (2004): row 140-143, correct values (i.e. 2=yes).
# Olson et al. (2016):
univHpPrec1[c(144:147),15] <- ifelse(univHpPrec1[c(144:147),15]== 1, 2, ifelse(univHpPrec1[c(144:147),15], 1, 99))
# Peters et al. (2018): row 148-152, numeric.
# Rawool V.W. & Collignon-Wayne L.A. (2008):
univHpPrec1[c(154:155),15] <- ifelse(univHpPrec1[c(154:155),15]== 1, 2, ifelse(univHpPrec1[c(154:155),15], 1, 99))
# The response to the question used by Rawool (2008) (‘I use noisy equipment (such as..) without HP') was reversed (strong disagreement now is regarded as 'hp use').
univHpPrec1[c(156:160),15] <- ifelse(univHpPrec1[c(156:160),15]==1, 2, ifelse(univHpPrec1[c(156:160),15], 1, 99))
# Vogel I., Brug J., Van der Ploeg C.P.B. & Raat H. (2010): correct values (i.e. 2=yes).
# Warner-Czyz A.D. & Cain S. (2015): 163-186
univHpPrec1[c(163:186),15] <- ifelse(univHpPrec1[c(163:186),15]== 3, 2, ifelse(univHpPrec1[c(163:186),15], 1, 99))
# Weichbold V.& Zorowka P. (2003): row 187-191, correct values (i.e. 2=yes).
# Weichbold V. & Zorowka P. (2007): idem, correct values (i.e. 2=yes).
# Widén S, Bohlin M, Johansson I. (2011): numeric.
# Widén S.E. (2013): numeric.
# Widen S.E., Holmes A.E. & Erlandsson S.I. (2006)
univHpPrec1[c(195:202),15] <- ifelse(univHpPrec1[c(195:202),15]== 1, 2, ifelse(univHpPrec1[c(195:202),15], 1, 99))
# Widén S.E., Holmes A.E., Johnson T., Bohlin M., & Erlandsson (2009): row 203 - 220, correct values (i.e. 2=yes).
# Numeric studies:
univHpPrec1[c(20:23,119:121,148:153,192:194),15] <- 2
View(univHpPrec1)
### Df2:
### Keep only the results for 'hp use = yes' (i.e. value 2):
univHpPrec2 <- univHpPrec1[univHpPrec1$uni.value==2, ]
# Add row numbers
univHpPrec2$row_number <- seq.int(nrow(univHpPrec2))
univHpPrec2 <- relocate(univHpPrec2, row_number, .before = author_year)
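### The two lines above (add a row_number column and move it to the front)
### recur for every subset created below. A minimal, hypothetical helper
### capturing that pattern (not used in the rest of this script; it uses
### dplyr::relocate(), which the relocate() calls here already rely on):
addRowNumbers <- function(df) {
  df$row_number <- seq.int(nrow(df))
  dplyr::relocate(df, row_number, .before = author_year)
}
# Example (hypothetical): univHpPrec2 <- addRowNumbers(univHpPrec2)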
# Remove empty row (that suddenly appeared...):
univHpPrec2 <- univHpPrec2[-c(89), ]
View(univHpPrec2)# The DF now has 105 rows.
### Join percentages and rescaled mean scores:
# Auchter M. & Le Prell C.G. (2014), sum the two '2s':
univHpPrec2[1,16] <- sum(univHpPrec2[c(1:2),16])
univHpPrec2 <- univHpPrec2[-2, ]
# Gilles et al. (2014), sum 'often' and 'always' (i.e. four '2s'):
univHpPrec2[31,16] <- sum(univHpPrec2[c(31:32),16])
univHpPrec2 <- univHpPrec2[-32, ]
univHpPrec2[32,16] <- sum(univHpPrec2[c(32:33),16])
univHpPrec2 <- univHpPrec2[-33, ] # The DF now has 102 rows.
# Add the results of the rescaled means to the column with the percentages:
univHpPrec2[c(9:12),16] <- univHpPrec2[c(9:12),11]
univHpPrec2[c(51:53),16] <- univHpPrec2[c(51:53),11]
univHpPrec2[c(67:72),16] <- univHpPrec2[c(67:72),11]
univHpPrec2[c(86:88),16] <- univHpPrec2[c(86:88),11]
View(univHpPrec2) # The column uni.percentage now has no 'NAs'.
### Df3:
univHpPrec3 <- univHpPrec2
# Results that fall in the same category (e.g. "concert/festival") are AVERAGED, so each study contributes only 1 result per category.
# If the Ns of two homogeneous results differ, a WEIGHTED average of the (dichotomous) results (weighting by N, not by the variance) is calculated for that study.
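### The weighted average referred to above is a simple N-weighted mean of the
### percentages, i.e. sum(N_i * p_i) / sum(N_i); sections 3.4 and 3.5 below
### compute it with with() on the relevant rows. A minimal sketch of the same
### computation as a hypothetical helper (not used below):
weightedPct <- function(p, n) {
  sum(n * p) / sum(n)
}
# Example: weightedPct(p = c(20, 40), n = c(100, 300)) gives 35 (not 30).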
# Beach et al. (2016): calculate mean hp use for concert & festival (n=51 for both subgroups, so no weights are used) (rows 10 and 11):
univHpPrec3[10,16] <- mean(univHpPrec3[c(10:11),16])
univHpPrec3 <- univHpPrec3[-11, ]
# Add row numbers:
univHpPrec3$row_number <- seq.int(nrow(univHpPrec3))
univHpPrec3 <- relocate(univHpPrec3, row_number, .before = author_year)
View(univHpPrec3) # The df now has 101 rows.
# Widen (2006) and Carter & Black (2017) are present four and ten times respectively; label the subgroups 'a' and 'b' to distinguish them in the plots:
univHpPrec3[c(15:20),2] <- "Carter L. & Black D. (2017) a"
univHpPrec3[c(21:26),2] <- "Carter L. & Black D. (2017) b"
univHpPrec3[c(88:89),2] <- "Widen S.E., Holmes A.E. & Erlandsson S.I. (2006) a"
univHpPrec3[c(90:91),2] <- "Widen S.E., Holmes A.E. & Erlandsson S.I. (2006) b"
# Widen (2006): a=USA, b=Sweden; Carter (2017): a=normal hearing, b=hearing impairment.
# Peters, G.J.Y. & Noijen, J. (2019): Do not use these results, as neither 'epc' nor 'epw' is similar to the measures of 'hp use' in the other studies.
univHpPrec3 <- univHpPrec3[-c(66:71), ]
View(univHpPrec3) # The df now has 95 rows.
# Do not use 'epc' (row 67,69,71) and use mean of behavior_concert_epw and behavior_festival_epw:
# univHpPrec3 <- univHpPrec3[-c(67,69,71), ]
# univHpPrec3[67,11] <- mean(univHpPrec3[c(67,68),11])
# univHpPrec3 <- univHpPrec3[-68, ] # Concert is n=272, and festival is n=332. This difference is not taken into account.
# The df 'univHpPrec3' can now be used to calculate the average % of hp use for the specified different settings.
###############################################################################
### 3.1. Mean % of hp use at concerts/festivals ###############################
mean(univHpPrec3[univHpPrec3$varSpec_hp_precise=="concert/festival","uni.percentage"]) # 18.2%.
# or:
concertHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="concert/festival", ]
mean(concertHp$uni.percentage) # 18.2% (range 1.90% - 61.20%). Unweighted average. The N of the studies ranges from 49 to 130,000!
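# The 18.2% is an unweighted mean across studies whose Ns differ by several
# orders of magnitude. For illustration only, an N-weighted alternative could
# be computed as below (a sketch, not part of this review's analysis, which
# reports unweighted means):
# concertHpWeighted <- with(concertHp, sum(N * uni.percentage) / sum(N))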
# Add row numbers:
concertHp$row_number <- seq.int(nrow(concertHp))
concertHp <- relocate(concertHp, row_number, .before = author_year)
View(concertHp) # Df with 17 rows/results from 15 studies.
## Count number of unique studies
concertHpCount <- concertHp %>% group_by(author_year) %>% summarize(count=n())
View(concertHpCount) # Total: 15 studies. Carter (2017) and Widen (2006) both have 2 subgroups and thus two results each.
# In Carter (2017) the groups consist of people with and without a hearing impairment; in Widen (2006) two similar studies were performed in the US and in Sweden.
# As the researchers and the methods used were the same in both cases, using the two results from each study may decrease the heterogeneity of the results in the review. On the other hand, the two groups in each study are very different; in the case of Widen (2006), the results even moved in opposite directions. For the latter reason, I use BOTH (!) results of these 2 studies.
# Basic bar plot:
plotConcertHp <- ggplot(concertHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotConcertHp
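### The same horizontal bar plot is rebuilt for every setting in the sections
### below. A minimal, hypothetical helper capturing that pattern (not used in
### the rest of this script; assumes ggplot2 is loaded, as for the calls above):
plotHpBar <- function(df) {
  ggplot(df, aes(x = author_year, y = uni.percentage)) +
    geom_bar(stat = "identity") +
    coord_flip()
}
# Example: plotHpBar(concertHp) reproduces plotConcertHp.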
###############################################################################
### 3.2. Mean % of hp use while making music ##################################
mean(univHpPrec3[univHpPrec3$varSpec_hp_precise=="making music","uni.percentage"]) # 13.71%, for 21 rows/results from 10 studies.
# Some studies include multiple results. Use only an overall/average result from these studies:
makeMusicHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="making music", ]
# Add row numbers:
makeMusicHp$row_number <- seq.int(nrow(makeMusicHp))
makeMusicHp <- relocate(makeMusicHp, row_number, .before = author_year)
View(makeMusicHp)
# Callahan et al. (2011): calculate mean of "Using HPD during ensemble performances" (i.e. hpuse_music_freq) and "Using HPD when practicing or rehearsing" (i.e. hpuse_performance_freq):
makeMusicHp[3,16] <- mean(makeMusicHp[c(3:4),16])
makeMusicHp <- makeMusicHp[-4, ]
# Olson et al. (2014): calculate mean of "hpuse_rehearsal_freq" and "hpuse_performance_freq":
makeMusicHp[17,16] <- mean(makeMusicHp[c(17:18),16])
makeMusicHp <- makeMusicHp[-18, ]
# Matei et al. (2018): reports numerous results, use only "hpuse_all_freq" and drop the other results.
makeMusicHp <- makeMusicHp[-c(8:15), ]
View(makeMusicHp) # Df now has 11 rows from 10 studies (Carter, 2017 has results for both subgroups).
# Mean % of hp use while making music (with adjusted df):
mean(makeMusicHp$uni.percentage) # 11.80% (range 0.0% - 35.0%). The N of the studies is not taken into account (i.e. no weighted mean). N ranges from 6 - 211.
# Basic bar plot:
plotMakeMusicHp <- ggplot(makeMusicHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotMakeMusicHp
###############################################################################
### 3.3. Mean % of hp use during club/disco visit #############################
discoHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="club/discotheque", ]
# Add row numbers:
discoHp$row_number <- seq.int(nrow(discoHp))
discoHp <- relocate(discoHp, row_number, .before = author_year)
View(discoHp) # The df has 17 rows/results from 15 studies (Carter, 2017 and Widen, 2006 each have 2 results, as they have 2 subgroups).
# Mean % of hp use at disco/club:
mean(discoHp$uni.percentage) # 9.55% (range 0.00% - 37.50%). The N of the studies is not taken into account (i.e. no weighted mean). The N ranges from 51 - 1757.
# Basic bar plot:
plotDiscoHp <- ggplot(discoHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotDiscoHp
# dev.off() # Commented out: no graphics device was explicitly opened above, so this call is not needed here.
###############################################################################
### 3.4. Mean % of hp use during use of power tool ############################
toolsHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="use of power tool", ]
# Add row numbers:
toolsHp$row_number <- seq.int(nrow(toolsHp))
toolsHp <- relocate(toolsHp, row_number, .before = author_year)
View(toolsHp)
# Compute weighted averages for studies with >1 (homogeneous) result:
# Balanay J.A.G. & Kearney G.D. (2015):
toolsHp[1,16] <- with(toolsHp[c(1:2), ], sum(N * uni.percentage)/sum(N))# 31.757%
toolsHp <- toolsHp[-2, ] # Remove old row.
# Hickson et al. (2007):
toolsHp[4,16] <- with(toolsHp[c(4:5), ], sum(N * uni.percentage)/sum(N))
# 28.92%
toolsHp <- toolsHp[-5, ]
# Holmes et al. (2007):
toolsHp[5,16] <- with(toolsHp[c(5:6), ], sum(N * uni.percentage)/sum(N))
# 13.05%
toolsHp <- toolsHp[-6, ]
# Warner-Czyz et al. (2015):
toolsHp[7,16] <- with(toolsHp[c(7:8), ], sum(N * uni.percentage)/sum(N))
# 6.76%
toolsHp <- toolsHp[-8, ]
# Widén et al. (2009):
toolsHp[8,16] <- with(toolsHp[c(8:9), ], sum(N * uni.percentage)/sum(N))# 21.91%
toolsHp <- toolsHp[-9, ]
View(toolsHp) # Df with 8 rows from 7 studies (Carter, 2017 two results)
# Mean % of hp use while using power tools:
mean(toolsHp$uni.percentage) # 21.69% (range 6.76% - 36.13%). The N of the studies is not taken into account (i.e. no weighted mean). Before (!) scores were averaged (for studies with >1 result), N ranged from 25 - 824.
# The % of hp use while using power tools is, on average, higher than during lawn mowing. In studies where these 2 behaviors were aggregated, the % may therefore be lower than in studies where only the use of power tools was measured. After aggregation, a true comparison is thus no longer possible. This may also apply to other parts of the review where results that are not completely homogeneous have been aggregated.
# Basic bar plot:
plotToolsHp <- ggplot(toolsHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotToolsHp
# dev.off() # Commented out: no graphics device was explicitly opened above, so this call is not needed here.
###############################################################################
### 3.5. Mean % of hp use during hobby & sports ###############################
hobbyHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="hobby and sports", ]
# Add row numbers:
hobbyHp$row_number <- seq.int(nrow(hobbyHp))
hobbyHp <- relocate(hobbyHp, row_number, .before = author_year)
View(hobbyHp) # df with 9 rows/results from 4 studies.
# Compute weighted averages for studies with >1 (homogeneous) result:
# Holmes et al. (2007):
hobbyHp[2,16] <- with(hobbyHp[c(2:4), ], sum(N * uni.percentage)/sum(N))#14.97%
hobbyHp <- hobbyHp[-c(3:4), ]
# Warner-Czyz et al. (2015):
hobbyHp[3,16] <- with(hobbyHp[c(3:4), ], sum(N * uni.percentage)/sum(N))#2.22%
hobbyHp <- hobbyHp[-4, ]
# Widén et al. (2009):
hobbyHp[4,16] <- with(hobbyHp[c(4:6), ], sum(N * uni.percentage)/sum(N))#9.37%
hobbyHp <- hobbyHp[-c(5:6), ]
View(hobbyHp) # df with 4 rows from 4 studies.
# Mean % of hp use during hobby/sports:
mean(hobbyHp$uni.percentage) #8.14% (range 2.22% - 14.97%). The N of the studies is not taken into account (i.e. no weighted mean). Before scores were averaged (for studies with >1 result), N ranged from 23 - 1285.
# The settings in this category are quite diverse: hp use is highest at NASCAR events (20-30%), while it is lowest in aerobics classes (about 0%).
# Basic bar plot:
plotHobbyHp <- ggplot(hobbyHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotHobbyHp
###############################################################################
### 3.6. Mean % of hp use during shooting #####################################
shootingHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="shooting", ]
# Add row numbers:
shootingHp$row_number <- seq.int(nrow(shootingHp))
shootingHp <- relocate(shootingHp, row_number, .before = author_year)
View(shootingHp) # df with 7 rows/results from 6 studies (Carter, 2017 twice).
# Mean % of hp use while using firearms:
mean(shootingHp$uni.percentage) #52.70% (range 28.00% - 79.20%), for 7 rows/results from 6 studies. The N of the studies is not taken into account (i.e. no weighted mean). The N ranges from 11 - 697.
# Basic bar plot:
plotShootingHp <- ggplot(shootingHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotShootingHp
###############################################################################
### 3.7. Mean % of hp use during work #########################################
workHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="at work", ]
# Add row numbers:
workHp$row_number <- seq.int(nrow(workHp))
workHp <- relocate(workHp, row_number, .before = author_year)
# For Khan et al. (2018) use only the overall result:
workHp <- workHp[-c(4:5), ]
View(workHp) # df with 5 rows/results from 5 studies.
# Mean % of hp use at work:
mean(workHp$uni.percentage) #16.30% (range 10.90% - 29.11%). The N of the studies is not taken into account (i.e. no weighted mean). The N ranges from 50 - 192.
# Basic bar plot:
plotWorkHp <- ggplot(workHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotWorkHp
# dev.off() # Commented out: no graphics device was explicitly opened above, so this call is not needed here.
###############################################################################
### 3.8. Mean % of hp use while at the pub ####################################
pubHp <- univHpPrec3[univHpPrec3$varSpec_hp_precise=="pub", ]
# Add row numbers:
pubHp$row_number <- seq.int(nrow(pubHp))
pubHp <- relocate(pubHp, row_number, .before = author_year)
View(pubHp)# df with 3 rows/results from 2 studies.
# Mean % of hp use at the pub:
mean(pubHp$uni.percentage) #4.83% (range 0.00% - 12.50%). The N of the studies is not taken into account (i.e. no weighted mean). N ranges from 51 - 178 (subgroups: 115 & 63).
# Basic bar plot:
plotPubHp <- ggplot(pubHp, aes(x = author_year, y = uni.percentage)) + geom_bar(stat = "identity") + coord_flip()
plotPubHp
###############################################################################
### 3.9. Plot of 'specific', 'ever', and 'general' hp use combined ############
## Make one data frame of 'hp precise', 'hp ever' and 'hp general':
# Hp precise (data 3.1 - 3.8.):
dfP1 <- concertHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP2 <- discoHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP3 <- makeMusicHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP4 <- toolsHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP5 <- hobbyHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP6 <- shootingHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP7 <- workHp[ ,c("varSpec_hp_precise","uni.percentage")]
dfP8 <- pubHp[ ,c("varSpec_hp_precise","uni.percentage")]
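# The eight per-setting data frames above share the same two columns, so they
# can be stacked into one long data frame for the combined plot; a minimal
# sketch (the object name dfPrecise is hypothetical, not used below):
# dfPrecise <- rbind(dfP1, dfP2, dfP3, dfP4, dfP5, dfP6, dfP7, dfP8)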
# Hp ever:
View(univHpEver)
univHpEver <- univHpEver[ ,-7]
univHpEver <- rename(univHpEver, varSpec_hp_precise = varSpec_hp_measure)
dfEv <- univHpEver[ ,c("varSpec_hp_precise","uni.percentage")]