-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathindex.Rmd
executable file
·199 lines (185 loc) · 11.1 KB
/
index.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
---
title: "Summary"
params:
analysisDirFP: null
runID: null
seqDirFP: null
sampleIDInsertFreq: 5
---
```{r index-comments, include=FALSE, eval=FALSE}
# CSS styling helps to manage the layout of the page to compensate for the table. From: https://itqna.net/questions/48098/how-adjust-margins-r-markdown.
# For rapid testing:
rmarkdown::render(input = "index.Rmd", output_file = "index.html", params = list(analysisDirFP = "../../../../runs/M347-21-013", runID = "M347-21-013", seqDirFP = "../../../../runs/M347-21-013"))
```
<style type="text/css">
.main-container {
max-width: 60%;
margin-left: 50px;
margin-right: 50px;
}
</style>
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(formattable)
library(kableExtra)
# Note that Rmd files use the location of the Rmd file as their working directory during knitting. This causes problems when trying to unambiguously locate external dependencies that aren't in the same directory. Setting working directory below to address.
knitr::opts_knit$set(root.dir = params$analysisDirFP)
```
```{r thresholds, include=FALSE}
fastqcRawThreshold <- 100000 # min number of raw reads accepted in fastqc_raw column
#seqycleanKeptReviewThreshold <- 90 # min percent of reads remaining after seqyclean (pass/review)
#seqycleanKeptFailThreshold <- 70 # min percent of reads remaining after seqyclean (review/fail)
depthAfterTrimThreshold <- 100 # min depth after trimming
covAfterTrimThreshold <- 90 # min coverage after trimming
#perHumanThreshold <- NULL # max percentage human reads allowed per sample (not in use)
perSC2Threshold <- 65 # min percentage SC2 reads allowed per sample
perNThreshold <- 10 # max percentage N allowed in genome
#totMappedReads <- 100000 # min number of reads mapping to SC2 Wuhan reference
perMappedReads <- 65 # min percentage of reads mapping to SC2 Wuhan reference
numORFsThreshold <- 10 # min number of ORFs passing QC
sGeneCovThreshold <- 95 # min percentage of bases in S gene with any coverage
sPerNThreshold <- 10 # min percentage of Ns in S gene causing failure
sGeneFrameshiftThreshold <- FALSE # frameshifts allowed in S gene?
```
```{r format-fnc, include=FALSE}
highGood <- function(v, t) {
# accepts value and threshhold and returns green if value >= threshhold and gray if not
# for color bar formatting
return(ifelse(v >= t, "lightgreen", "lightgray"))
}
```
```{r read-sum, include=FALSE}
# Read summary.txt in
sumTab <- read_tsv("summary.txt")
# This does all of the formatting for the table read in above and displayed in the next code chunk. It contains a mix of dplyr, formattable, and kableExtra commands.
sumTab <- mutate(sumTab, sample = str_replace(sample, paste0("-", params$runID), "")) %>%
# remember to mod column names to match if columns included change
select(-aligner_version, -ivar_version, -fastp_reads_passed) %>%
mutate(
pangolin_lineage = cell_spec(pangolin_lineage,
color = ifelse(pangolin_lineage == "None", "red", "black"),
bold = ifelse(pangolin_lineage == "None", T, F)),
pangolin_status = cell_spec(pangolin_status,
color = ifelse(pangolin_status == "passed_qc", "black", "red"),
bold = ifelse(pangolin_status == "passed_qc", F, T)),
fastqc_raw_reads_1 = color_bar(color = highGood(fastqc_raw_reads_1, fastqcRawThreshold))(fastqc_raw_reads_1),
fastqc_raw_reads_2 = color_bar(color = highGood(fastqc_raw_reads_2, fastqcRawThreshold))(fastqc_raw_reads_2),
depth_after_trimming = cell_spec(depth_after_trimming,
color = ifelse(depth_after_trimming >= depthAfterTrimThreshold, "black", "red"),
bold = ifelse(depth_after_trimming >= depthAfterTrimThreshold, F, T)),
coverage_after_trimming = cell_spec(coverage_after_trimming,
color = ifelse(coverage_after_trimming >= covAfterTrimThreshold, "black", "red"),
bold = ifelse(coverage_after_trimming >= covAfterTrimThreshold, F, T)),
`%_N` = cell_spec(`%_N`,
color = ifelse(`%_N` >= perNThreshold, "red", "black"),
bold = ifelse(`%_N` >= perNThreshold, T, F)),
vadr_status = cell_spec(vadr_status,
color = ifelse(vadr_status == "PASS", "black", "red"),
bold = ifelse(vadr_status == "PASS", F, T)),
`%_Reads_Matching_SC2_Ref` = cell_spec(`%_Reads_Matching_SC2_Ref`,
color = ifelse(`%_Reads_Matching_SC2_Ref` < perMappedReads, "red", "black"),
bold = ifelse(`%_Reads_Matching_SC2_Ref` < perMappedReads, T, F)),
ORFs.Passing.QC = cell_spec(ORFs.Passing.QC,
color = ifelse(ORFs.Passing.QC < numORFsThreshold, "red", "black"),
bold = ifelse(ORFs.Passing.QC < numORFsThreshold, T, F)),
Coverage.S = cell_spec(Coverage.S,
color = ifelse(is.na(Coverage.S) == TRUE | Coverage.S < sGeneCovThreshold, "red", "black"),
bold = ifelse(is.na(Coverage.S) == TRUE | Coverage.S < sGeneCovThreshold, T, F)),
Percent.Ns.S = cell_spec(Percent.Ns.S,
color = ifelse(is.na(Percent.Ns.S) | Percent.Ns.S >= sPerNThreshold, "red", "black"),
bold = ifelse(is.na(Percent.Ns.S) | Percent.Ns.S >= sPerNThreshold, T, F)),
vdr_sgene_orftshift = cell_spec(vdr_sgene_orftshift,
color = ifelse(vdr_sgene_orftshift != sGeneFrameshiftThreshold, "red", "black"),
bold = ifelse(vdr_sgene_orftshift != sGeneFrameshiftThreshold, T, F)),
S_aa_INDELs = str_replace_all(S_aa_INDELs, c("," = ", ")))
# Make column names nicer
sumTab <- select(sumTab,
Sample.ID = sample_id,
Sample.Name = sample,
Depth.Post.Trim = depth_after_trimming,
Coverage.Post.Trim = coverage_after_trimming,
`#FastQC.R1` = fastqc_raw_reads_1,
`#FastQC.R2` = fastqc_raw_reads_2,
`%Reads.Mapping.SC2` = `%_Reads_Matching_SC2_Ref`,
ORFs.Passing.QC,
Coverage.S,
Pangolin = pangolin_lineage,
Pangolin.QC = pangolin_status,
`#Lineage.Subs` = pangolin_substitutions,
pangoLEARN.v = pangoLEARN_version,
`GenBank#`,
`%N` = `%_N`,
Percent.Ns.S,
Vadr.S.ORF.Shift = vdr_sgene_orftshift,
NextClade = nextclade_clade,
`#Seqyclean.Pairs` = seqyclean_pairs_kept_after_cleaning,
`%Seqyclean.Pairs` = seqyclean_percent_kept_after_cleaning,
`%Human.Reads` = `%_human_reads`,
`%SC2.Reads` = `%_SARS-COV-2_reads`,
`#iVar.Variants` = ivar_num_variants_identified,
`#BCFTools.Variants` = bcftools_variants_identified,
`#BEDTools.Failed.Amps` = bedtools_num_failed_amplicons,
`#SAMTools.Failed.Amps` = samtools_num_failed_amplicons,
`#N` = num_N,
`#Degenerate` = num_degenerage,
`#ACTG` = num_ACTG,
`#Total.Bases` = num_total,
Total.Reads.Analyzed = Total_Reads_Analyzed,
Mean.Cov.Depth = ave_cov_depth,
Vadr = vadr_status,
Vadr.All.ORF.Shift = vdr_sample_orfshift,
AA.Changes.S = S_aa_INDELs,
Length.Longest.Insert = len_largest_insertion,
Length.Longest.Del = len_largest_deletion,
Mean.Depth.S,
Percent.Pos.Min.Cov.S
)
# Insert Sample.Name column into table at every nth column (params$sampleIDInsertFreq) position
# how many times to insert sampleID column
colReps <- floor(ncol(sumTab) / params$sampleIDInsertFreq)
# create and bind columns to insert
sumTab <- bind_cols(sumTab, as_tibble(replicate(colReps, sumTab$Sample.Name)))
# Move Sample.Name to the first column
sumTab <- relocate(sumTab, Sample.Name)
# Grab the vector of column names
sumTabColNames <- colnames(sumTab)
# function to rename columns slightly more nicely. Note R won't allow redundant column names.
colNamer <- function(c) {
return(paste0("Sample.Name", str_replace(c, "V", "")))
}
# move new sampleID columns to right place
for (i in 1:colReps) {
sumTab <- relocate(sumTab,
eval(paste0("V", i)),
.after = sumTabColNames[i*params$sampleIDInsertFreq]) %>%
rename_with(colNamer, starts_with(paste0("V", i)))
}
# # Add global QC column
# sumTab <- mutate(sumTab,
# Global.QC = case_when(
# Depth.Post.Trim < 100 ~ "FAIL",
# `#FastQC.R1` < fastqcRawThreshold ~ "#FastQC.R1",#"FAIL",
# `%Reads.Mapping.SC2` < perMappedReads ~ "%Reads.Mapping.SC2", #"FAIL",
# ORFs.Passing.QC < numORFsThreshold ~ "ORFs.Passing.QC", #"FAIL",
# Coverage.S < sGeneCovThreshold ~ "Coverage.S", #"FAIL",
# Pangolin == "None" ~ "Pangolin", #"FAIL",
# Pangolin.QC == "fail" ~ "Pangolin.QC", "FAIL",
# `%N` >= perNThreshold ~ "%N", #"FAIL",
# Percent.Ns.S >= perNThreshold ~ "Percent.Ns.S", "FAIL",
# Vadr.S.ORF.Shift != sGeneFrameshiftThreshold ~ "Vadr.S.ORF.Shift", #"FAIL",
# TRUE ~ "pass"
# )) %>%
# # mutate(Global.QC = cell_spec(Global.QC,
# # color = ifelse(Global.QC == "FAIL", "red", "black"),
# # bold = ifelse(Global.QC == "FAIL", T, F))) %>%
# relocate(Global.QC)
```
```{r display-sum, echo=FALSE}
# Note at time of dev, adding scroll boxes was the only way AJW could get fixed_thead = TRUE to work as expected.
sampleIDIndices <- which(str_starts(colnames(sumTab), "Sample.Name"))
knitr::kable(sumTab, escape = F, align = "l") %>%
column_spec(sampleIDIndices, background = "lightgray") %>%
kable_styling(fixed_thead = TRUE, bootstrap_options = c("striped", "hover")) %>%
scroll_box(width = "150%", height = "600px")
```