-
Notifications
You must be signed in to change notification settings - Fork 0
/
mrsi_funcs.R
115 lines (101 loc) · 4.23 KB
/
mrsi_funcs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
require(dplyr)
zscore_abs <- function(x) abs(scale(x,center=T,scale=T)[,1])
# added 20230614 (to match previous analysis)
# 20 for Glu and GABA. guess for others
# TODO: VALIDATE Glc and Cho SD thres
sd_thres <- function(x)
case_when(
grepl('Glu|GABA',x) ~ 20,
grepl('Cho',x) ~ 40, # TODO: check
grepl('Glc',x) ~ 120,# TODO: check
.default=20) # taurine is 20, just not explicitly
#' select metabolites based on column name using dplyr::match w/ regular expression
#'
#' @param met_keep vector of metabolite column names
#' @return pattern matching CR and SD columns for metabolite. for dplyr::matches()
colname_sd_or_cr <- function(met_keep) {
mets_patt <- paste0(collapse="|", mets_keep) # GABA|Glu|Gln|Cho|Glc
mets_regex <- glue::glue('^({mets_patt})\\.(Cr|SD)') # Cr ratio and SD
}
#' original wide format lcmodel output to row per metabolite with column for Cr ratio and SD
#'
#' @param mrs lcmodel mrsi input data frame
#' @param z_thres maximum abs zscore
#' @return long dataframe with 'Cr' and 'SD' column. NA if zscore>thres
mrs_wide_to_long_cleaned <- function(mrs, z_thres=3) {
# longer with new cols 'met', 'Cr', and 'SD' (reshape to remove per metabolite columns)
mrs_long <- longer_met_CrSD(mrs) %>%
# collapse across hemispheres
# conveniently R,L are always hemisphere. others: MPFC ACC
mutate(biregion=gsub('^(R|L)','',region)) %>%
# work on each region X met separately
group_by(met, region) %>%
#NB. 'Cr' here is Cr ratio for a given metabolite
# sd_thres is 20 for gaba and glu
mutate(Cr=ifelse(SD>sd_thres(met), NA, Cr),
met_crz=zscore_abs(Cr),
Cr=ifelse(met_crz>=z_thres, NA, Cr))
}
#' reject rows that fail quality checking
#'
#' @param MRS input dataframe with SD and Cr columns for macromolicules
#' @return dataframe with noisy rows removed
mrsi_metqc <- function(MRS) {
checkmate::expect_subset(c("roi", "GPC.Cho.SD", "NAA.NAAG.SD", "Cr.SD", "MM20.Cr", "Glu.Cr"),
names(MRS))
#keep people's correct coordinates
MRS <- MRS %>% filter(!is.na(roi))
# get rid of junk data noticed recently
MRS<- MRS %>% filter(Glu.Cr != 0)
# save out a dataframe to share data after this step
# Step 2 Outlier Detection - get rid of peole who have bad data for 3 major metabolite peaks - GPC+Cho, NAA+NAAG, Cr
MRS <- MRS %>%
filter(GPC.Cho.SD <= 10 | is.na(GPC.Cho.SD),
NAA.NAAG.SD <= 10 | is.na(NAA.NAAG.SD),
Cr.SD <= 10 | is.na(Cr.SD),
# Step 3 Outlier Detection - get rid of people who have lots of macromolecule in their spectra, as that can create distortions
MM20.Cr <= 3 | is.na(MM20.Cr))
}
#' add calculated columns age,age2,agegrp,region,dateNumeric
#'
#' @param MRS input dataframe with age, roi, and ld8
#' @return dataframe with new columns
mrsi_add_cols <- function(MRS){
checkmate::expect_subset(c("age","roi", "ld8"), names(MRS))
#make inverse age column
MRS$invage <- 1/MRS$age
#make age^2 column
MRS$age2 <- (MRS$age - mean(MRS$age))^2
## NB. could remove R/L from names to later collapse/average across hemisphere
## 13MP20200207_LCMv2fixidx.csv now includes 'label' column
region_look <- list(
"1" = "RAntInsula",
"2" = "LAntInsula",
"3" = "RPostInsula",
"4" = "LPostInsula",
"5" = "RCaudate",
"6" = "LCaudate",
"7" = "ACC",
"8" = "MPFC",
"9" = "RDLPFC",
"10" = "LDLPFC",
"11" = "RSTS",
"12" = "LSTS",
"13" = "RThalamus")
MRS <- MRS %>%
mutate(agegrp = cut(age, breaks=c(0,16,22,Inf),
labels=c("10-16","17-22", "23-30")),
region=unlist(region_look[as.character(roi)]),
dateNumeric=as.numeric(as.POSIXct(gsub('.*_','',ld8), format="%Y%m%d")))
}
#' row per metabolite, but keep Cr and SD each in their own column
#'
#' @param mrs input dataframe with many column names ending in .SD and .Cr
#' @return longer dataframe with new columsn 'met' 'Cr' 'SD'
longer_met_CrSD <- function(mrs){
mrs %>%
tidyr::pivot_longer(cols=matches('\\.(SD|Cr)$'),
names_pattern="(\\S+)\\.(SD|Cr)",
names_to=c("met","mtype")) %>%
tidyr::spread(mtype,value)
}