d3b-center · komalsrathi · Jun 20, 2024 · Jun 20, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/analyses/molecular-subtyping-HGG/00-fusion-summary.nb.html b/analyses/molecular-subtyping-HGG/00-fusion-summary.nb.html
diff --git a/analyses/molecular-subtyping-HGG/01-HOPE-HGG-subtyping-subset-file.html b/analyses/molecular-subtyping-HGG/01-HOPE-HGG-subtyping-subset-file.html
diff --git a/analyses/molecular-subtyping-HGG/02-HOPE-molecular-subtyping.html b/analyses/molecular-subtyping-HGG/02-HOPE-molecular-subtyping.html
@@ -358,13 +358,13 @@ <h4 class="date">2023-08-03</h4>
 
 
 <pre class="r"><code>library(tidyverse)</code></pre>
-<pre><code>## ── Attaching core tidyverse packages ───────────────────────────────────────────────── tidyverse 2.0.0 ──
-## ✔ dplyr     1.1.1     ✔ readr     2.1.4
-## ✔ forcats   1.0.0     ✔ stringr   1.5.0
-## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
-## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
-## ✔ purrr     1.0.1     
-## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
+<pre><code>## ── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
+## ✔ dplyr     1.1.4     ✔ readr     2.1.5
+## ✔ forcats   1.0.0     ✔ stringr   1.5.1
+## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
+## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
+## ✔ purrr     1.0.2     
+## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
 ## ✖ dplyr::filter() masks stats::filter()
 ## ✖ dplyr::lag()    masks stats::lag()
 ## ℹ Use the conflicted package (&lt;http://conflicted.r-lib.org/&gt;) to force all conflicts to become errors</code></pre>
@@ -411,8 +411,8 @@ <h2>extract DNA, RNA and methylation samples</h2>
                                collapse = &quot;, &quot;),
             age_at_diagnosis_yr = paste(sort(unique(age_at_diagnosis_yr)),
                                         collapse = &quot;, &quot;))</code></pre>
-<pre><code>## `summarise()` has grouped output by &#39;Kids_First_Participant_ID&#39;. You can override using the `.groups`
-## argument.</code></pre>
+<pre><code>## `summarise()` has grouped output by &#39;Kids_First_Participant_ID&#39;. You can
+## override using the `.groups` argument.</code></pre>
 <pre class="r"><code>all_data_df &lt;- inner_join(relevant_clinical_df, 
                           all_data_df) %&gt;%
   dplyr::arrange(Kids_First_Participant_ID, sample_id)</code></pre>
@@ -556,9 +556,9 @@ <h2>Join all table together</h2>
 as.data.frame(table(molecular_subtype_table$molecular_subtype)) %&gt;%
   arrange(desc(Freq))</code></pre>
 <pre><code>##                Var1 Freq
-## 1       DMG, H3 K28   22
+## 1       DMG, H3 K28   21
 ## 2          HGG, IDH    7
-## 3               PXA    5
+## 3               PXA    6
 ## 4       DHG, H3 G35    4
 ## 5 IHG, NTRK-altered    4
 ## 6 IHG, ROS1-altered    3
@@ -627,11 +627,11 @@ <h2>read tp53 score file</h2>
   &quot;results&quot;,
   &quot;tp53_altered_status.tsv&quot;), guess_max = 100000) </code></pre>
 <pre><code>## Rows: 189 Columns: 16
-## ── Column specification ─────────────────────────────────────────────────────────────────────────────────
+## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 ## Delimiter: &quot;\t&quot;
 ## chr (7): sample_id, Kids_First_Biospecimen_ID_DNA, Kids_First_Biospecimen_ID...
-## dbl (8): tp53_score, SNV_indel_counts, CNV_loss_counts, SV_counts, Fusion_co...
-## lgl (1): SV_type
+## dbl (7): tp53_score, SNV_indel_counts, CNV_loss_counts, SV_counts, Fusion_co...
+## lgl (2): CNV_loss_evidence, SV_type
 ## 
 ## ℹ Use `spec()` to retrieve the full column specification for this data.
 ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.</code></pre>
@@ -671,19 +671,18 @@ <h2>Combine all to get final molecular subtyping results with tp53
 <pre><code>## Warning in left_join(., select(tp53_df, c(&quot;sample_id&quot;, &quot;tp53_altered&quot;)), : Detected an unexpected many-to-many relationship between `x` and `y`.
 ## ℹ Row 1 of `x` matches multiple rows in `y`.
 ## ℹ Row 69 of `y` matches multiple rows in `x`.
-## ℹ If a many-to-many relationship is expected, set `relationship = &quot;many-to-many&quot;` to silence this
-##   warning.</code></pre>
+## ℹ If a many-to-many relationship is expected, set `relationship = &quot;many-to-many&quot;` to silence this warning.</code></pre>
 </div>
 <div id="session-info" class="section level2">
 <h2>session info</h2>
 <pre class="r"><code>sessionInfo()</code></pre>
-<pre><code>## R version 4.2.3 (2023-03-15)
-## Platform: x86_64-pc-linux-gnu (64-bit)
-## Running under: Ubuntu 22.04.2 LTS
+<pre><code>## R version 4.4.0 (2024-04-24)
+## Platform: x86_64-pc-linux-gnu
+## Running under: Ubuntu 22.04.4 LTS
 ## 
 ## Matrix products: default
-## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
-## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
+## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
+## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
 ## 
 ## locale:
 ##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
@@ -693,26 +692,29 @@ <h2>session info</h2>
 ##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
 ## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
 ## 
+## time zone: Etc/UTC
+## tzcode source: system (glibc)
+## 
 ## attached base packages:
 ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 ## 
 ## other attached packages:
-##  [1] lubridate_1.9.2 forcats_1.0.0   stringr_1.5.0   dplyr_1.1.1    
-##  [5] purrr_1.0.1     readr_2.1.4     tidyr_1.3.0     tibble_3.2.1   
-##  [9] ggplot2_3.4.2   tidyverse_2.0.0
+##  [1] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
+##  [5] purrr_1.0.2     readr_2.1.5     tidyr_1.3.1     tibble_3.2.1   
+##  [9] ggplot2_3.5.1   tidyverse_2.0.0
 ## 
 ## loaded via a namespace (and not attached):
-##  [1] bslib_0.4.2      compiler_4.2.3   pillar_1.9.0     jquerylib_0.1.4 
-##  [5] tools_4.2.3      bit_4.0.5        digest_0.6.31    timechange_0.2.0
-##  [9] jsonlite_1.8.4   evaluate_0.20    lifecycle_1.0.3  gtable_0.3.3    
-## [13] pkgconfig_2.0.3  rlang_1.1.0      cli_3.6.1        parallel_4.2.3  
-## [17] yaml_2.3.7       xfun_0.38        fastmap_1.1.1    withr_2.5.0     
-## [21] knitr_1.42       generics_0.1.3   vctrs_0.6.2      sass_0.4.5      
-## [25] hms_1.1.3        bit64_4.0.5      rprojroot_2.0.3  grid_4.2.3      
-## [29] tidyselect_1.2.0 glue_1.6.2       R6_2.5.1         fansi_1.0.4     
-## [33] vroom_1.6.1      rmarkdown_2.21   tzdb_0.3.0       magrittr_2.0.3  
-## [37] scales_1.2.1     htmltools_0.5.5  colorspace_2.1-0 utf8_1.2.3      
-## [41] stringi_1.7.12   munsell_0.5.0    cachem_1.0.7     crayon_1.5.2</code></pre>
+##  [1] bit_4.0.5         gtable_0.3.5      jsonlite_1.8.8    crayon_1.5.2     
+##  [5] compiler_4.4.0    tidyselect_1.2.1  parallel_4.4.0    jquerylib_0.1.4  
+##  [9] scales_1.3.0      yaml_2.3.8        fastmap_1.2.0     R6_2.5.1         
+## [13] generics_0.1.3    knitr_1.47        rprojroot_2.0.4   munsell_0.5.1    
+## [17] bslib_0.7.0       pillar_1.9.0      tzdb_0.4.0        rlang_1.1.4      
+## [21] utf8_1.2.4        stringi_1.8.4     cachem_1.1.0      xfun_0.44        
+## [25] sass_0.4.9        bit64_4.0.5       timechange_0.3.0  cli_3.6.2        
+## [29] withr_3.0.0       magrittr_2.0.3    digest_0.6.35     grid_4.4.0       
+## [33] vroom_1.6.5       hms_1.1.3         lifecycle_1.0.4   vctrs_0.6.5      
+## [37] evaluate_0.24.0   glue_1.7.0        fansi_1.0.6       colorspace_2.1-0 
+## [41] rmarkdown_2.27    tools_4.4.0       pkgconfig_2.0.3   htmltools_0.5.8.1</code></pre>
 </div>
 </div>
 

diff --git a/analyses/molecular-subtyping-HGG/03-molecular-subtype-integrate.R b/analyses/molecular-subtyping-HGG/03-molecular-subtype-integrate.R
@@ -52,7 +52,13 @@ hist_with_subtype <- hist %>%
   mutate(cancer_group = str_extract(integrated_diagnosis, "[^,]*")) %>%
   select(colnames(.)[!grepl(paste(c("^HARMONY_", "^HOPE_"), collapse = "|"), colnames(.))], 
          starts_with("HARMONY_"), starts_with("HOPE_")) %>%
-  select(-short_histology) %>%
-  write_tsv(file.path(results_dir, "Hope-GBM-histologies.tsv"))
+  select(-short_histology) 
+
+# set cancer_group_short to HGG where molecular_subtype is PXA; mutate() returns a new data frame, so assign it back or the recode never reaches the file
+hist_with_subtype <- hist_with_subtype %>%
+  mutate(cancer_group_short = ifelse(!is.na(molecular_subtype) & molecular_subtype == "PXA", "HGG", cancer_group_short))
+
+# write the recoded histologies table to file
+hist_with_subtype %>% write_tsv(file.path(results_dir, "Hope-GBM-histologies.tsv"))