Skip to content

Commit

Permalink
Merge pull request #588 from d3b-center/zzgeng/supp_table
Browse files Browse the repository at this point in the history
Update manuscript: Generate tables
  • Loading branch information
jharenza committed Jul 1, 2024
2 parents 72a2a55 + 701e164 commit e289e49
Show file tree
Hide file tree
Showing 13 changed files with 353 additions and 35 deletions.
4 changes: 2 additions & 2 deletions tables/output_tables.Rmd → tables/01-output_tables.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ output:
toc: TRUE
toc_float: TRUE
params:
release: v14
release: v15
---

Code adapted from: https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/master/tables/output_tables.Rmd
Expand Down Expand Up @@ -46,7 +46,7 @@ histology_file <- file.path(data_dir, "histologies.tsv")

# Define output files and sheet names, when appropriate
```{r}
table_s1_file <- file.path(results_dir, "histologies.xlsx")
table_s1_file <- file.path(results_dir, "SuppTable1-Histologies.xlsx")
```

# Read files
Expand Down
28 changes: 11 additions & 17 deletions tables/output_tables.nb.html → tables/01-output_tables.nb.html

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions tables/02-molecular_subtype_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
library(tidyverse)
library(openxlsx)

## Build Supplementary Table 2: number of tumors and patients per
## broad histology / molecular subtype, plus the MB SHH subtype sheet.

## set directories
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
input_dir <- file.path(root_dir, "data")
output_dir <- file.path(root_dir, "tables", "results")

## read file
## Named `histologies` rather than `hist` so graphics::hist() is not masked.
## A larger guess_max lets readr look past the first 1000 rows when guessing
## column types, so sparsely populated columns are not mis-typed.
histologies <- read_tsv(file.path(input_dir, "histologies.tsv"),
                        guess_max = 100000)

## Count distinct identifiers per broad histology / molecular subtype.
##
## histologies: data frame with `sample_type`, `broad_histology`,
##   `molecular_subtype` and the identifier column.
## id_col: unquoted identifier column to de-duplicate on.
## count_name: string used as the name of the count column.
## Returns an ungrouped tibble with one row per
##   (broad_histology, molecular_subtype) pair.
count_by_subtype <- function(histologies, id_col, count_name) {
  histologies %>%
    # keep tumor rows that have a molecular subtype assigned
    filter(sample_type == "Tumor", !is.na(molecular_subtype)) %>%
    # one row per identifier within each histology / subtype pair
    distinct({{ id_col }}, broad_histology, molecular_subtype) %>%
    count(broad_histology, molecular_subtype, name = count_name)
}

## all molecular subtype ##
hist_sample <- count_by_subtype(histologies, match_id, "Tumors")
hist_patient <- count_by_subtype(
  histologies, Kids_First_Participant_ID, "Patients"
)

## Join on explicit keys so the join does not depend on whichever column
## names the two tables happen to share.
hist_combined <- hist_sample %>%
  left_join(hist_patient, by = c("broad_histology", "molecular_subtype")) %>%
  dplyr::rename("Broad Histologies" = "broad_histology",
                "OpenPedCan Molecular Subtype" = "molecular_subtype")

## Append a grand-total row.
## NOTE(review): the Patients total is the sum of per-subtype counts, so a
## patient appearing under more than one subtype is counted once per
## subtype -- confirm this is the intended total.
total_tumors <- sum(hist_combined$Tumors)
total_patients <- sum(hist_combined$Patients)
hist_combined <- hist_combined %>%
  add_row(`Broad Histologies` = "",
          `OpenPedCan Molecular Subtype` = "Total",
          Tumors = total_tumors,
          Patients = total_patients)

## MB subtype
## change the directory once the PR is merged ##
MB_subtype <- read_tsv(file.path(output_dir, "mb_shh_molecular_subtypes.tsv"))

## final table: one worksheet per list element
final_table <- list(histologies_summary = hist_combined,
                    MB_SHH_subtype = MB_subtype)
write.xlsx(final_table,
           file.path(output_dir, "SuppTable2-Molecular-Subtype-Table.xlsx"))
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,5 @@ list_package_table <- list(r_packages = r_packages,
python_libraries = python_libraries,
cli_tools = cli_tools)
openxlsx::write.xlsx(list_package_table, "results/list_package_table.xlsx")
openxlsx::write.xlsx(list_package_table, "results/SuppTable3-List_Package_Table.xlsx")
```
Original file line number Diff line number Diff line change
Expand Up @@ -1629,16 +1629,17 @@ <h4 class="date">2024-05-30</h4>


<pre class="r"><code>library(tidyverse)</code></pre>
<pre><code>## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
<pre><code>## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ── Conflicts ───────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (&lt;http://conflicted.r-lib.org/&gt;) to force all conflicts to become errors</code></pre>
<pre class="r"><code>library(openxlsx)</code></pre>
<div id="generate-softwarepackages-table" class="section level2">
<h2>generate <code>software/packages</code> table</h2>
<div id="r-packages" class="section level3">
Expand Down Expand Up @@ -1759,7 +1760,7 @@ <h3>combine all together</h3>
python_libraries = python_libraries,
cli_tools = cli_tools)

openxlsx::write.xlsx(list_package_table, &quot;results/list_package_table.xlsx&quot;)</code></pre>
openxlsx::write.xlsx(list_package_table, &quot;results/SuppTable3-List_Package_Table.xlsx&quot;)</code></pre>
</div>
</div>

Expand Down
Binary file not shown.
Binary file not shown.
Binary file added tables/results/SuppTable3-List_Package_Table.xlsx
Binary file not shown.
Binary file removed tables/results/list_package_table.xlsx
Binary file not shown.
Loading

0 comments on commit e289e49

Please sign in to comment.