Skip to content

Commit

Permalink
Merge pull request #554 from d3b-center/v15-download
Browse files Browse the repository at this point in the history
V15 download and release notes (1/2)
  • Loading branch information
jharenza authored Mar 1, 2024
2 parents 9b38120 + 7f6b140 commit 09bf65e
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 37 deletions.
29 changes: 25 additions & 4 deletions .github/workflows/run_analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@ jobs:
name: Run Analysis - Consensus CN Manta
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: free disk space
  run: |
    # Turn off all swap and delete /swapfile to reclaim its disk space
    sudo swapoff -a
    sudo rm -f /swapfile
    # Clear the apt package cache
    sudo apt clean
    # Remove every Docker image that is not used by a container.
    # `docker rmi $(docker image ls -aq)` expands to a bare `docker rmi`
    # when no images are present, which exits non-zero and fails the step;
    # prune succeeds whether or not there is anything to remove.
    docker image prune --all --force
    # Report the disk space that is now available
    df -h
- name: Download Data for Consensus CN Manta
uses: docker://pgc-images.sbgenomics.com/d3b-bixu/open-pedcan:latest
with:
Expand All @@ -27,7 +34,14 @@ jobs:
needs: consensus_cn_manta
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: free disk space
  run: |
    # Turn off all swap and delete /swapfile to reclaim its disk space
    sudo swapoff -a
    sudo rm -f /swapfile
    # Clear the apt package cache
    sudo apt clean
    # Remove every Docker image that is not used by a container.
    # `docker rmi $(docker image ls -aq)` expands to a bare `docker rmi`
    # when no images are present, which exits non-zero and fails the step;
    # prune succeeds whether or not there is anything to remove.
    docker image prune --all --force
    # Report the disk space that is now available
    df -h
- name: Download Data for Consensus CN
uses: docker://pgc-images.sbgenomics.com/d3b-bixu/open-pedcan:latest
with:
Expand Down Expand Up @@ -131,6 +145,7 @@ jobs:

- name: Mutational signatures
entrypoint: mutational-signatures/run_mutational_signatures.sh
openpbta_testing: 1

#- name: Immune Deconvolution
# entrypoint: immune-deconv/run-immune-deconv.sh
Expand Down Expand Up @@ -161,8 +176,14 @@ jobs:
# entrypoint: rnaseq-batch-correct/run_ruvseq.sh

steps:
- uses: actions/checkout@v3

- uses: actions/checkout@v4
- name: free disk space
  run: |
    # Turn off all swap and delete /swapfile to reclaim its disk space
    sudo swapoff -a
    sudo rm -f /swapfile
    # Clear the apt package cache
    sudo apt clean
    # Remove every Docker image that is not used by a container.
    # `docker rmi $(docker image ls -aq)` expands to a bare `docker rmi`
    # when no images are present, which exits non-zero and fails the step;
    # prune succeeds whether or not there is anything to remove.
    docker image prune --all --force
    # Report the disk space that is now available
    df -h
- name: Download Data
uses: docker://pgc-images.sbgenomics.com/d3b-bixu/open-pedcan:latest
with:
Expand Down
Binary file modified analyses/create-subset-files/biospecimen_ids_for_subset.RDS
Binary file not shown.
2 changes: 1 addition & 1 deletion analyses/create-subset-files/run_create_subset_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set -o pipefail

# Set defaults for release and biospecimen file name
BIOSPECIMEN_FILE=${BIOSPECIMEN_FILE:-biospecimen_ids_for_subset.RDS}
RELEASE=${RELEASE:-v14}
RELEASE=${RELEASE:-v15}
NUM_MATCHED=${NUM_MATCHED:-15}

# This option controls whether or not the two larger MAF files are skipped as
Expand Down
68 changes: 43 additions & 25 deletions analyses/mutational-signatures/run_mutational_signatures.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,59 @@ set -o pipefail
# Set the working directory to the directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")"


# In CI we'll run an abbreviated version of the de novo signatures extraction
ABBREVIATED_MUTSIGS=${OPC_QUICK_MUTSIGS:-0}

# In CI, run only on the consensus SNV testing file, since the tumor-only SNV file is large
IS_CI=${OPENPBTA_TESTING:-0}

if [[ "$IS_CI" -eq "1" ]]

then

echo "Run the SBS mutational signatures analysis using existing signatures on consensus SNV"
Rscript -e "rmarkdown::render('01-known_signatures.Rmd', params = list(snv_file = \"snv-consensus-plus-hotspots.maf.tsv.gz\", output_Folder = \"ConsensusSNV\"), clean = TRUE)"
mv 01-known_signatures.nb.html 01-ConsensusSNV_known_signatures.nb.html

echo "Run the mutational signatures analysis using COSMIC DBS signatures (v3.3) on consensus SNV"
Rscript -e "rmarkdown::render('02-cosmic_dbs_signatures.Rmd', params = list(snv_file = \"snv-consensus-plus-hotspots.maf.tsv.gz\", output_Folder = \"ConsensusSNV\"), clean = TRUE)"
mv 02-cosmic_dbs_signatures.nb.html 02-ConsensusSNV_cosmic_dbs_signatures.nb.html

# Run the SBS mutational signatures analysis using existing signatures on consensus SNV
Rscript -e "rmarkdown::render('01-known_signatures.Rmd', params = list(snv_file = \"snv-consensus-plus-hotspots.maf.tsv.gz\", output_Folder = \"ConsensusSNV\"), clean = TRUE)"
mv 01-known_signatures.nb.html 01-ConsensusSNV_known_signatures.nb.html
echo "Run analysis of adult CNS mutational signatures on consensus SNV"
Rscript --vanilla 03-fit_cns_signatures.R \
--snv_file snv-consensus-plus-hotspots.maf.tsv.gz \
--output_Folder ConsensusSNV

# Run the SBS mutational signatures analysis using existing signatures on tumor only SNV
Rscript -e "rmarkdown::render('01-known_signatures.Rmd', params = list(snv_file = \"snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz\", output_Folder = \"TumorOnlySNV\"), clean = TRUE)"
mv 01-known_signatures.nb.html 01-TumorOnly_known_signatures.nb.html
else

# Run the mutational signatures analysis using COSMIC DBS signatures (v3.3)
Rscript -e "rmarkdown::render('02-cosmic_dbs_signatures.Rmd', params = list(snv_file = \"snv-consensus-plus-hotspots.maf.tsv.gz\", output_Folder = \"ConsensusSNV\"), clean = TRUE)"
mv 02-cosmic_dbs_signatures.nb.html 02-ConsensusSNV_cosmic_dbs_signatures.nb.html
# Run the SBS mutational signatures analysis using existing signatures on consensus SNV
Rscript -e "rmarkdown::render('01-known_signatures.Rmd', params = list(snv_file = \"snv-consensus-plus-hotspots.maf.tsv.gz\", output_Folder = \"ConsensusSNV\"), clean = TRUE)"
mv 01-known_signatures.nb.html 01-ConsensusSNV_known_signatures.nb.html

# Run the SBS mutational signatures analysis using existing signatures on tumor only SNV
Rscript -e "rmarkdown::render('01-known_signatures.Rmd', params = list(snv_file = \"snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz\", output_Folder = \"TumorOnlySNV\"), clean = TRUE)"
mv 01-known_signatures.nb.html 01-TumorOnly_known_signatures.nb.html

Rscript -e "rmarkdown::render('02-cosmic_dbs_signatures.Rmd', params = list(snv_file = \"snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz\", output_Folder = \"TumorOnlySNV\"), clean = TRUE)"
mv 02-cosmic_dbs_signatures.nb.html 02-TumorOnly_cosmic_dbs_signatures.nb.html
# Run the mutational signatures analysis using COSMIC DBS signatures (v3.3)
Rscript -e "rmarkdown::render('02-cosmic_dbs_signatures.Rmd', params = list(snv_file = \"snv-consensus-plus-hotspots.maf.tsv.gz\", output_Folder = \"ConsensusSNV\"), clean = TRUE)"
mv 02-cosmic_dbs_signatures.nb.html 02-ConsensusSNV_cosmic_dbs_signatures.nb.html

# Run analysis of adult CNS mutational signatures
Rscript --vanilla 03-fit_cns_signatures.R \
--snv_file snv-consensus-plus-hotspots.maf.tsv.gz \
--output_Folder ConsensusSNV
Rscript -e "rmarkdown::render('02-cosmic_dbs_signatures.Rmd', params = list(snv_file = \"snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz\", output_Folder = \"TumorOnlySNV\"), clean = TRUE)"
mv 02-cosmic_dbs_signatures.nb.html 02-TumorOnly_cosmic_dbs_signatures.nb.html

Rscript --vanilla 03-fit_cns_signatures.R \
--snv_file snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz \
--output_Folder TumorOnlySNV

# Run mutational signature summary of hypermutant tumors
## skip script 04 if it is on GitHub CI
if [ "$CI" = true ]; then
echo "Running in GitHub CI"
else
echo "Not running in GitHub CI"
# Run analysis of adult CNS mutational signatures
Rscript --vanilla 03-fit_cns_signatures.R \
--snv_file snv-consensus-plus-hotspots.maf.tsv.gz \
--output_Folder ConsensusSNV

Rscript --vanilla 03-fit_cns_signatures.R \
--snv_file snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz \
--output_Folder TumorOnlySNV

Rscript -e "rmarkdown::render('04-explore_hypermutators.Rmd', params = list(output_Folder = \"ConsensusSNV\"), clean = TRUE)"

## The tumor-only results had no samples passing the filter, so script 04 is not run for tumor-only data
#Rscript -e "rmarkdown::render('04-explore_hypermutators.Rmd', params = list(output_Folder = \"TumorOnlySNV\"), clean = TRUE)"

fi
5 changes: 3 additions & 2 deletions doc/data-files-description.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ This document contains information about all data files associated with this pro
|`fusion-putative-oncogenic.tsv` | Analysis file | [`fusion_filtering`](https://github.com/d3b-center/OpenPedCan-analysis/tree/master/analyses/fusion_filtering) | Filtered and prioritized fusions
|`gene-counts-rsem-expected_count-collapsed.rds` | Analysis file | PBTA+GMKF+TARGET [`collapse-rnaseq`](https://github.com/d3b-center/OpenPedCan-analysis/tree/dev/analyses/collapse-rnaseq) | Gene expression - RSEM expected_count for each sample collapsed to gene symbol (gene-level)
|`gene-expression-rsem-tpm-collapsed.rds` | Analysis file | PBTA+GMKF+TARGET [`collapse-rnaseq`](https://github.com/d3b-center/OpenPedCan-analysis/tree/dev/analyses/collapse-rnaseq) | Gene expression - RSEM TPM for each sample collapsed to gene symbol (gene-level)
|`tcga-gene-expression-rsem-tpm-collapsed.rds` | Modified reference file | TCGA samples lifted from GENCODE v27 to v39 | Gene expression - RSEM TPM for each samples collapsed to gene symbol (gene-level)
|`gtex-gene-expression-rsem-tpm-collapsed.rds` | Modified reference file | GTEX v8 release lifted to GENCODE v39 | Gene expression - RSEM TPM for each samples collapsed to gene symbol (gene-level)
|`tcga_gene-counts-rsem-expected_count-collapsed.rds` | Modified reference file | TCGA samples lifted from GENCODE v27 to v39 | Gene expression - RSEM counts for each sample collapsed to gene symbol (gene-level)
|`tcga_gene-expression-rsem-tpm-collapsed.rds` | Modified reference file | TCGA samples lifted from GENCODE v27 to v39 | Gene expression - RSEM TPM for each sample collapsed to gene symbol (gene-level)
|`gtex_gene-expression-rsem-tpm-collapsed.rds` | Modified reference file | GTEX v8 release lifted to GENCODE v39 | Gene expression - RSEM TPM for each sample collapsed to gene symbol (gene-level)
|`gtex_gene-counts-rsem-expected_count-collapsed.rds` | Modified reference file | GTEX v8 release lifted to GENCODE v39 | Gene expression - RSEM counts for each sample collapsed to gene symbol (gene-level)
|`WGS.hg38.lancet.300bp_padded.bed` | Reference Target/Baits File | [SNV and INDEL calling](https://github.com/AlexsLemonade/OpenPBTA-manuscript/blob/master/content/03.methods.md#snv-and-indel-calling) | WGS.hg38.lancet.unpadded.bed file with each region padded by 300 bp
|`WGS.hg38.lancet.unpadded.bed` | Reference Regions File | [SNV and INDEL calling](https://github.com/AlexsLemonade/OpenPBTA-manuscript/blob/master/content/03.methods.md#snv-and-indel-calling) | hg38 WGS regions created using UTR, exome, and start/stop codon features of the GENCODE 31 reference, augmented with PASS variant calls from Strelka2 and Mutect2
Expand Down
135 changes: 132 additions & 3 deletions doc/release-notes.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,139 @@
# release notes

## current release
### release-v15
- Release date: 2024-03-01
- Status: available
- Overview of changes:
- This release fixes a bug in the TCGA and GTEX collapse script per issues [#552](https://github.com/d3b-center/OpenPedCan-analysis/issues/552) and [#551](https://github.com/d3b-center/OpenPedCan-analysis/issues/551).
- Files changed:
- gtex_gene-counts-rsem-expected_count-collapsed.rds
- gtex_gene-expression-rsem-tpm-collapsed.rds
- tcga_gene-expression-rsem-tpm-collapsed.rds
- Files added:
- tcga_gene-counts-rsem-expected_count-collapsed.rds

```
v15
├── 20038D-17Q6-01.regions.100bp_padded.bed
├── S0274956_Padded_HG38.merged.bed
├── S02972011_Covered_hg38_100.bed
├── S04380110_Regions_hg38_100.bed
├── S07604715_100bp_Padded.bed
├── SeqCap_EZ_Exome_v2_Padded_HG38.merged.bed
├── StrexomeLite_hg38_liftover_100bp_padded.bed
├── Strexome_targets_intersect_sorted_padded100.GRCh38.bed
├── TARGET_AML_NBL_WT_SeqVal79_attempt06_AllTracks_HG38_bed_expanded100.bed
├── WGS.hg38.lancet.300bp_padded.bed
├── WGS.hg38.lancet.unpadded.bed
├── WGS.hg38.mutect2.vardict.unpadded.bed
├── WGS.hg38.strelka2.unpadded.bed
├── WGS.hg38.vardict.100bp_padded.bed
├── agilent-v4-targets-ucsc.100bp_padded.bed
├── ashion_exome_v2_targets_hg38_padded100.bed
├── biospecimen_id_to_bed_map.tsv
├── cnv-cnvkit.seg.gz
├── cnv-consensus-gistic-only.seg.gz
├── cnv-consensus-gistic.zip
├── cnv-consensus.seg.gz
├── cnv-controlfreec-tumor-only.tsv.gz
├── cnv-controlfreec.tsv.gz
├── cnv-gatk.seg.gz
├── cnvkit_with_status.tsv
├── consensus_seg_with_status.tsv
├── consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only.tsv.gz
├── consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only_autosomes.tsv.gz
├── consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only_x_and_y.tsv.gz
├── cptac-protein-imputed-phospho-expression-log2-ratio.tsv.gz
├── cptac-protein-imputed-prot-expression-abundance.tsv.gz
├── cptac-protein-imputed-prot-expression-log2-ratio.tsv.gz
├── efo-mondo-map.tsv
├── ensg-hugo-pmtl-mapping.tsv
├── fusion-annoFuse.tsv.gz
├── fusion-arriba.tsv.gz
├── fusion-dgd.tsv.gz
├── fusion-putative-oncogenic.tsv
├── fusion-starfusion.tsv.gz
├── fusion_summary_embryonal_foi.tsv
├── fusion_summary_ependymoma_foi.tsv
├── fusion_summary_ewings_foi.tsv
├── fusion_summary_lgg_hgg_foi.tsv
├── gbm-protein-imputed-phospho-expression-abundance.tsv.gz
├── gbm-protein-imputed-prot-expression-abundance.tsv.gz
├── gene-counts-rsem-expected_count-collapsed.rds
├── gene-expression-rsem-tpm-collapsed.rds
├── gtex_gene-counts-rsem-expected_count-collapsed.rds
├── gtex_gene-expression-rsem-tpm-collapsed.rds
├── hg38_strelka.bed
├── histologies-base.tsv
├── histologies.tsv
├── hope-protein-imputed-phospho-expression-abundance.tsv.gz
├── hope-protein-imputed-prot-expression-abundance.tsv.gz
├── independent-specimens.methyl.primary-plus.eachcohort.tsv
├── independent-specimens.methyl.primary-plus.tsv
├── independent-specimens.methyl.primary.eachcohort.tsv
├── independent-specimens.methyl.primary.tsv
├── independent-specimens.methyl.relapse.eachcohort.tsv
├── independent-specimens.methyl.relapse.tsv
├── independent-specimens.rnaseq.primary-plus-pre-release.tsv
├── independent-specimens.rnaseq.primary-pre-release.tsv
├── independent-specimens.rnaseq.relapse-pre-release.tsv
├── independent-specimens.rnaseqpanel.primary-plus.eachcohort.tsv
├── independent-specimens.rnaseqpanel.primary-plus.tsv
├── independent-specimens.rnaseqpanel.primary.eachcohort.tsv
├── independent-specimens.rnaseqpanel.primary.tsv
├── independent-specimens.rnaseqpanel.relapse.eachcohort.tsv
├── independent-specimens.rnaseqpanel.relapse.tsv
├── independent-specimens.wgs.primary-plus.eachcohort.tsv
├── independent-specimens.wgs.primary-plus.tsv
├── independent-specimens.wgs.primary.eachcohort.tsv
├── independent-specimens.wgs.primary.tsv
├── independent-specimens.wgs.relapse.eachcohort.tsv
├── independent-specimens.wgs.relapse.tsv
├── independent-specimens.wgswxspanel.primary-plus.eachcohort.prefer.wgs.tsv
├── independent-specimens.wgswxspanel.primary-plus.eachcohort.prefer.wxs.tsv
├── independent-specimens.wgswxspanel.primary-plus.prefer.wgs.tsv
├── independent-specimens.wgswxspanel.primary-plus.prefer.wxs.tsv
├── independent-specimens.wgswxspanel.primary.eachcohort.prefer.wgs.tsv
├── independent-specimens.wgswxspanel.primary.eachcohort.prefer.wxs.tsv
├── independent-specimens.wgswxspanel.primary.prefer.wgs.tsv
├── independent-specimens.wgswxspanel.primary.prefer.wxs.tsv
├── independent-specimens.wgswxspanel.relapse.eachcohort.prefer.wgs.tsv
├── independent-specimens.wgswxspanel.relapse.eachcohort.prefer.wxs.tsv
├── independent-specimens.wgswxspanel.relapse.prefer.wgs.tsv
├── independent-specimens.wgswxspanel.relapse.prefer.wxs.tsv
├── infinium.gencode.v39.probe.annotations.tsv.gz
├── intersect_cds_lancet_strelka_mutect_WGS.bed
├── intersect_strelka_mutect_WGS.bed
├── md5sum.txt
├── mirna-expression-counts.rds
├── nexterarapidcapture_exome_targetedregions_v1.2_hg38_100.bed
├── onco1500-v2-targets-ucsc.100bp_padded.bed
├── onco1500-v4-targets-ucsc.100bp_padded.bed
├── onco1500-v6-targets-ucsc.100bp_padded.bed
├── onco1500-v6a-targets-ucsc.100bp_padded.bed
├── release-notes.md
├── rna-dna-qc-stats.tsv
├── rna-isoform-expression-rsem-tpm.rds
├── snv-consensus-plus-hotspots.maf.tsv.gz
├── snv-mutation-tmb-all.tsv
├── snv-mutation-tmb-coding.tsv
├── snv-mutect2-tumor-only-plus-hotspots.maf.tsv.gz
├── splice-events-rmats.tsv.gz
├── sv-manta.tsv.gz
├── tcga_gene-counts-rsem-expected_count-collapsed.rds
├── tcga_gene-expression-rsem-tpm-collapsed.rds
├── truseq-exome-targeted-regions-manifest-v1-2_hg38_100.bed
├── uberon-map-gtex-group.tsv
├── uberon-map-gtex-subgroup.tsv
├── wgs_canonical_calling_regions.hg38.bed
└── xgen-exome-research-panel-targets_hg38_ucsc_liftover.100bp_padded.sort.merged.bed
```

## previous release
### release-v14

- Release date: 2023-01-29
- Release date: 2024-01-29
- Status: available
- Overview of changes:
- This release adds the following data:
Expand All @@ -23,10 +153,9 @@
- Add new NIH Bethesda methylation v2 classifier columns to histology file
- Use NIH classifier if unable to get high-confidence subtypes any other way for ATRT and HGG subtyping modules

## previous release
### release-v13

- Release date: 2023-01-03
- Release date: 2024-01-03
- Status: available
- Overview of changes:
- This release adds the following data:
Expand Down
4 changes: 2 additions & 2 deletions download-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ set -o pipefail

# Use the OpenPedCan bucket as the default.
URL=${OPENPEDCAN_URL:-https://s3.amazonaws.com/d3b-openaccess-us-east-1-prd-pbta/open-targets}
RELEASE=${OPENPEDCAN_RELEASE:-v14}
PREVIOUS=${OPENPEDCAN_RELEASE:-v13}
# Release identifiers; each falls back to its default when OPENPEDCAN_RELEASE is unset or empty.
# NOTE(review): RELEASE and PREVIOUS both read OPENPEDCAN_RELEASE, so whenever
# that variable is set the two values are identical; only the defaults differ.
# Confirm that the later uses of PREVIOUS are safe when PREVIOUS == RELEASE.
RELEASE=${OPENPEDCAN_RELEASE:-v15}
PREVIOUS=${OPENPEDCAN_RELEASE:-v14}

# Remove old symlinks in data
find data -type l -delete
Expand Down

0 comments on commit 09bf65e

Please sign in to comment.