From 68dddb5ae5a9339b74747f5e4a837b1433fdcefd Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:21:20 +0100 Subject: [PATCH 01/12] Move Snakefile to its own language --- lib/linguist/languages.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index 2356b44850..a9a64e9803 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -5292,7 +5292,6 @@ Python: - ".pyt" - ".pyw" - ".rpy" - - ".smk" - ".spec" - ".tac" - ".wsgi" @@ -5302,7 +5301,6 @@ Python: - DEPS - SConscript - SConstruct - - Snakefile - wscript interpreters: - python @@ -6411,6 +6409,21 @@ Smithy: extensions: - ".smithy" language_id: 1027892786 +Snakefile: + type: programming + group: Python + tm_scope: source.python + ace_mode: python + codemirror_mode: python + codemirror_mime_type: text/x-python + color: "#33c68a" + extensions: + - ".smk" + - ".snakefile" + filenames: + - Snakefile + aliases: + - snakemake Solidity: type: programming color: "#AA6746" From 3218a70e37d2dd4343894fb7e30397dc181e9cf1 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:22:53 +0100 Subject: [PATCH 02/12] rename snakefile to Snakemake --- lib/linguist/languages.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index a9a64e9803..a89aba9a06 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -6409,7 +6409,7 @@ Smithy: extensions: - ".smithy" language_id: 1027892786 -Snakefile: +Snakemake: type: programming group: Python tm_scope: source.python @@ -6423,7 +6423,7 @@ Snakefile: filenames: - Snakefile aliases: - - snakemake + - snakefile Solidity: type: programming color: "#AA6746" From 328945ae177aa65ddb95668b09e549b599b95527 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:29:58 +0100 Subject: [PATCH 03/12] Create Snakefile --- samples/Snakemake/Snakefile | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 samples/Snakemake/Snakefile diff --git a/samples/Snakemake/Snakefile b/samples/Snakemake/Snakefile new file mode 100644 index 0000000000..3fd26c8ee6 --- /dev/null +++ b/samples/Snakemake/Snakefile @@ -0,0 +1,70 @@ +configfile: "config.yaml" + + +rule all: + input: + "plots/quals.svg" + + +def get_bwa_map_input_fastqs(wildcards): + return config["samples"][wildcards.sample] + + +rule bwa_map: + input: + "data/genome.fa", + get_bwa_map_input_fastqs + output: + temp("mapped_reads/{sample}.bam") + params: + rg=r"@RG\tID:{sample}\tSM:{sample}" + log: + "logs/bwa_mem/{sample}.log" + threads: 8 + shell: + "(bwa mem -R '{params.rg}' -t {threads} {input} | " + "samtools view -Sb - > {output}) 2> {log}" + + +rule samtools_sort: + input: + "mapped_reads/{sample}.bam" + output: + protected("sorted_reads/{sample}.bam") + shell: + "samtools sort -T sorted_reads/{wildcards.sample} " + "-O bam {input} > {output}" + + +rule samtools_index: + input: + "sorted_reads/{sample}.bam" + output: + "sorted_reads/{sample}.bam.bai" + shell: + "samtools index {input}" + + +rule bcftools_call: + input: + fa="data/genome.fa", + bam=expand("sorted_reads/{sample}.bam", sample=config["samples"]), + bai=expand("sorted_reads/{sample}.bam.bai", sample=config["samples"]) + output: + "calls/all.vcf" + params: + rate=config["prior_mutation_rate"] + log: + "logs/bcftools_call/all.log" + shell: + "(bcftools mpileup -f {input.fa} {input.bam} | " + "bcftools call -mv -P {params.rate} - > {output}) 2> {log}" + + +rule plot_quals: + input: + "calls/all.vcf" + output: + "plots/quals.svg" + script: + "scripts/plot-quals.py" From 68a24d036ac56ca35895f03325d484e930e26d23 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:32:18 +0100 Subject: [PATCH 04/12] Create snakemake-calling.smk --- samples/Snakemake/snakemake-calling.smk | 68 +++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 samples/Snakemake/snakemake-calling.smk diff --git a/samples/Snakemake/snakemake-calling.smk b/samples/Snakemake/snakemake-calling.smk new file mode 100644 index 0000000000..886d51a1b5 --- /dev/null +++ b/samples/Snakemake/snakemake-calling.smk @@ -0,0 +1,68 @@ +# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/calling.smk +# Accessed: Jan 10 2020 by Nils Homer +# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) + +if "restrict-regions" in config["processing"]: + rule compose_regions: + input: + config["processing"]["restrict-regions"] + output: + "called/{contig}.regions.bed" + conda: + "../envs/bedops.yaml" + shell: + "bedextract {wildcards.contig} {input} > {output}" + + +rule call_variants: + input: + bam=get_sample_bams, + ref=config["ref"]["genome"], + known=config["ref"]["known-variants"], + regions="called/{contig}.regions.bed" if config["processing"].get("restrict-regions") else [] + output: + gvcf=protected("called/{sample}.{contig}.g.vcf.gz") + log: + "logs/gatk/haplotypecaller/{sample}.{contig}.log" + params: + extra=get_call_variants_params + wrapper: + "0.27.1/bio/gatk/haplotypecaller" + + +rule combine_calls: + input: + ref=config["ref"]["genome"], + gvcfs=expand("called/{sample}.{{contig}}.g.vcf.gz", sample=samples.index) + output: + gvcf="called/all.{contig}.g.vcf.gz" + log: + "logs/gatk/combinegvcfs.{contig}.log" + wrapper: + "0.27.1/bio/gatk/combinegvcfs" + + +rule genotype_variants: + input: + ref=config["ref"]["genome"], + gvcf="called/all.{contig}.g.vcf.gz" + output: + vcf=temp("genotyped/all.{contig}.vcf.gz") + params: + extra=config["params"]["gatk"]["GenotypeGVCFs"] + log: + "logs/gatk/genotypegvcfs.{contig}.log" + wrapper: + "0.27.1/bio/gatk/genotypegvcfs" + + +rule merge_variants: + input: + ref=get_fai(), # fai is needed to calculate aggregation over contigs below + vcfs=lambda w: expand("genotyped/all.{contig}.vcf.gz", contig=get_contigs()), + output: + vcf="genotyped/all.vcf.gz" + log: + "logs/picard/merge-genotyped.log" + wrapper: + "0.40.2/bio/picard/mergevcfs" From 825d40e4f1604509b16eddcd81756d75bab52c6a Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:32:25 +0100 Subject: [PATCH 05/12] Delete snakemake-calling.smk --- samples/Python/snakemake-calling.smk | 68 ---------------------------- 1 file changed, 68 deletions(-) delete mode 100644 samples/Python/snakemake-calling.smk diff --git a/samples/Python/snakemake-calling.smk b/samples/Python/snakemake-calling.smk deleted file mode 100644 index 886d51a1b5..0000000000 --- a/samples/Python/snakemake-calling.smk +++ /dev/null @@ -1,68 +0,0 @@ -# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/calling.smk -# Accessed: Jan 10 2020 by Nils Homer -# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) - -if "restrict-regions" in config["processing"]: - rule compose_regions: - input: - config["processing"]["restrict-regions"] - output: - "called/{contig}.regions.bed" - conda: - "../envs/bedops.yaml" - shell: - "bedextract {wildcards.contig} {input} > {output}" - - -rule call_variants: - input: - bam=get_sample_bams, - ref=config["ref"]["genome"], - known=config["ref"]["known-variants"], - regions="called/{contig}.regions.bed" if config["processing"].get("restrict-regions") else [] - output: - gvcf=protected("called/{sample}.{contig}.g.vcf.gz") - log: - "logs/gatk/haplotypecaller/{sample}.{contig}.log" - params: - extra=get_call_variants_params - wrapper: - "0.27.1/bio/gatk/haplotypecaller" - - -rule combine_calls: - input: - ref=config["ref"]["genome"], - gvcfs=expand("called/{sample}.{{contig}}.g.vcf.gz", sample=samples.index) - output: - gvcf="called/all.{contig}.g.vcf.gz" - log: - "logs/gatk/combinegvcfs.{contig}.log" - wrapper: - "0.27.1/bio/gatk/combinegvcfs" - - -rule genotype_variants: - input: - ref=config["ref"]["genome"], - gvcf="called/all.{contig}.g.vcf.gz" - output: - vcf=temp("genotyped/all.{contig}.vcf.gz") - params: - extra=config["params"]["gatk"]["GenotypeGVCFs"] - log: - "logs/gatk/genotypegvcfs.{contig}.log" - wrapper: - "0.27.1/bio/gatk/genotypegvcfs" - - -rule merge_variants: - input: - ref=get_fai(), # fai is needed to calculate aggregation over contigs below - vcfs=lambda w: expand("genotyped/all.{contig}.vcf.gz", contig=get_contigs()), - output: - vcf="genotyped/all.vcf.gz" - log: - "logs/picard/merge-genotyped.log" - wrapper: - "0.40.2/bio/picard/mergevcfs" From 508f7ff91536fc468b7d49f759c2b82482f62e9f Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:32:54 +0100 Subject: [PATCH 06/12] Create snakemake-mapping.smk --- samples/Snakemake/snakemake-mapping.smk | 90 +++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 samples/Snakemake/snakemake-mapping.smk diff --git a/samples/Snakemake/snakemake-mapping.smk b/samples/Snakemake/snakemake-mapping.smk new file mode 100644 index 0000000000..e48bb27fb7 --- /dev/null +++ b/samples/Snakemake/snakemake-mapping.smk @@ -0,0 +1,90 @@ +# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/mapping.smk +# Accessed: Jan 10 2020 by Nils Homer +# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) + +rule trim_reads_se: + input: + unpack(get_fastq) + output: + temp("trimmed/{sample}-{unit}.fastq.gz") + params: + extra="", + **config["params"]["trimmomatic"]["se"] + log: + "logs/trimmomatic/{sample}-{unit}.log" + wrapper: + "0.30.0/bio/trimmomatic/se" + + +rule trim_reads_pe: + input: + unpack(get_fastq) + output: + r1=temp("trimmed/{sample}-{unit}.1.fastq.gz"), + r2=temp("trimmed/{sample}-{unit}.2.fastq.gz"), + r1_unpaired=temp("trimmed/{sample}-{unit}.1.unpaired.fastq.gz"), + r2_unpaired=temp("trimmed/{sample}-{unit}.2.unpaired.fastq.gz"), + trimlog="trimmed/{sample}-{unit}.trimlog.txt" + params: + extra=lambda w, output: "-trimlog {}".format(output.trimlog), + **config["params"]["trimmomatic"]["pe"] + log: + "logs/trimmomatic/{sample}-{unit}.log" + wrapper: + "0.30.0/bio/trimmomatic/pe" + + +rule map_reads: + input: + reads=get_trimmed_reads + output: + temp("mapped/{sample}-{unit}.sorted.bam") + log: + "logs/bwa_mem/{sample}-{unit}.log" + params: + index=config["ref"]["genome"], + extra=get_read_group, + sort="samtools", + sort_order="coordinate" + threads: 8 + wrapper: + "0.27.1/bio/bwa/mem" + + +rule mark_duplicates: + input: + "mapped/{sample}-{unit}.sorted.bam" + output: + bam=temp("dedup/{sample}-{unit}.bam"), + metrics="qc/dedup/{sample}-{unit}.metrics.txt" + log: + "logs/picard/dedup/{sample}-{unit}.log" + params: + config["params"]["picard"]["MarkDuplicates"] + wrapper: + "0.26.1/bio/picard/markduplicates" + + +rule recalibrate_base_qualities: + input: + bam=get_recal_input(), + bai=get_recal_input(bai=True), + ref=config["ref"]["genome"], + known=config["ref"]["known-variants"] + output: + bam=protected("recal/{sample}-{unit}.bam") + params: + extra=get_regions_param() + config["params"]["gatk"]["BaseRecalibrator"] + log: + "logs/gatk/bqsr/{sample}-{unit}.log" + wrapper: + "0.27.1/bio/gatk/baserecalibrator" + + +rule samtools_index: + input: + "{prefix}.bam" + output: + "{prefix}.bam.bai" + wrapper: + "0.27.1/bio/samtools/index" From e952ef5577848a5ba8f59b12353a60c6449fcbef Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 1 Feb 2023 11:33:01 +0100 Subject: [PATCH 07/12] Delete snakemake-mapping.smk --- samples/Python/snakemake-mapping.smk | 90 ---------------------------- 1 file changed, 90 deletions(-) delete mode 100644 samples/Python/snakemake-mapping.smk diff --git a/samples/Python/snakemake-mapping.smk b/samples/Python/snakemake-mapping.smk deleted file mode 100644 index e48bb27fb7..0000000000 --- a/samples/Python/snakemake-mapping.smk +++ /dev/null @@ -1,90 +0,0 @@ -# Source: https://raw.githubusercontent.com/snakemake-workflows/dna-seq-gatk-variant-calling/master/rules/mapping.smk -# Accessed: Jan 10 2020 by Nils Homer -# License: MIT (https://github.com/snakemake-workflows/dna-seq-gatk-variant-calling/blob/master/LICENSE) - -rule trim_reads_se: - input: - unpack(get_fastq) - output: - temp("trimmed/{sample}-{unit}.fastq.gz") - params: - extra="", - **config["params"]["trimmomatic"]["se"] - log: - "logs/trimmomatic/{sample}-{unit}.log" - wrapper: - "0.30.0/bio/trimmomatic/se" - - -rule trim_reads_pe: - input: - unpack(get_fastq) - output: - r1=temp("trimmed/{sample}-{unit}.1.fastq.gz"), - r2=temp("trimmed/{sample}-{unit}.2.fastq.gz"), - r1_unpaired=temp("trimmed/{sample}-{unit}.1.unpaired.fastq.gz"), - r2_unpaired=temp("trimmed/{sample}-{unit}.2.unpaired.fastq.gz"), - trimlog="trimmed/{sample}-{unit}.trimlog.txt" - params: - extra=lambda w, output: "-trimlog {}".format(output.trimlog), - **config["params"]["trimmomatic"]["pe"] - log: - "logs/trimmomatic/{sample}-{unit}.log" - wrapper: - "0.30.0/bio/trimmomatic/pe" - - -rule map_reads: - input: - reads=get_trimmed_reads - output: - temp("mapped/{sample}-{unit}.sorted.bam") - log: - "logs/bwa_mem/{sample}-{unit}.log" - params: - index=config["ref"]["genome"], - extra=get_read_group, - sort="samtools", - sort_order="coordinate" - threads: 8 - wrapper: - "0.27.1/bio/bwa/mem" - - -rule mark_duplicates: - input: - "mapped/{sample}-{unit}.sorted.bam" - output: - bam=temp("dedup/{sample}-{unit}.bam"), - metrics="qc/dedup/{sample}-{unit}.metrics.txt" - log: - "logs/picard/dedup/{sample}-{unit}.log" - params: - config["params"]["picard"]["MarkDuplicates"] - wrapper: - "0.26.1/bio/picard/markduplicates" - - -rule recalibrate_base_qualities: - input: - bam=get_recal_input(), - bai=get_recal_input(bai=True), - ref=config["ref"]["genome"], - known=config["ref"]["known-variants"] - output: - bam=protected("recal/{sample}-{unit}.bam") - params: - extra=get_regions_param() + config["params"]["gatk"]["BaseRecalibrator"] - log: - "logs/gatk/bqsr/{sample}-{unit}.log" - wrapper: - "0.27.1/bio/gatk/baserecalibrator" - - -rule samtools_index: - input: - "{prefix}.bam" - output: - "{prefix}.bam.bai" - wrapper: - "0.27.1/bio/samtools/index" From b07a4fc937d83fa39ab2215e242183a4f91ae53c Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Fri, 17 Feb 2023 13:22:44 +0100 Subject: [PATCH 08/12] add language id --- lib/linguist/languages.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index a89aba9a06..1f7e30f278 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -6424,6 +6424,7 @@ Snakemake: - Snakefile aliases: - snakefile + language_id: 151241392 Solidity: type: programming color: "#AA6746" From e0203ffd45fef59400928240207b6391caa7f47a Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Fri, 17 Feb 2023 13:29:45 +0100 Subject: [PATCH 09/12] Rename Snakefile to template.snakefile --- samples/Snakemake/{Snakefile => template.snakefile} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename samples/Snakemake/{Snakefile => template.snakefile} (100%) diff --git a/samples/Snakemake/Snakefile b/samples/Snakemake/template.snakefile similarity index 100% rename from samples/Snakemake/Snakefile rename to samples/Snakemake/template.snakefile From e151eacc82b06dcc436d4aa994e133858ad1c247 Mon Sep 17 00:00:00 2001 From: Colin Seymour Date: Mon, 27 Feb 2023 10:38:58 +0000 Subject: [PATCH 10/12] Update grammars README.md --- vendor/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/vendor/README.md b/vendor/README.md index 5493feb783..6bc14b1926 100644 --- a/vendor/README.md +++ b/vendor/README.md @@ -503,6 +503,7 @@ This is a list of grammars that Linguist selects to provide syntax highlighting - **Smalltalk:** [tomas-stefano/smalltalk-tmbundle](https://github.com/tomas-stefano/smalltalk-tmbundle) - **Smarty:** [textmate/php-smarty.tmbundle](https://github.com/textmate/php-smarty.tmbundle) - **Smithy:** [awslabs/smithy-vscode](https://github.com/awslabs/smithy-vscode) +- **Snakemake:** [MagicStack/MagicPython](https://github.com/MagicStack/MagicPython) - **Solidity:** [davidhq/SublimeEthereum](https://github.com/davidhq/SublimeEthereum) - **Soong:** [flimberger/android-system-tools](https://github.com/flimberger/android-system-tools) - **SourcePawn:** [Dreae/sourcepawn-vscode](https://github.com/Dreae/sourcepawn-vscode) From fbaef2b498ce514515e74d14b0e8cde2fa127be1 Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Mon, 27 Feb 2023 14:26:31 +0100 Subject: [PATCH 11/12] change snakemake color to logo color --- lib/linguist/languages.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/linguist/languages.yml b/lib/linguist/languages.yml index a473febfe9..d22c74f285 100644 --- a/lib/linguist/languages.yml +++ b/lib/linguist/languages.yml @@ -6498,7 +6498,7 @@ Snakemake: ace_mode: python codemirror_mode: python codemirror_mime_type: text/x-python - color: "#33c68a" + color: "#419179" extensions: - ".smk" - ".snakefile" From c01da47c32571a651d9c795fe8fee83c1044bf0c Mon Sep 17 00:00:00 2001 From: Silas Kieser Date: Wed, 8 Mar 2023 09:23:34 +0100 Subject: [PATCH 12/12] Create Snakefile --- samples/Snakemake/filenames/Snakefile | 49 +++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 samples/Snakemake/filenames/Snakefile diff --git a/samples/Snakemake/filenames/Snakefile b/samples/Snakemake/filenames/Snakefile new file mode 100644 index 0000000000..3980fed870 --- /dev/null +++ b/samples/Snakemake/filenames/Snakefile @@ -0,0 +1,49 @@ +#example from https://github.com/snakemake/snakemake/edit/main/examples/hello-world/Snakefile +configfile: "config.yaml" + + +rule all: + input: + expand( + "plots/{country}.hist.pdf", + country=config["countries"] + ) + + +rule select_by_country: + input: + "data/worldcitiespop.csv" + output: + "by-country/{country}.csv" + conda: + "envs/xsv.yaml" + shell: + "xsv search -s Country '{wildcards.country}' " + "{input} > {output}" + + +rule plot_histogram: + input: + "by-country/{country}.csv" + output: + "plots/{country}.hist.svg" + container: + "docker://faizanbashir/python-datascience:3.6" + script: + "scripts/plot-hist.py" + + +rule convert_to_pdf: + input: + "{prefix}.svg" + output: + "{prefix}.pdf" + wrapper: + "0.47.0/utils/cairosvg" + + +rule download_data: + output: + "data/worldcitiespop.csv" + shell: + "curl -L https://burntsushi.net/stuff/worldcitiespop.csv > {output}"