---
## config.yaml
## Important note:
## All paths defined in this configuration file must be either absolute or relative to the
## location of the Snakefile!
## Reference annotation details
##--------------------------------------------------------------------------------------------
## annotation: annotation source, either "Ensembl" or "Gencode"
## organism:   organism name; join the words with an underscore
## build, release: version of the reference (the example files use GRCh38, release 93)
annotation: "Ensembl"
organism: "Homo_sapiens"
build: "GRCh38"
release: 93
##--------------------------------------------------------------------------------------------
## Existing reference files
## These paths point to files that already exist (in the example: a cDNA FASTA file,
## a genomic FASTA file and a GTF annotation file).
##--------------------------------------------------------------------------------------------
txome: "example_data/reference/Ensembl.GRCh38.93/Homo_sapiens.GRCh38.cdna.all.1.1.10M.fa.gz"
genome: "example_data/reference/Ensembl.GRCh38.93/Homo_sapiens.GRCh38.dna.chromosome.1.1.10M.fa"
gtf: "example_data/reference/Ensembl.GRCh38.93/Homo_sapiens.GRCh38.93.1.1.10M.gtf"
##--------------------------------------------------------------------------------------------
## Index locations
## The indexes at these paths are generated by the workflow; they do not need to exist yet.
##--------------------------------------------------------------------------------------------
salmonindex: "example_data/reference/SalmonIndex/Homo_sapiens.GRCh38.93.sidx"
## The salmonk entry is disabled; "-k 31" is supplied via additional_salmon_index below.
#salmonk: 31
STARindex: "example_data/reference/STARIndex/Homo_sapiens.GRCh38.93.STAR.idx"
##--------------------------------------------------------------------------------------------
## Additional STAR parameters
## Here, you can specify any optional parameters for the index building and/or alignment
## with STAR. The following arguments are automatically populated and should NOT be
## specified here:
##   Indexing:  runMode, runThreadN, genomeDir, genomeFastaFiles, sjdbGTFfile, sjdbOverhang
##   Alignment: runMode, genomeDir, readFilesIn, runThreadN, outFileNamePrefix, outSAMtype,
##              readFilesCommand
##--------------------------------------------------------------------------------------------
## Optional arguments for STAR index building. Add or remove parameters inside the "".
additional_star_index: ""
## Optional arguments for STAR alignment. Add or remove parameters inside the "".
additional_star_align: ""
##--------------------------------------------------------------------------------------------
## Additional Salmon parameters
## Here, you can specify any optional parameters for the index building and/or
## abundance quantification with Salmon. The following arguments are automatically populated
## based on the arguments specified elsewhere, and should NOT be specified here:
##   Indexing:       transcriptome input file, index directory, gencode flag
##   Quantification: library type, fastq files, index directory, output directory,
##                   number of cores
##--------------------------------------------------------------------------------------------
## Optional arguments for Salmon index building. Add or remove parameters inside the "".
additional_salmon_index: "-k 31"
## Optional arguments for Salmon quantification. Add or remove parameters inside the "".
## We specify the mean (--fldMean) and standard deviation (--fldSD) of the fragment length
## distribution, for use with Salmon.
## This is important to specify for single-end reads.
## For paired-end reads, these values will define the prior, which is then updated
## based on the observed fragment lengths.
additional_salmon_quant: "--seqBias --gcBias --fldMean 250 --fldSD 25"
##--------------------------------------------------------------------------------------------
## Information about the experiment
##--------------------------------------------------------------------------------------------
readlength: 63
## metatxt: path to the metadata text file. At minimum it must contain these columns:
##   names - the sample identifiers, i.e. the names of the FASTQ files
##           (excluding the _R1/R2.fastq.gz part)
##   type  - either SE or PE, indicating whether the sample was analyzed via
##           single-end or paired-end sequencing
metatxt: "example_data/metadata.txt"
## Variables used for model fitting:
##   design   - design formula for use with edgeR, camera and DRIMSeq; must be a string
##              of the form "~ <predictors>"
##   contrast - list of contrasts to estimate in edgeR_dge.Rmd (comma-separated if
##              there is more than one)
design: "~ 0 + celline"
contrast: "cellineN61311-cellineN052611,cellineN052611-cellineN61311"
## Gene set analysis with camera
## genesets: comma-separated list of the gene set categories to test with camera.
## The categories must be a subset of H,C1,C2,C3,C4,C5,C6,C7.
## Only required if the variable "run_camera" is enabled (see below).
genesets: "H,C5"
## ncores: the maximal number of cores to use for FastQC, STAR, Salmon and DRIMSeq.
## Note that the actual number of cores available to Snakemake is determined by
## the --cores argument when it is invoked.
ncores: 1
##---------------------------------------------------------------------------------------------
## FASTQ input
##   FASTQ    - path to a folder containing gzipped fastq files
##   fqsuffix - the file suffix (typically, either fastq or fq)
##   fqext1, fqext2 - for paired-end fastq files, the extensions distinguishing the
##                    two read files
## ARMOR assumes the following file names:
##   paired-end: <sample-name>_<fqext1>.<fqsuffix>.gz and <sample-name>_<fqext2>.<fqsuffix>.gz
##   single-end: <sample-name>.<fqsuffix>.gz
##---------------------------------------------------------------------------------------------
FASTQ: "example_data/FASTQ"
fqext1: "R1"
fqext2: "R2"
fqsuffix: "fastq"
##---------------------------------------------------------------------------------------------
## Output folder
## The workflow stores its output here and generates additional subfolders inside it.
## Set output to "." to put the output in the current directory.
##---------------------------------------------------------------------------------------------
output: "example_data/output"
##---------------------------------------------------------------------------------------------
## R setup
##---------------------------------------------------------------------------------------------
## useCondaR: set to true if R should be installed in a conda environment, or to false if you
## want to use your own R installation (then you have to set the path to your library in the
## .Renviron file). Booleans are written in canonical lowercase YAML form (true/false).
useCondaR: true
## Rbin: presumably the name or path of the R executable to call - TODO confirm against
## the Snakefile.
Rbin: R
##---------------------------------------------------------------------------------------------
## Conditional conda rules
##---------------------------------------------------------------------------------------------
## Should read trimming, STAR mapping, DRIMSeq analysis and gene set analysis be performed?
## Set a step to false if it is not required. Booleans are written in canonical lowercase
## YAML form (true/false).
run_trimming: true
run_STAR: true
run_DRIMSeq: true
run_camera: true
##---------------------------------------------------------------------------------------------