# vignette_config.yaml
---
# Input reference data, read filtering, UMI handling and working directories.
# Booleans use the canonical lowercase true/false form.
adapters: CTGTAGGCACC  # Illumina sequencing adapter(s) to remove
asite_disp_length_file: data/yeast_standard_asite_disp_length.txt  # Table of fixed A-site positions by read length
buffer: 250  # Length of flanking region around the CDS
build_indices: true  # Build indices for aligner? If true, remake indices from FASTA files
codon_positions_file: data/yeast_codon_pos_i200.RData  # Codon positions in each gene
count_reads: true  # Scan input, temporary and output files and produce counts of reads in each FASTQ, SAM, and BAM file processed?
count_threshold: 64  # Remove genes with a read count below this threshold, when generating statistics and figures
dataset: vignette  # Dataset name
dedup_stats: false  # Output UMI deduplication statistics?
dedup_umis: false  # Extract UMIs and deduplicate reads if true
dir_index: vignette/index  # Built indices directory
dir_in: vignette/input  # Input directory
dir_out: vignette/output  # Output directory
dir_tmp: vignette/tmp  # Intermediate files directory
# NOTE(review): the description on the next key may not match the key's name - confirm.
output_metagene_normalized_profile: true  # Calculate position-specific nucleotide frequency?
extract_umis: false  # Extract UMIs if true
feature: CDS  # Feature type
features_file: data/yeast_features.tsv  # Features to correlate with ORFs
# Mapping of sample name -> FASTQ file. Entries must be indented under
# fq_files; at column 0 they would parse as unrelated top-level keys and
# fq_files itself would be null.
fq_files:  # FASTQ files to be processed, relative to dir_in
  WTnone: SRR1042855_s1mi.fastq.gz
  WT3AT: SRR1042864_s1mi.fastq.gz
  NotHere: example_missing_file.fastq.gz  # Test case for missing file
# UMI grouping, GFF layout and batch job submission settings.
group_umis: false  # Summarise UMI groups before and after deduplication, if true
is_riboviz_gff: true  # Does the GFF file contain 3 elements per gene - UTR5, CDS, and UTR3?
job_email_events: beas  # Events triggering emails about batch job (job submission). Any combination of b - begin, e - end, a - abort, s - suspend.
job_email: null  # E-mail address for batch job events (job submission).
job_memory: 8G  # Requested memory for batch job (job submission).
job_name: riboviz  # Name of batch job (job submission).
job_num_cpus: 4  # Requested number of CPUs for batch job (job submission).
job_parallel_env: mpi  # Requested parallel environment for batch job (Grid Engine job submission).
# Kept quoted: an unquoted digits-and-colons value can be read as a
# sexagesimal number by YAML 1.1 parsers instead of a string.
job_runtime: '48:00:00'  # Maximum runtime for batch job (job submission).
# Output options, read-length limits and Nextflow report/work locations.
make_bedgraph: true  # Output bedgraph files, as TSV, in addition to h5?
max_read_length: 50  # Maximum read length in H5 output
min_read_length: 10  # Minimum read length in H5 output
multiplex_fq_files: null  # Multiplexed FASTQ files to be processed, relative to dir_in
nextflow_dag_file: nextflow-dag.html  # Nextflow DAG file (job submission).
nextflow_report_file: nextflow-report.html  # Nextflow report file (job submission).
nextflow_timeline_file: nextflow-timeline.html  # Nextflow timeline file (job submission).
nextflow_trace_file: nextflow-trace.tsv  # Nextflow trace file (job submission).
nextflow_work_dir: work  # Nextflow work directory (job submission).
# Parallelism, ORF/rRNA reference files, gene identifiers and remaining flags.
# Booleans and nulls use the canonical lowercase true/false/null forms.
num_processes: 1  # Number of processes to parallelize over
orf_fasta_file: vignette/input/yeast_YAL_CDS_w_250utrs.fa  # ORF file to align to
orf_gff_file: vignette/input/yeast_YAL_CDS_w_250utrs.gff3  # GFF2/GFF3 file for ORFs
orf_index_prefix: YAL_CDS_w_250  # ORF index file prefix, relative to dir_index
output_pdfs: true  # Generate PDFs for sample-related plots?
primary_id: Name  # Primary gene IDs to access the data (YAL001C, YAL003W, etc.)
publish_index_tmp: false  # Publish index and temporary files to dir_index and dir_tmp? If false, use symlinks.
rpf: true  # Is the dataset an RPF or mRNA dataset?
rrna_fasta_file: vignette/input/yeast_rRNA_R64-1-1.fa  # rRNA file to avoid aligning to
rrna_index_prefix: yeast_rRNA  # rRNA index file prefix, relative to dir_index
run_static_html: true  # Create static html visualization per sample?
sample_sheet: null  # Sample sheet, TSV file with, at least, SampleID and TagRead (barcode) columns
samsort_memory: null  # Memory to give to 'samtools sort'
secondary_id: null  # Secondary gene IDs to access the data (COX1, EFB1, etc.)
stop_in_feature: false  # Are stop codons part of the feature annotations in GFF?
trim_5p_mismatches: true  # Trim mismatched 5' base?
t_rna_file: data/yeast_tRNAs.tsv  # tRNA estimates
umi_regexp: null  # UMI-tools-compliant regular expression to extract UMIs
# NOTE(review): undocumented here; presumably validates the configuration
# without running the workflow - confirm against the consumer.
validate_only: false