-
Notifications
You must be signed in to change notification settings - Fork 0
/
config_T2T.yaml
executable file
·73 lines (60 loc) · 3.16 KB
/
config_T2T.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
### Pipeline options
# Input parameters
# Format of the input data: "pod5" or "bam" (aligned BAM).
input_format: "pod5"
# Basecalling mode: "methylation" or "basic".
basecalling_mode: "methylation"
# Variant calling mode: "germline" or "somatic".
variant_calling_mode: "germline"
# Output parameter
# Presumably the directory where pipeline results are written; the value
# below looks like a placeholder to replace with a real path.
output_dir: "path/to/data_output/"
# Select which parts of the pipeline should be run.
# Each entry is a boolean flag; all parts are enabled in this configuration.
steps:
  basecalling: true
  alignment: true
  differential_methylation_sample: true
  differential_methylation_condition: true
  snv_calling: true
  sv_calling: true
  phasing: true
  cnv_calling: true
# Path to the design file (a .tsv file, going by the extension).
# The value below looks like a placeholder to replace with a real path.
design: "path/to/my_design.tsv"
# References information (T2T-CHM13v2.0 resources).
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation. Confirm that `chromosomes` and the DMR BED files belong under
# `references` rather than at the top level.
references:
  genome: "/mnt/beegfs/database/bioinfo/Index_DB/Fasta/Ensembl/T2T-CHM13v2.0/Homo_sapiens-GCA_009914755.4-unmasked.fa"
  genome_fai: "/mnt/beegfs/database/bioinfo/Index_DB/Fasta/Ensembl/T2T-CHM13v2.0/Homo_sapiens-GCA_009914755.4-unmasked.fa.fai"
  # genome_snpEff, dbnsfp and genome_annotsv are left as empty strings in
  # this configuration.
  genome_snpEff: ""
  dbnsfp: ""
  clinvar: "/mnt/beegfs/database/bioinfo/Index_DB/ClinVar/liftover_GRCh38_to_T2T-CHM13v2.0/chm13v2.0_ClinVar20220313.vcf.gz"
  genome_annotsv: ""
  annotsv: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/ANNOTATIONS/AnnotSV_annotations/"
  code_symbol_conversion: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/REFERENCES/DMR/T2T-CHM13v2.0/correspondance_table_ENST_ENSG.tsv"
  fastq_screen_conf: "/mnt/beegfs/database/bioinfo/Index_DB/Fastq_Screen/0.15.3/minimap2/fastq_screen.conf"
  # Chromosome names are quoted so the numeric ones stay strings.
  chromosomes: [
    '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
    '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y'
  ]
  # DMR references
  Alu: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/REFERENCES/DMR/T2T-CHM13v2.0/Alu_ucsc_T2T.bed"
  CpG: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/REFERENCES/DMR/T2T-CHM13v2.0/CpG_ucsc_T2T.bed"
  Transcript: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/REFERENCES/DMR/T2T-CHM13v2.0/Transcript_ucsc_T2T.bed"
# Basecalling parameters
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation. Confirm that `meth_type` belongs under `dorado_basecaller`.
dorado_basecaller:
  # Dorado basecaller model v5.0.0 SUP for R10.
  model: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/MODELS/dorado/model_r10/dna_r10.4.1_e8.2_400bps_sup@v5.0.0"
  # Model path for methylation; the directory name carries the
  # 5mCG_5hmCG@v2 suffix on top of the model above.
  model_meth: "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/MODELS/dorado/model_r10/dna_r10.4.1_e8.2_400bps_sup@v5.0.0_5mCG_5hmCG@v2"
  # Mandatory if basecalling is set to false.
  meth_type: ["5mCG", "5hmCG"]
# Reads filtering parameters
reads_filtering:
  # Only reads >= 1000 bp long will be kept for mapping.
  min_read_length: 1000
  # Only reads with a base quality score >= 10 will be kept for mapping.
  min_quality_score: 10
# Germline SNV calling parameters
clair3:
  # Rerio Clair3 model for data obtained using Dorado basecaller model
  # v5.0.0 SUP for R10. The value is a list (a single entry here).
  model:
    - "/mnt/beegfs/database/bioinfo/bigr_long-reads_bulk/MODELS/rerio/clair3_models/r1041_e82_400bps_sup_v500"
# Somatic SNV parameters
clairs:
  # R10 model, given as a model name rather than a filesystem path.
  model: "ont_r10_dorado_sup_5khz"
# SNV annotation parameters
snpsift:
  # Filter the VCF output from the variant caller to keep only variants
  # annotated as FILTER="PASS" before running the annotation and filtering
  # steps.
  keep_only_pass: true
  # One output file is produced per filter expression (two files here).
  filters:
    - "( QUAL >= 10 )"
    - "( QUAL >= 15 )"
# SNV graph parameters
maftools:
  # Path to a text file of genes of interest; the value below looks like a
  # placeholder to replace with a real path.
  genes_of_interest: "path/to/my_genes_list.txt"
# CNV parameters
spectre:
  # Left as an empty string in this configuration; presumably means no
  # blacklist is applied — confirm against the pipeline code.
  blacklist: ""