forked from jazsakr/lr_blk_smk
-
Notifications
You must be signed in to change notification settings - Fork 3
/
config.yml
150 lines (126 loc) · 6.79 KB
/
config.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# things to update
# Directory of the TranscriptClean checkout. YAML keeps the leading `~` as
# literal text (only a bare `~` means null), so home-directory expansion is
# left to whatever reads this value.
# NOTE(review): confirm the consumer expands `~` before using the path.
tc_path: '~/mortazavi_lab/bin/TranscriptClean/'
# `sr` file templates; `{...}` segments are placeholders filled in by the
# consumer of this config (presumably short-read data, going by the
# `sr_bulk` directory name -- TODO confirm).
sr:
  # Must be quoted: a bare {sr_file} is parsed by YAML as the flow mapping
  # {sr_file: null} rather than as the placeholder string "{sr_file}".
  bam: '{sr_file}'
  # sorted BAM and its .bai index
  bam_sorted: data/sr_bulk/star/{mouse_id}_sorted.bam
  bam_index: data/sr_bulk/star/{mouse_id}_sorted.bam.bai
  # one bigwig per {strand}
  bw: data/sr_bulk/bigwig/{mouse_id}_{strand}.bw
# Reference files: download URLs plus the local paths under ref/.
ref:
  # genome FASTA (mm10 no-alt analysis set, per the URL)
  fa_link: 'https://www.encodeproject.org/files/mm10_no_alt_analysis_set_ENCODE/@@download/mm10_no_alt_analysis_set_ENCODE.fasta.gz'
  fa_gz: ref/genome.fa.gz
  fa: ref/genome.fa
  chrom_sizes: ref/genome_chrom_sizes.tsv

  # gene annotation GTF (GENCODE vM21 with UCSC names, per the URL)
  gtf_link: 'https://www.encodeproject.org/files/gencode.vM21.primary_assembly.annotation_UCSC_names/@@download/gencode.vM21.primary_assembly.annotation_UCSC_names.gtf.gz'
  gtf_gz: ref/annot.gtf.gz
  gtf: ref/annot.gtf
  gtf_utr: ref/annot_utr.gtf
  sjs: ref/annot_junc.bed
  talon_db: ref/ref.db

  # `ca` reference (.h5 download) and the files kept alongside it;
  # ca_ends is templated on {end_mode}
  ca_link: 'https://www.encodeproject.org/files/ENCFF999KXH/@@download/ENCFF999KXH.h5'
  ca: ref/ca.h5
  ca_ends: ref/ca_{end_mode}.bed
  ca_ics: ref/ca_ic.tsv
  ca_annot: ref/ca_annot.h5
# Path templates for every pipeline file under data/{batch}/.
# `{...}` segments are placeholders filled in by the consumer of this config.
data:
  # raw reads, one file per dataset + flowcell
  fastq: data/{batch}/raw/{dataset}_{flowcell}.fastq

  # mapping (minimap2/ directory)
  map_stats: data/{batch}/minimap2/{dataset}_{flowcell}_map_stats.txt
  sam: data/{batch}/minimap2/{dataset}_{flowcell}.sam
  bam: data/{batch}/minimap2/{dataset}_{flowcell}.bam
  sam_log: data/{batch}/minimap2/{dataset}_{flowcell}_minimap.log
  sam_rev: data/{batch}/minimap2/{dataset}_{flowcell}.rev.sam

  # transcriptclean (tc/ directory)
  sam_clean: data/{batch}/tc/{dataset}_{flowcell}_clean.sam
  fa_clean: data/{batch}/tc/{dataset}_{flowcell}_clean.fa
  sam_clean_log: data/{batch}/tc/{dataset}_{flowcell}_clean.log
  sam_clean_te_log: data/{batch}/tc/{dataset}_{flowcell}_clean.TE.log
  tc_stats: data/{batch}/tc/{dataset}_{flowcell}_tc_stats.txt

  # TODO -- put these in a separate talon_label --> study directory?
  # talon label, per dataset + flowcell
  sam_label: data/{batch}/talon/{dataset}_{flowcell}_labeled.sam
  bam_label_sorted: data/{batch}/talon/{dataset}_{flowcell}_labeled.sorted.bam
  bam_label_sorted_index: data/{batch}/talon/{dataset}_{flowcell}_labeled.sorted.bam.bai

  # labeled files merged per dataset (no {flowcell} in the name)
  bam_label_merge: data/{batch}/talon/{dataset}_labeled_merged.bam
  bam_label_merge_sorted: data/{batch}/talon/{dataset}_labeled_merged_sorted.bam
  bam_label_merge_index: data/{batch}/talon/{dataset}_labeled_merged_sorted.bam.bai

  # bigwigs, one per dataset + strand
  bw: data/{batch}/bigwig/{dataset}_{strand}.bw

  # talon files - one set per study + talon run
  talon_config: data/{batch}/talon/{study}/talon_{talon_run}_config.csv
  talon_db: data/{batch}/talon/{study}/annot_{talon_run}_talon.db
  read_annot: data/{batch}/talon/{study}/annot_{talon_run}_talon_read_annot.tsv
  talon_temp: data/{batch}/talon/{study}/annot_{talon_run}_temp/

  # talon files - one set per study
  ab: data/{batch}/talon/{study}/annot_talon_abundance.tsv
  filt_list: data/{batch}/talon/{study}/talon_pass_list.tsv
  filt_ab: data/{batch}/talon/{study}/annot_talon_abundance_filtered.tsv
  filt_gtf: data/{batch}/talon/{study}/annot_talon_observedOnly.gtf
  full_read_annot: data/{batch}/talon/{study}/annot_talon_read_annot.tsv

  # lapa
  lapa_config: data/{batch}/lapa/{study}/lapa_config.csv
  lapa_ends: data/{batch}/lapa/{study}/{end_mode}/{end_mode}_clusters.bed
  lapa_links: data/{batch}/lapa/{study}/tss_to_tes_links.csv
  lapa_gtf: data/{batch}/lapa/{study}/lapa_corrected.gtf
  lapa_ab: data/{batch}/lapa/{study}/lapa_corrected_abundance.tsv

  # lapa filtering
  lapa_filt_list: data/{batch}/lapa/{study}/lapa_pass_list.tsv
  lapa_filt_gtf: data/{batch}/lapa/{study}/lapa_corrected_filtered.gtf
  lapa_filt_ab: data/{batch}/lapa/{study}/lapa_corrected_abundance_filtered.tsv

  # cerberus - per-study files, then the agg_* / ca_ref files per batch
  ics: data/{batch}/cerberus/{study}/ics.tsv
  ends: data/{batch}/cerberus/{study}/{end_mode}.bed
  agg_ics: data/{batch}/cerberus/agg_ics.tsv
  agg_ends: data/{batch}/cerberus/agg_{end_mode}.bed
  ca_ref: data/{batch}/cerberus/ca_ref.h5

  # cerberus annot
  ca_ref_annot: data/{batch}/cerberus/ca_vM21_annot.h5
  # sequential
  ca_annot: data/{batch}/cerberus/ca_{study}_{cerb_run}_annot.h5
  # parallel / non-overlapping
  ca_annot_2: data/{batch}/cerberus/{study}/ca_annot.h5
  ca_ref_gtf: data/{batch}/cerberus/ca_vM21.gtf
  ca_gtf: data/{batch}/cerberus/{study}/ca.gtf
  ca_ab: data/{batch}/cerberus/{study}/ca_ab.tsv

  # swan
  swan_meta: data/{batch}/swan/{study}/swan_metadata.tsv
  sg: data/{batch}/swan/{study}/swan.p
  die_tsv: data/{batch}/swan/{genotype1}_{genotype2}_die.tsv
  adata: data/{batch}/swan/swan_{feature}_adata.h5ad

  # degs and dets - one table per genotype pair + feature
  de_tsv: data/{batch}/de/{genotype1}_{genotype2}_de_{feature}.tsv

  # trouble region - subset BAMs under debug/
  bam_subset: data/{batch}/debug/{dataset}_subset.bam
  bam_subset_sorted: data/{batch}/debug/{dataset}_subset_sorted.bam
  bam_subset_index: data/{batch}/debug/{dataset}_subset_sorted.bam.bai

  # fusions - one debug/ subdirectory per {fusion_gene1}_{fusion_gene2} pair
  bam_fusion_subset: data/{batch}/debug/{fusion_gene1}_{fusion_gene2}/{dataset}_{fusion_gene1}_{fusion_gene2}_subset.bam
  bam_fusion_subset_sorted: data/{batch}/debug/{fusion_gene1}_{fusion_gene2}/{dataset}_{fusion_gene1}_{fusion_gene2}_subset_sorted.bam
  bam_fusion_subset_index: data/{batch}/debug/{fusion_gene1}_{fusion_gene2}/{dataset}_{fusion_gene1}_{fusion_gene2}_subset_sorted.bam.bai
  fusion_read_names: data/{batch}/debug/{fusion_gene1}_{fusion_gene2}/{dataset}_{fusion_gene1}_{fusion_gene2}_read_names.txt
  all_fusion_read_names: data/{batch}/debug/{dataset}_fusion_read_names.txt

  # bam minus fusion reads
  bam_minus_fusion: data/{batch}/talon/minus_fusion/{dataset}.bam
  bam_minus_fusion_sorted: data/{batch}/talon/minus_fusion/{dataset}_sorted.bam
  bam_minus_fusion_index: data/{batch}/talon/minus_fusion/{dataset}_sorted.bam.bai
# # trouble regions
# bam_gfap_subset: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_subset.bam
# bam_gfap_subset_sorted: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_subset_sorted.bam
# bam_gfap_subset_index: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_subset_sorted.bam.bai
#
# bam_eftud2_gfap_subset: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_eftud2_subset.bam
# bam_eftud2_gfap_subset_sorted: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_eftud2_subset_sorted.bam
# bam_eftud2_gfap_subset_index: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_eftud2_subset_sorted.bam.bai
#
# eftud2_gfap_fusion_names: data/{batch}/debug/gfap_eftud2/{dataset}_gfap_eftud2_read_names.txt
#
#
#
# # trouble regions
# bam_plp1_subset: data/{batch}/debug/plp1_bc/{dataset}_plp1_subset.bam
# bam_plp1_subset_sorted: data/{batch}/debug/plp1_bc/{dataset}_plp1_subset_sorted.bam
# bam_plp1_subset_index: data/{batch}/debug/plp1_bc/{dataset}_plp1_subset_sorted.bam.bai
#
# bam_bc_plp1_subset: data/{batch}/debug/plp1_bc/{dataset}_plp1_bc_subset.bam
# bam_bc_plp1_subset_sorted: data/{batch}/debug/plp1_bc/{dataset}_plp1_bc_subset_sorted.bam
# bam_bc_plp1_subset_index: data/{batch}/debug/plp1_bc/{dataset}_plp1_bc_subset_sorted.bam.bai
#
# bam_bc_plp1_fusion_names: data/{batch}/debug/plp1_bc/{dataset}_plp1_bc_read_names.txt