diff --git a/ChangeLog b/ChangeLog index fdbfd9e..9e4291d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,15 +1,17 @@ CHANGES ======= -* non-canonical deeptools to get rid of comp problem -* ignore -* cap snakemake -> tmp & profile -* rewrite - after alsieve -* ignore build -* purge&ignore -* restructure to make code less obfuscated + pave way for multicomps -* egg exclude -* RPKM bigwigs +* include frig fig, ignore > 1000 for fragsize, set threads motif cluster +* uropa change defaults + checks gtf +* make to preserve key order in yaml dump. Purge ESS +* switch to gene for uropa annotation as default +* Update README.md +* Wd (#9) +* meme working +* babysteps into tobias/motifs +* purgebuild +* Wd (#8) +* Wd (#6) * update readme * Shuffling * fix batch vs non batch in 1 run diff --git a/src/AOS/helper.py b/src/AOS/helper.py index 6666624..57917e3 100644 --- a/src/AOS/helper.py +++ b/src/AOS/helper.py @@ -30,8 +30,9 @@ def plotfragsize(frags): for i,r in df.iterrows(): size = int(r[0]) occ = int(r[1]) - for k in repeat(size, occ): - reps.append([r['Sample'], size]) + if size < 1000: + for k in repeat(size, occ): + reps.append([r['Sample'], size]) df = pd.DataFrame(reps) df.columns = ['sample', 'size'] df.sort_values(by=['sample'], inplace=True) @@ -238,7 +239,7 @@ def peak_boundaries(peaks, genomefa, of): with open(genomefa) as f: for line in f: if line.startswith('>'): - header = line.strip().replace('>', '') + header = str(line.strip().replace('>', '').split(' ')[0]) chromdic[header] = 0 else: chromdic[header] += len(line.strip()) @@ -246,7 +247,7 @@ def peak_boundaries(peaks, genomefa, of): peakchange = 0 with open(peaks) as f: for line in f: - chrom = line.strip().split()[0] + chrom = str(line.strip().split()[0]) start = int(line.strip().split()[1]) end = int(line.strip().split()[2]) if end > chromdic[chrom]: @@ -266,3 +267,38 @@ def peak_boundaries(peaks, genomefa, of): header=False, index=False ) + +def PCA_colors(config): + colors = [ + '#1f77b4', + '#ff7f0e', + '#2ca02c', + '#d62728', + '#9467bd', + '#8c564b', + '#e377c2', + '#7f7f7f', + '#bcbd22', + '#17becf' + ] + if config['files']['samplesheet']: + sdf = pd.read_csv( + config['files']['samplesheet'], + sep='\t', + header=0 + ) + sdf = sdf.set_index('sample') + sdf = sdf.loc[config['samples']] + colDic = {} + colIx = 0 + for s in sdf.iloc[:,[0]].values: + if s[0] not in colDic: + colDic[s[0]] = colors[colIx] + colIx += 1 + PCAstr = "--colors" + for s in sdf.iloc[:,[0]].values: + PCAstr += " \"{}\"".format( + colDic[s[0]] + ) + return (PCAstr) + return ("") diff --git a/src/AOS/rules/motifs.smk b/src/AOS/rules/motifs.smk index 45a5b0c..c5ee16f 100644 --- a/src/AOS/rules/motifs.smk +++ b/src/AOS/rules/motifs.smk @@ -11,6 +11,7 @@ rule clustermotifs: output: 'motifs_clustered/clusteredmotifs_consensus_motifs.meme' conda: config['envs']['tobias'] + threads: 10 shell:''' TOBIAS ClusterMotifs -m {input} -t 0.4 -a meme -p clusteredmotifs -o 'motifs_clustered' --dist_method seqcor ''' diff --git a/src/AOS/rules/peaks.smk b/src/AOS/rules/peaks.smk index e859c8f..1394709 100644 --- a/src/AOS/rules/peaks.smk +++ b/src/AOS/rules/peaks.smk @@ -1,4 +1,6 @@ from AOS.helper import peak_boundaries +from AOS.helper import PCA_colors + rule lnBams: input: os.path.join(config['dirs']['bamdir'], "{sample}.bam") @@ -164,11 +166,13 @@ rule multibigwigsum: rule plotPCA: input: - 'peakset/counts.bw.npz' + peakset = 'peakset/counts.bw.npz' output: 'figures/PCA.png' threads: 1 + params: + colstr = PCA_colors(config) conda: config['envs']['seqtools'] shell:''' - plotPCA --corData {input} -o {output} --transpose --ntop 5000 + plotPCA --corData {input.peakset} -o {output} --transpose --ntop 5000 {params.colstr} ''' \ No newline at end of file diff --git a/src/AOS/workflow.smk b/src/AOS/workflow.smk index b6f03bf..d174a22 100644 --- a/src/AOS/workflow.smk +++ b/src/AOS/workflow.smk @@ -33,7 +33,8 @@ def geto(): 'figures/mitofraction.png', 'qc/fragsize.tsv', 'figures/alignmentsieve.png', - 'figures/fragmentsizes.png' + 'figures/fragmentsizes.png', + 'figures/fripscores.png' ] ) # Differential