Skip to content

Commit

Permalink
Merge pull request #25 from maxplanck-ie/external_peaks
Browse files Browse the repository at this point in the history
External peaks
  • Loading branch information
WardDeb authored Apr 9, 2024
2 parents a3020f7 + 0bb4dbf commit 70978c5
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 38 deletions.
10 changes: 9 additions & 1 deletion src/AOS/atac.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@
show_default=True,
help='Pseudocount to add to the count matrix prior to differential calling.'
)
@click.option(
'--peakset',
required=False,
default=None,
show_default=True,
help='Include an external peak file (bed format).'
)
def main(bamdir,
outputdir,
gtf,
Expand All @@ -133,7 +140,8 @@ def main(bamdir,
upstreamuro,
downstreamuro,
featureuro,
pseudocount
pseudocount,
peakset
):
# Init
pf = Preflight(**locals())
Expand Down
70 changes: 37 additions & 33 deletions src/AOS/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import repeat
import shutil

def idx_to_mit(_f):
count = 0
Expand Down Expand Up @@ -234,39 +235,42 @@ def tsv_to_bed(tsv, bed, grint):
index=False
)

def peak_boundaries(peaks, genomefa, of):
    """Clamp peak end coordinates to chromosome lengths and write a BED file.

    Chromosome lengths are computed from `genomefa` by summing the stripped
    length of every sequence line under each '>' header; the chromosome name
    is the header text up to the first space. Every peak in `peaks` whose end
    exceeds its chromosome length gets its end replaced by that length.

    Parameters:
        peaks: path to a whitespace-delimited peak file; the first three
            fields of each line are read as chrom, start, end.
        genomefa: path to the genome FASTA file.
        of: path of the tab-separated, header-less output file. Only the
            three columns chrom, start, end are written.

    Raises:
        ValueError: if `genomefa` has sequence text before any '>' header.
        KeyError: if a peak names a chromosome that is absent from `genomefa`.
    """
    chromdic = {}
    header = None
    with open(genomefa) as f:
        for line in f:
            if line.startswith('>'):
                header = str(line.strip().replace('>', '').split(' ')[0])
                chromdic[header] = 0
                continue
            seq = line.strip()
            if not seq:
                # Blank lines carry no bases; nothing to count.
                continue
            if header is None:
                raise ValueError(
                    "{}: sequence found before the first '>' header.".format(
                        genomefa
                    )
                )
            chromdic[header] += len(seq)

    bedlis = []
    peakchange = 0
    with open(peaks) as f:
        for line in f:
            # Split once; str.split() already ignores surrounding whitespace.
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            if chrom not in chromdic:
                raise KeyError(
                    "Chromosome {!r} in {} is not present in {}.".format(
                        chrom, peaks, genomefa
                    )
                )
            chromlen = chromdic[chrom]
            if end > chromlen:
                end = chromlen
                peakchange += 1
            bedlis.append([chrom, start, end])

    print("Changed {} peaks.".format(peakchange))
    beddf = pd.DataFrame(bedlis)
    beddf.to_csv(
        of,
        sep='\t',
        header=False,
        index=False
    )
def peak_boundaries(peaks, genomefa, peakset, of):
    """Produce the final peak file `of`.

    If `peakset` is truthy it is copied to `of` unchanged, and neither
    `peaks` nor `genomefa` is read. Otherwise every peak in `peaks` whose end
    exceeds its chromosome length gets its end replaced by that length.
    Chromosome lengths are computed from `genomefa` by summing the stripped
    length of every sequence line under each '>' header; the chromosome name
    is the header text up to the first space.

    Parameters:
        peaks: path to a whitespace-delimited peak file; the first three
            fields of each line are read as chrom, start, end.
        genomefa: path to the genome FASTA file.
        peakset: path to an external peak file, or a falsy value (None)
            to use `peaks`.
        of: output path. In the clamping branch the file is tab-separated,
            header-less, and holds only chrom, start, end.

    Raises:
        ValueError: if `genomefa` has sequence text before any '>' header.
        KeyError: if a peak names a chromosome that is absent from `genomefa`.
    """
    if peakset:
        # External peaks are taken as-is; no boundary clamping is applied.
        shutil.copyfile(peakset, of)
        return

    chromdic = {}
    header = None
    with open(genomefa) as f:
        for line in f:
            if line.startswith('>'):
                header = str(line.strip().replace('>', '').split(' ')[0])
                chromdic[header] = 0
                continue
            seq = line.strip()
            if not seq:
                # Blank lines carry no bases; nothing to count.
                continue
            if header is None:
                raise ValueError(
                    "{}: sequence found before the first '>' header.".format(
                        genomefa
                    )
                )
            chromdic[header] += len(seq)

    bedlis = []
    peakchange = 0
    with open(peaks) as f:
        for line in f:
            # Split once; str.split() already ignores surrounding whitespace.
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            if chrom not in chromdic:
                raise KeyError(
                    "Chromosome {!r} in {} is not present in {}.".format(
                        chrom, peaks, genomefa
                    )
                )
            chromlen = chromdic[chrom]
            if end > chromlen:
                end = chromlen
                peakchange += 1
            bedlis.append([chrom, start, end])

    print("Changed {} peaks.".format(peakchange))
    beddf = pd.DataFrame(bedlis)
    beddf.to_csv(
        of,
        sep='\t',
        header=False,
        index=False
    )

def PCA_colors(config):
colors = [
Expand Down
6 changes: 4 additions & 2 deletions src/AOS/preflight.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def __init__(
upstreamuro,
downstreamuro,
featureuro,
pseudocount
pseudocount,
peakset
):
def retabspath(_p):
if _p:
Expand All @@ -50,7 +51,8 @@ def retabspath(_p):
'fna': retabspath(genomefasta),
'motif': retabspath(motifs),
'samplesheet': retabspath(samplesheet),
'comparison': retabspath(comparison)
'comparison': retabspath(comparison),
'peakset': retabspath(peakset)
}
self.vars = {
'fragsize': fragsize,
Expand Down
5 changes: 3 additions & 2 deletions src/AOS/rules/peaks.smk
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,11 @@ rule peakbounds:
output:
'peakset/peaks.bed'
params:
config['files']['fna']
fna = config['files']['fna'],
peakset = config['files']['peakset']
threads: 1
run:
peak_boundaries(input[0], params[0], output[0])
peak_boundaries(input[0], params.fna, params.peakset, output[0])

rule uropa:
input:
Expand Down

0 comments on commit 70978c5

Please sign in to comment.