forked from henryjuho/sal_enhancers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bootstrappable_sfs_data_enhancers.py
64 lines (43 loc) · 2.06 KB
/
bootstrappable_sfs_data_enhancers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import argparse
import subprocess
import sys
def read_bed_loci(bed_file, chromo):
bed_cmd = 'zgrep ^{} {} | cut -f 4 | sort -u | grep -v "\\."'.format(chromo, bed_file)
gs = subprocess.Popen(bed_cmd, shell=True, stdout=subprocess.PIPE).communicate()[0].decode('utf-8').split('\n')[:-1]
return gs
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-bed_target', help='bed file of target sites', required=True)
parser.add_argument('-vcf', help='VCF file with called SNPs', required=True)
parser.add_argument('-call_fa', help='Fasta file of callable sites', required=True)
parser.add_argument('-region', help='Label to put in region column of output', required=True)
parser.add_argument('-chr', help='chromosome', required=True)
args = parser.parse_args()
# loop through bed file
counter = 0
print('region', 'window', 'sfs', 'n_call', sep='\t', file=sys.stdout)
for locus in read_bed_loci(args.bed_target, args.chr):
counter += 1
# get call
call_cmd = ('zgrep -w {} {} | '
'bedtools getfasta -fi {} -bed stdin | '
'grep -v ">" | tr -d "\\n"').format(locus, args.bed_target, args.call_fa)
print(call_cmd, file=sys.stderr)
n_call = subprocess.Popen(call_cmd, shell=True, stdout=subprocess.PIPE).communicate()[0]
n_call = n_call.decode('utf-8').count('K')
# get sfs
sfs_cmd = ('zgrep -w {} {} | '
'bedtools intersect -header -a {} -b stdin | '
'~/sfs_utils/vcf2raw_sfs.py '
'-mode snp').format(locus, args.bed_target, args.vcf)
print(sfs_cmd, file=sys.stderr)
sfs = subprocess.Popen(sfs_cmd, shell=True, stdout=subprocess.PIPE).communicate()[0]
sfs = sfs.decode('utf-8').split('\n')[:-1]
#sfs = [x.split()[0] for x in sfs]
if len(sfs) == 0:
sfs = '0'
else:
sfs = ','.join(sfs)
print(args.region, locus, sfs, n_call, sep='\t', file=sys.stdout)
if __name__ == '__main__':
main()