-
Notifications
You must be signed in to change notification settings - Fork 0
/
sliding_window_16102022.py
51 lines (42 loc) · 1.57 KB
/
sliding_window_16102022.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#! /usr/bin/env python3
import argparse
import sys
def _build_parser():
    """Return the argument parser for the sliding-window FASTA generator."""
    parser = argparse.ArgumentParser(
        description='Generate fasta file with sliding windows')
    parser.add_argument('-i', '--input', help='Input fasta file',
                        required=True)
    parser.add_argument('-o', '--output', help='Output fasta file',
                        required=True)
    # type=int lets argparse report a non-numeric value as a usage error
    # instead of an uncaught ValueError traceback.
    parser.add_argument('-w', '--window', help='Window size',
                        required=True, type=int)
    parser.add_argument('-s', '--step', help='Step size',
                        required=True, type=int)
    # choices rejects anything other than the two supported directions;
    # an unknown value would otherwise silently produce an empty output.
    parser.add_argument('-t', '--type', help='forward or reverse',
                        required=True, choices=('forward', 'reverse'))
    return parser


def _window_starts(seqlen, window, step, direction):
    """Return the 0-based start offsets of every full window.

    "forward" walks from the start of the sequence towards its end;
    "reverse" anchors the first window at the end of the sequence and
    walks back towards the start. Any other direction yields no windows.
    Requires 1 <= window <= seqlen and step >= 1.
    """
    # Integer floor division: number of full windows that fit.
    count = (seqlen - window) // step + 1
    if direction == "forward":
        return range(0, count * step, step)
    if direction == "reverse":
        last = seqlen - window
        return range(last, last - count * step, -step)
    return range(0)


def _record_windows(header, seq, window, step, direction):
    """Return alternating header/sequence lines for one FASTA record.

    Each window header is the record header followed by the 1-based,
    inclusive start and end coordinates: ``<header>_<start>_<end>``.
    """
    lines = []
    for start in _window_starts(len(seq), window, step, direction):
        lines.append("{}_{}_{}".format(header, start + 1, start + window))
        lines.append(seq[start:start + window])
    return lines


def _collect_windows(path, window, step, direction):
    """Read the FASTA file at *path* and return all output lines.

    Only the single line that immediately follows each ``>`` header is
    used as that record's sequence; other lines are ignored. Exits the
    program with an error message when a sequence is shorter than the
    window.
    """
    lines = []
    with open(path, "r") as handle:
        for line in handle:
            if not line.startswith(">"):
                continue
            # A header on the last line has no sequence; treat it as empty.
            seq = next(handle, "").rstrip()
            # The window only has to fit inside the sequence. The previous
            # check (step * window > length) wrongly rejected valid
            # overlapping-window runs whenever step was greater than 1.
            if window > len(seq):
                sys.exit(
                    "Error: sequence length is less than window size")
            lines.extend(
                _record_windows(line.rstrip(), seq, window, step, direction))
    return lines


def main(argv=None):
    """Run the sliding-window generator.

    argv: list of command-line arguments (without the program name);
    when None, argparse reads them from sys.argv.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)
    if args.window < 1 or args.step < 1:
        parser.error("window and step must be positive integers")

    # All records are processed before the output file is opened, so a
    # failure on any record leaves an existing output file untouched.
    lines = _collect_windows(args.input, args.window, args.step, args.type)
    # Context manager guarantees the output file is flushed and closed.
    with open(args.output, "w") as out:
        print(*lines, sep="\n", file=out)
    print("file written to " + args.output)


if __name__ == "__main__":
    main()