-
Notifications
You must be signed in to change notification settings - Fork 0
/
rosalind_long.py
57 lines (51 loc) · 1.88 KB
/
rosalind_long.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: utf-8 -*-
import difflib
# Rosalind "LONG": rebuild a superstring from reads that overlap by more than
# half their length.  Written to run unchanged on Python 2 and Python 3.
fileName = 'rosalind_long.txt'


def read_fasta_sequences(lines):
    """Return the sequences of a FASTA document, in file order.

    ``lines`` is any iterable of text lines (an open file works).  Lines
    starting with '>' begin a new record; every other non-blank line is
    appended to the current record.  Surrounding whitespace, including
    '\\r\\n' line endings, is stripped.  Records without any sequence data
    are dropped.

    Raises ValueError if sequence data appears before the first header.
    """
    records = []  # one list of chunks per FASTA record
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('>'):
            records.append([])
        elif records:
            records[-1].append(line)
        else:
            raise ValueError('sequence data found before the first FASTA header')
    return [''.join(chunks) for chunks in records if chunks]


def merge_overlapping(superstring, read):
    """Glue ``read`` onto either end of ``superstring`` if they overlap enough.

    The overlap must be strictly longer than half of the shorter string.
    Longer overlaps are tried first; for a given length, appending ``read``
    on the right is tried before prepending it on the left.

    Returns the merged string, or None when no sufficient overlap exists.
    """
    shortest = min(len(superstring), len(read))
    # ``overlap`` runs from the full shorter length down to shortest // 2 + 1,
    # i.e. every length that is more than half of the shorter string.
    for overlap in range(shortest, shortest // 2, -1):
        if superstring.endswith(read[:overlap]):
            return superstring + read[overlap:]
        if superstring.startswith(read[-overlap:]):
            return read + superstring[overlap:]
    return None


def assemble(reads):
    """Assemble ``reads`` into one superstring by repeated overlap merging.

    The first read seeds the superstring; the others are merged in whenever
    they overlap one of its ends (see ``merge_overlapping``).  Returns ''
    for an empty input.

    Raises ValueError if a full pass over the remaining reads merges nothing,
    since no further progress is possible from that point.
    """
    if not reads:
        return ''
    superstring = reads[0]
    pending = list(reads[1:])
    while pending:
        # Build a fresh list instead of deleting from the one being iterated.
        leftover = []
        for read in pending:
            merged = merge_overlapping(superstring, read)
            if merged is None:
                leftover.append(read)
            else:
                superstring = merged
        if len(leftover) == len(pending):
            raise ValueError(
                'cannot assemble: %d read(s) do not overlap the superstring '
                'by more than half their length' % len(leftover))
        pending = leftover
    return superstring


def main():
    """Read the FASTA file named by ``fileName`` and print the superstring."""
    with open(fileName) as handle:
        reads = read_fasta_sequences(handle)
    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print(assemble(reads))


if __name__ == '__main__':
    main()