-
Notifications
You must be signed in to change notification settings - Fork 0
/
sharedmotif.jl
37 lines (31 loc) · 909 Bytes
/
sharedmotif.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
include("../general-functions/BioTools.jl")
using .BioTools
"""
Finding a Shared Motif
sharedmotif(fasta) -> String
# Arguments
- `fasta`: a collection of DNA strings in FASTA format.
# Returns
- `String`: a longest common substring of the collection.
(If multiple solutions exist, any single one of them is returned.)
"""
function sharedmotif(fasta)
    # `readfasta` is provided by BioTools; only the values of what it returns
    # (the sequences) are used here.
    return _longestsharedmotif(collect(values(readfasta(fasta))))
end

"""
    sharedmotif(strings::AbstractVector{<:AbstractString})

Return a longest substring common to every element of `strings`.

Returns `""` when `strings` is empty or when the sequences have no character
in common. If several longest substrings exist, one of them is returned.
"""
sharedmotif(strings::AbstractVector{<:AbstractString}) = _longestsharedmotif(strings)

# Core search used by both `sharedmotif` methods: find a longest substring that
# occurs in every element of `strings`.
function _longestsharedmotif(strings)
    isempty(strings) && return ""

    # Any shared motif is a substring of every sequence, so scanning the
    # shortest sequence keeps the number of candidate windows minimal.
    reference = strings[argmin(map(length, strings))]
    reflen = length(reference)

    longest = ""
    start = 1
    len = 1  # begin with length 1 so single-character motifs are found too

    # Invariant: every window starting before `start` has already failed at some
    # length <= `len`, so only windows at `start` or later can reach length `len`.
    # NOTE: range indexing assumes single-byte (ASCII) characters, as in DNA strings.
    while start + len - 1 <= reflen
        candidate = reference[start:(start + len - 1)]
        if all(seq -> occursin(candidate, seq), strings)
            longest = candidate
            len += 1    # shared by all: try to extend the window at this position
        else
            start += 1  # not shared: slide the window one position to the right
        end
    end
    return longest
end
# Script entry point: compute a longest shared motif for the sequences in
# "motiffasta.txt" and print it followed by a newline.
"motiffasta.txt" |> sharedmotif |> println