forked from OrBaruk/Malware-HMM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_hmm.sh
executable file
·58 lines (43 loc) · 1.11 KB
/
build_hmm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
#
# Rebuilds the per-cluster profile HMMs and the pressed HMM database.
# For every cluster directory found two levels below ./data/clusters the
# script encodes the cluster's .beh files, aligns them with muscle and feeds
# the alignment to hmmbuild; the resulting .hmm files are concatenated and
# passed to hmmpress.
# Every path is relative, so the script must be started from the directory
# that contains ./bin and ./data.

# Treat the use of an unset variable as an error, so a typo in a directory
# variable can never widen one of the clean-up globs below.
set -u

# Binaries
mfa2long=./bin/mfa2long
encoder=./bin/encoder

# Folders
clusters=./data/clusters
alignment=./data/train/alignment
sequences=./data/train/sequences
long=./data/train/alignment/long
hmm=./hmm
database=./database

# build stuff used here
# The helper binaries are not built by this script. Verify that they exist
# *before* the previous results are wiped, so a missing build does not cost
# the user an existing database.
for tool in "$mfa2long" "$encoder"; do
  if [[ ! -x "$tool" ]]; then
    printf 'error: %s is missing or not executable; build it first\n' \
      "$tool" >&2
    exit 1
  fi
done

# create dirs
mkdir -p -- "$sequences" "$alignment" "$long" "$hmm" "$database" || exit 1

# Clean
# Remove the regular files (and symlinks) left by a previous run. Directories
# are skipped on purpose: $long lives inside $alignment and must survive the
# clean-up of its parent. An empty directory leaves the glob unexpanded; the
# existence test filters that literal pattern out as well.
for dir in "$sequences" "$alignment" "$long" "$hmm" "$database"; do
  for stale in "${dir:?}"/*; do
    if [[ -f "$stale" || -L "$stale" ]]; then
      rm -f -- "$stale"
    fi
  done
done
# Build one profile HMM per cluster directory. The outer loop walks the
# directories directly under $clusters, the inner loop walks their
# sub-directories; each sub-directory is treated as one cluster and its
# directory name becomes the base name of every file generated for it.
for d in "$clusters"/*/; do
  # A glob without matches stays a literal pattern; skip it.
  [[ -d "$d" ]] || continue

  for c in "$d"*/; do
    [[ -d "$c" ]] || continue

    # "$c" ends with a slash: drop it, then keep the last path component.
    filename=${c%/}
    filename=${filename##*/}
    printf '%s\n' "$filename"

    fasta=$sequences/$filename.fa
    msa=$alignment/$filename.mfa

    # Encode files
    beh_files=("$c"*.beh)
    if [[ ! -e "${beh_files[0]}" ]]; then
      printf 'warning: no .beh files in %s, skipping\n' "$c" >&2
      continue
    fi
    if ! "$encoder" "${beh_files[@]}" > "$fasta"; then
      printf 'warning: encoder failed for %s, skipping\n' "$filename" >&2
      continue
    fi

    # MSA
    #mafft --auto --text $sequences/$filename.fa > $alignment/$filename.mfa 2> /dev/null
    if ! muscle -in "$fasta" -out "$msa" -matrix ./data/pair_matrix.txt \
        -gapopen -18.0 -gapextend -1.0 -center 0.0; then
      printf 'warning: muscle failed for %s, skipping\n' "$filename" >&2
      continue
    fi
    if [[ ! -f "$msa" ]]; then
      printf 'warning: muscle produced no alignment for %s, skipping\n' \
        "$filename" >&2
      continue
    fi

    # Checks if we have enough samples for a hmm
    # NOTE(review): this counts lines of the alignment file, not sequences.
    # If muscle wraps long sequences over several lines, a single-sequence
    # alignment also passes this test - confirm the threshold is intended.
    lines=$(wc -l < "$msa")
    if (( lines > 2 )); then
      # Convert to long
      "$mfa2long" "$msa" > "$long/$filename.long" \
        || printf 'warning: mfa2long failed for %s\n' "$filename" >&2

      if ! hmmbuild "$hmm/$filename.hmm" "$msa"; then
        printf 'warning: hmmbuild failed for %s\n' "$filename" >&2
        # Do not leave a partial model behind: every .hmm file in $hmm is
        # later concatenated into the database.
        rm -f -- "$hmm/$filename.hmm"
      fi
    fi
  done
done
# Final stage: encode everything in ./data/AntiVir_test into one FASTA file,
# then concatenate the per-cluster models into a single database and press it.
# The exit status is non-zero when either the encoding or hmmpress failed.
status=0

# The output directories may not exist on a fresh checkout.
mkdir -p -- ./data/test "$database" || exit 1

# Expand the glob into an array first: without matches it stays a literal
# pattern, which must not be handed to the encoder as if it were a file.
test_samples=(./data/AntiVir_test/*)
if [[ ! -e "${test_samples[0]}" ]]; then
  printf 'warning: no samples found in ./data/AntiVir_test\n' >&2
  status=1
elif ! "$encoder" "${test_samples[@]}" > ./data/test/All.fa; then
  printf 'warning: encoder failed on ./data/AntiVir_test\n' >&2
  status=1
fi

# Without any model, cat would only create an empty database file that
# hmmpress cannot index, so stop with a clear message instead.
hmm_files=("$hmm"/*.hmm)
if [[ ! -e "${hmm_files[0]}" ]]; then
  printf 'error: no .hmm files in %s, cannot build the database\n' "$hmm" >&2
  exit 1
fi
cat -- "${hmm_files[@]}" > "$database/db.mhmm" || exit 1

# -f overwrites index files left by an earlier run; hmmpress otherwise
# refuses to run when they already exist.
hmmpress -f "$database/db.mhmm" || status=$?
exit "$status"