forked from caraortizmah/x-ray_scripting_out
-
Notifications
You must be signed in to change notification settings - Fork 0
/
step2.sh
executable file
·99 lines (82 loc) · 4.09 KB
/
step2.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
# Extracts, from the core MO population file produced by step1.sh, the rows
# that belong to the atoms of one residue of interest (res A).
#
# Usage: step2.sh A_ini A_fin MO_ini MO_fin out_file atmcore wavef
#
#   A_ini, A_fin    atom range that represents the residue of interest (res A)
#                   as occupied core MO
#   MO_ini, MO_fin  range of core MOs; the range used for C 1s has to be
#                   adapted for N and O
#   out_file        core MO population, in the specific format that was
#                   created previously by step1.sh
#   atmcore         atom type from the core space (C, N, S)
#   wavef           core orbital type (s, p)

# The first five arguments are indispensable: without them the seq and sed
# calls further down have nothing valid to work on. Fail early with a usage
# message instead of running the whole pipeline on empty operands.
if [ "$#" -lt 5 ]; then
  printf 'Usage: %s A_ini A_fin MO_ini MO_fin out_file atmcore wavef\n' "${0##*/}" >&2
  exit 2
fi

A_ini="$1"    # first atom number for residue A
A_fin="$2"    # last atom number for residue A
MO_ini="$3"   # first 1s core MO
MO_fin="$4"   # last 1s core MO
out_file="$5" # core MO population obtained from step1.sh
atmcore="$6"  # atom type from the core space (C, N, S)
wavef="$7"    # core orbital type (s, p)

# Every later step depends on this file; stop here if it cannot be read.
if [ ! -r "$out_file" ]; then
  printf '%s: cannot read core MO population file: %s\n' "${0##*/}" "$out_file" >&2
  exit 1
fi
# Only the core MOs that represent the target atoms are selected. The target
# atoms are called residue A (res A) here, because the interest lies in
# studying amino acids in proteins.
# Start from a clean slate: remove temporary files left by an earlier run.
for stale in resA_mo_3.tmp resA_mo_2.tmp resA_mo_2_1.tmp \
  resA_mo2.tmp resA_mo3.tmp mo_line.tmp; do
  rm -rf "$stale"
done
# For every MO number in [MO_ini, MO_fin], copy from the line where that
# number is found (surrounded by blanks) down to the first empty line.
# The temporary file resA_mo2.tmp therefore holds the MOs, each followed by
# the list of atoms that contribute population to it.
# Operands are quoted so that a file name containing blanks stays one operand
# and an empty value produces an error instead of making sed read stdin.
# NOTE(review): the pattern matches " N " anywhere on a line, not only on MO
# header lines - confirm that atom rows of the step1.sh output cannot match.
for ii in $(seq "$MO_ini" 1 "$MO_fin"); do
  sed -n "/ ${ii} /,/^$/p" "$out_file" >> resA_mo2.tmp
done
# Up to 6 MOs are placed on the same line in the step1.sh output, so a section
# (MO line plus its atom contribution list) can have been copied up to 6
# times. Keep only the first occurrence of every line, in the original order;
# anything awk writes to stderr is thrown away.
awk '!($0 in first_seen) { first_seen[$0]; print }' resA_mo2.tmp 2> /dev/null > resA_mo3.tmp
# Collect in the temporary file mo_line.tmp the line numbers, within
# resA_mo3.tmp, of the lines that mention each MO number of the range.
# A line that lists several MOs of the range is recorded once per MO; these
# repetitions are removed when the list is sorted afterwards.
# The grep output is appended directly, so an MO that matches nothing adds no
# blank placeholder line.
for ii in $(seq "$MO_ini" 1 "$MO_fin"); do
  grep -n -- " ${ii} " resA_mo3.tmp | cut -d':' -f1 >> mo_line.tmp
done
# Close the list of line numbers with the last line number of resA_mo3.tmp.
# wc reads from stdin so that it prints the bare count, with no file name
# that would have to be cut away (some wc implementations pad the count with
# leading blanks, which breaks cutting on the first blank).
wc -l < resA_mo3.tmp >> mo_line.tmp
# Unique line numbers in ascending numerical order.
uniq_mo_l="$(sort -nu mo_line.tmp)"
# The list is now organised in tuples: the first position of a tuple is the
# initial line number of an MO-atom-list section and the second position is
# its last line number, i.e. every number is paired with its successor.
# Blank entries carry no token and are skipped.
printf '%s\n' "$uniq_mo_l" |
  awk '{ for (i = 1; i <= NF; i++) { if (n++) print prev, $i; prev = $i } }' > mo_line.tmp
# Each line of mo_line.tmp holds the line-number range (first, last) of one
# MO-atom-list section. All those sections were copied previously, without
# redundancies, into the temporary file resA_mo3.tmp.
# For each MO-atom-list section, do:
while read -r row1 row2 _; do
  # MO number list (usually 6 MOs) found on the first line of the section.
  # It is the same for every atom, so it is read once per section.
  head="$(sed -n "${row1}p" resA_mo3.tmp)"
  for jj in $(seq "$A_ini" 1 "$A_fin"); do # screening the atom range
    # Within the section (sed), look for the rows that contain the text
    # "<atom> <atom type> <orbital type>" (grep). That match is textual, so
    # atom 1 also hits atom 11; the awk filter keeps a row only when its first
    # field equals the atom number and it has 9 fields, as in the original
    # out file. Matched rows are passed on whole (no line-number prefix is
    # added and cut again, which used to truncate rows at their first colon).
    sed -n "${row1},${row2}p" resA_mo3.tmp |
      grep -- "${jj} ${atmcore} ${wavef}" |
      awk -v x="${jj}" '$1 == x && NF == 9' > resA_mo_2_1.tmp
    # For every row found, write the MO number list as a header line, the row
    # itself and an empty line.
    awk -v x="${head}" '{printf "num-1 sym lvl %s\n%s\n\n", x, $0}' resA_mo_2_1.tmp >> resA_mo_2.tmp
  done
done < mo_line.tmp
# Build the result: keep the first occurrence of every non-empty line of
# resA_mo_2.tmp, in the original order. Empty lines are dropped in the same
# awk pass, which avoids the in-place option of sed (not available in the
# same form in every sed implementation).
# stderr is thrown away on purpose: resA_mo_2.tmp is only written while the
# sections are scanned, so it may not exist; the result is then an empty file.
awk 'length($0) > 0 && !seen[$0]++' resA_mo_2.tmp > resA_mo_3.tmp 2> /dev/null
mv resA_mo_3.tmp resA_mo.out
# Comment out the following line to inspect the intermediate files on disk.
rm -rf resA_mo_2.tmp resA_mo_2_1.tmp mo_line.tmp resA_mo2.tmp resA_mo3.tmp
# This script leaves one output file: resA_mo.out