-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpostprocessing_meta.sh
executable file
·189 lines (169 loc) · 7.17 KB
/
postprocessing_meta.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#!/bin/bash
# postprocessing_meta.sh [<species code>]
#
# Post-processes meta.csv after manual identification of recordings.
# A banner is printed on every run. When the first argument starts with
# "-h" the usage text below is printed and the script exits with status 0.

printf '\n%s\n%s\n\n' \
  "This is a post-processing script for meta.csv, a file created by Kaleidoscope batch processing." \
  "This script should be run after manual id of recordings from within the output directory."

case $1 in
  -h*)
    printf '%s\n\n%s\n' \
      "Usage: postprocessing_meta.sh [<species code>]" \
      "If run from the output directory of Kaleidoscope batch processing it will:"
    # One tab-indented bullet per processing step.
    printf '\t+ %s\n' \
      "check that files meta.csv and id_notes.csv exist" \
      "make a backup of meta.csv before postprocessing" \
      "remove double quotes and carriage returns within meta.csv if they exist, separating multi-species entries by a semicolon" \
      "add system UID into the column REVIEW USERID of meta.csv" \
      "join ID NOTES column of id_notes.csv to meta.csv after replacing commas in notes with semicolons" \
      "discard some useless columns from meta.csv" \
      "create a meta.kml from meta.csv allowing for specification of a K species code to create species-specific KML files"
    printf '\n'
    exit 0
    ;;
esac
# Preconditions: both input files must be present in the current directory
# and at most one command line argument (a species code) may be given.
# Each failed check prints an explanation and exits with status 1.
if [[ ! -e meta.csv ]]; then
  echo "Cannot find meta.csv."
  exit 1
fi
if [[ ! -e id_notes.csv ]]; then
  echo "Cannot find id_notes.csv. This file was created by the pre-processing script."
  exit 1
fi
# The argument count is compared arithmetically. Inside [[ ]] the < operator
# compares strings, so a count such as 10 would sort before 2 and slip through.
if (( $# > 1 )); then
  echo "Usage: postprocessing_meta.sh [<species code>]"
  echo
  echo "Optionally you can specify a species code from Kaleidoscope to create a one-species-only kml file."
  echo "You cannot give more than one command line argument."
  exit 1
fi
# Re-run protection: an existing meta_after_review.csv means post-processing
# has been run before, so meta.csv is first restored from that backup.
if [[ -e meta_after_review.csv ]]; then
  echo "You seem to have run postprocessing already. Restoring original meta.csv."
  if ! cp meta_after_review.csv meta.csv; then
    echo "Seem to have trouble restoring meta.csv from meta_after_review.csv. Quitting..."
    exit 1
  fi
fi
# make backup of manual id work
echo "I am making a backup of your work in meta_after_review.csv before continuing with post-processing meta.csv."
echo "Do not [re]move meta_after_review.csv!"
# Every later step rewrites meta.csv in place, so stop if the backup fails.
if ! cp meta.csv meta_after_review.csv; then
  echo "Seem to have trouble making a backup of meta.csv. Quitting..."
  exit 1
fi
# remove double quotes if they exist
if grep "\"" meta.csv > /dev/null
then
# if two or more species codes have been given to a recording, but separated by a comma
if grep -E '"[^"]+,[^"]+"' meta.csv > /dev/null
then
echo "Separating multi-species entries by a semicolon..."
perl -F/\",\"/ -i -lane'map {tr/,/;/} @F; print join(",", @F)' meta.csv
fi
echo "meta.csv contains double quotes. Removing them ..."
tr -d '"' < meta.csv > meta_noquotes.csv
mv meta_noquotes.csv meta.csv
fi
# Manual review must be complete before post-processing starts.
# Field 24 of meta.csv is read as the MANUAL ID column; a single empty value
# in that field aborts the script with status 1.
if ! cut -d, -f24 meta.csv | grep -q "^$"; then
  echo "You seem to have finished manual review of recordings."
else
  echo "Cannot start post-processing. You have not finished filling the MANUAL ID column in meta.csv."
  exit 1
fi
# Clean meta.csv in a single pass:
#   - carriage returns are deleted if they exist
#   - asterisks are deleted (the column names sometimes get asterisks from
#     Kaleidoscope)
tr -d '\r*' < meta.csv > meta_noCR_noAst.csv
mv -f meta_noCR_noAst.csv meta.csv
# add system UID into column REVIEW USERID
echo "Adding your system user id to the REVIEW USERID column of meta.csv: $USER"
perl -F"," -lane 'BEGIN{chomp($user = `echo \$USER`);}if($.==1){print; for($i=0;$i<@F;$i++){if($F[$i] =~ /^REVIEW USERID$/){$Spalte=$i;}}}else{$F[$Spalte] = $user; print join(",", @F)}' meta.csv > meta_with_USERID.csv
if [ $? -eq 0 ]
then
mv -f meta_with_USERID.csv meta.csv
else
echo "Seem to have trouble adding USER ID. Quitting..."
exit 1
fi
# add date in new column MANUAL ID DATE
echo "Adding new column called MANUAL ID DATE to meta.csv."
perl -F, -ne'BEGIN{chomp($date = `date +%Y-%m-%d`);} chomp; if($. == 1){print; print ",", "MANUAL ID DATE", "\n";}else{print; print ",", $date, "\n";}' meta.csv > meta_with_ID_DATE.csv
if [ $? -eq 0 ]
then
mv -f meta_with_ID_DATE.csv meta.csv
else
echo "Seem to have trouble adding an ID DATE column to meta.csv. Quitting ..."
exit 1
fi
# Join the ID NOTES column from id_notes.csv onto meta.csv. This is meant to
# run after id_notes.csv has been edited in a spreadsheet app.
# The file id_notes.csv needs to have seven columns with IN FILE as the first column
# and ID NOTES as the seventh column
# On success meta.csv is replaced by the joined file; on any failure a message
# is printed and the script exits with status 1.
#
# Count the fields of the header line by turning every comma into a newline.
COLNUM=$(head -n 1 id_notes.csv | tr ',' '\n' | wc -l)
if [ $COLNUM -eq 7 ]
then
echo "Joining notes column from id_notes.csv to meta.csv."
# if notes in ID NOTES column contain commas
# (detected as a double-quoted field with at least one comma inside it)
if grep -E '"[^"]+(,[^"]+)+"' id_notes.csv > /dev/null
then
echo "Replacing commas with semicolons in ID NOTES column..."
# Only lines that end in a double quote are rewritten: the trailing quoted
# field is captured as the note and the commas inside it become semicolons.
# All other lines are printed unchanged.
perl -i -pe'next if not /"$/; ($pre_note, $note) = $_ =~ /(.*,)(".*"$)/; $note =~ tr/,/;/; $_ = $pre_note . $note . "\n";' id_notes.csv
fi
# remove double quotes
if grep "\"" id_notes.csv > /dev/null
then
echo "id_notes.csv contains double quotes. Removing them ..."
tr -d '"' < id_notes.csv > id_notes_withoutQuotes.csv
mv id_notes_withoutQuotes.csv id_notes.csv
fi
# Look up the 1-based positions of the NR and IN FILE fields in the header of
# meta.csv by numbering the header fields with nl.
# NOTE(review): grep "NR" matches every header field containing "NR"; this
# assumes exactly one such field exists - confirm.
# NOTE(review): nl presumably pads its numbers with leading blanks, so the
# unquoted expansions in the join line appear to rely on word splitting to
# drop them - confirm before quoting these variables.
NRCOLNUM=$(head -n 1 meta.csv | tr ',' '\n' | nl | grep "NR" | cut -f1)
INFILECOLNUM=$(head -n 1 meta.csv | tr ',' '\n' | nl | grep "IN FILE" | cut -f1)
# Join on IN FILE: meta.csv is sorted numerically starting at its NR field,
# id_notes.csv numerically on its sixth field and then reduced to fields 1
# and 7 (IN FILE and ID NOTES).
# NOTE(review): join expects both inputs to be ordered on the join field;
# presumably the numeric order used here yields matching IN FILE order in
# both files - verify.
join -t, -1 $INFILECOLNUM -2 1 <(sort -t, -nk $NRCOLNUM meta.csv) <(sort -t, -nk 6,6 id_notes.csv | cut -d, -f1,7) > meta_with_id_notes.csv
# # get header line line from bottom to top (after sorting)
# NUMLINES=$(wc -l meta.csv | awk '{print $1-1}')
# tail -1 meta_with_id_notes.csv | tee header.csv | cat - <(head -n $NUMLINES meta_with_id_notes.csv) > meta_with_id_notes_headerTop.csv
# $? below is the exit status of the join command above; only comments lie
# in between.
if [ $? -eq 0 ]
then
# echo $NUMLINES
mv -f meta_with_id_notes.csv meta.csv
# header.csv is only written by the disabled code above; -f keeps rm quiet
# when the file does not exist.
rm -f header.csv
# mv -f meta_with_id_notes_headerTop.csv meta.csv
else
echo "Seem to have trouble joining id notes to meta. Quitting..."
exit 1
fi
else
echo "Cannot join ID NOTES column. File id_notes.csv does not have seven columns."
exit 1
fi
# Reduce meta.csv to the comma-separated fields listed in keep_fields
# (everything from field 30 onwards is kept). The selection is written to
# the scratch file atem.vsc, which then replaces meta.csv.
keep_fields='1,5-6,11-13,15,17-18,24,26,28,30-'
echo "Selecting columns from meta.csv. Some can be dropped, really."
cut -d, -f"$keep_fields" meta.csv > atem.vsc
mv atem.vsc meta.csv
# remove noise recordings
echo "Removing lines for noise recordings from meta.csv."
perl -F, -i'.withNoise' -lane'if($.==1){print; for($i=0;$i<@F;$i++){if($F[$i] =~ /^MANUAL ID$/){$palte=$i}}}else{print if not $F[$palte] =~ /noise/i}' meta.csv
# Create meta_withINDIR.csv: meta.csv with a leading INDIR column holding the
# current working directory. Before that, files in the NOISE subdirectory are
# moved into the current directory and the first "_000" is removed from every
# file name ending in "wav".
echo "Creating meta_withINDIR.csv : a version of meta.csv that contains the path to the audio files."
echo "It is therefore suitable for upload to a database."
if [[ -d NOISE ]]; then
  for noise_file in NOISE/*; do
    # An unmatched glob stays literal, so skip names that do not exist.
    [[ -e $noise_file || -L $noise_file ]] || continue
    mv -- "$noise_file" .
  done
  rmdir NOISE
fi
# The renaming is done in bash itself so that it does not depend on which
# "rename" utility (Perl or util-linux) is installed.
for wav in *wav; do
  [[ -e $wav ]] || continue
  renamed=${wav/_000/}
  [[ $renamed == "$wav" ]] && continue
  if [[ -e $renamed ]]; then
    # never overwrite an existing recording
    echo "$wav not renamed: $renamed already exists" >&2
    continue
  fi
  mv -- "$wav" "$renamed"
done
# Prefix the header with INDIR and every other line with the directory. The
# path is passed through the environment so awk does not interpret
# backslashes in it. Handling each line directly also covers a last line
# that has no trailing newline.
INDIR_PATH=$PWD awk '
  NR == 1 { print "INDIR," $0; next }
  { print ENVIRON["INDIR_PATH"] "," $0 }
' meta.csv > meta_withINDIR.csv
# Create a KML file from meta.csv with style2kml.pl.
# Without an argument the output goes to meta_ManualID.kml. With a species
# code as first argument, the code is passed via --species and the output
# goes to meta_<code>.kml.
# "$1" is quoted throughout so that a code containing blanks or glob
# characters stays one word in the argument and in the file name.
if [[ -z $1 ]]; then
  style2kml.pl --meta meta.csv > meta_ManualID.kml
  echo "Creating meta_ManualID.kml file from meta.csv."
else
  style2kml.pl --meta meta.csv --species "$1" > "meta_$1.kml"
  printf "Creating meta_%s.kml file from meta.csv for species %s only.\n" "$1" "$1"
fi