-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
200 lines (184 loc) · 9.35 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import pandas as pd
import openpyxl
def cleanData(input):
    """Split a stringified ``['name', probability]`` cell into its fields.

    Every ``[``, ``]``, ``'`` and space character is dropped from the text,
    and what remains is split on commas.

    Args:
        input: The cell text, e.g. ``"['walk', 0.93]"``.

    Returns:
        A list of strings, one per comma-separated field. An empty string
        yields ``['']``.
    """
    # One deletion table removes all four characters in a single pass.
    stripped = input.translate(str.maketrans('', '', "[]' "))
    return stripped.split(',')
# Output file will include:
# animal_ID: an integer starting at 0 that represents each individual animal in LabGym
# behavioral_sequence_name: first_second_third...
# behavioral_sequence_alias (optional): alias representing a specific sequence of behaviors (not written to the output yet)
# mean_confidence: mean of the probabilities of all behavior instances in the sequence
# start_time: in seconds (to two decimal places)
# end_time: in seconds (to two decimal places)
def _sequence_row(animal_id, sequence_name, probabilities_a, probabilities_b,
                  instances_a, instances_b, start_time, end_time):
    """Build the output row for one finished behavioral sequence.

    Args:
        animal_id: Value of the first column for the row being scanned.
        sequence_name: Name of the sequence, ``first_second``.
        probabilities_a: Summed probabilities of the first behavior.
        probabilities_b: Summed probabilities of the second behavior.
        instances_a: Number of instances counted for the first behavior.
        instances_b: Number of instances counted for the second behavior.
        start_time: Column label at which the sequence started.
        end_time: Column label at which the sequence ended.

    Returns:
        A dict keyed by the output column names. ``mean_confidence`` is the
        summed probabilities divided by the total number of instances.
    """
    mean_probability = (probabilities_a + probabilities_b) / (instances_a + instances_b)
    return {'animal_ID': animal_id,
            'behavioral_sequence_name': sequence_name,
            'mean_confidence': mean_probability,
            'start_time': start_time, 'end_time': end_time}


if __name__ == '__main__':
    # Make variables for later UI config
    # min_depth and max_depth are the number of behaviors in a row to consider.
    # They are not read anywhere yet: the scan below only builds
    # two-behavior sequences.
    file_name = "all_events-4.xlsx"
    min_depth = 2
    max_depth = 2

    # Read in Excel file
    data = pd.read_excel(file_name)

    # Finished sequences are collected here and converted to a DataFrame once
    # at the end; growing a DataFrame with pd.concat per row copies the whole
    # frame every time.
    sequence_rows = []

    # First row is time (the column labels)
    # First column is animal ID
    # Each behavior is ['behavior_name', probability of behavior]
    # Each row is the series of behaviors
    # NOTE(review): the nesting of this state machine was reconstructed from a
    # copy of the file that had lost its indentation - confirm against the
    # original before relying on edge-case behavior.
    for i in range(data.shape[0]):
        row_label = data.index[i]
        animal_ID = data.at[row_label, data.columns[0]]

        # Per-row state.
        # continuing_behavior: a run of one behavior is in progress.
        # continuing_sequence: a second, different behavior has been seen,
        #                      so a first_second sequence is in progress.
        continuing_behavior = False
        continuing_sequence = False
        # Running sum / count for the behavior currently being extended.
        probability_sum = 0
        num_behavior_instances = 0
        # Totals frozen for the first (a) and second (b) behavior.
        probabilities_a = 0
        probabilities_b = 0
        num_behavior_instances_a = 0
        num_behavior_instances_b = 0
        behavior_a = ""
        behavior_b = ""
        start_time = 0
        behavioral_sequence_name = ""
        # True until the first pair after a (re)start has been handled.
        first_seq = True

        col_index = 1
        while col_index < (data.shape[1] - 1):
            # Get the next two behavior/probability pairs to look at
            current_data_a = cleanData(str(data.at[row_label, data.columns[col_index]]))
            current_data_b = cleanData(str(data.at[row_label, data.columns[col_index + 1]]))
            temp_a, probability_a = current_data_a
            temp_b, probability_b = current_data_b

            # If the behavior (a) is not 'NA' and the next behavior (b) is not 'NA'
            if temp_a != "NA" and temp_b != "NA":
                # Same behavior twice and nothing in progress: start a run
                if temp_a == temp_b and not continuing_behavior and not continuing_sequence:
                    continuing_behavior = True
                    start_time = data.columns[col_index]
                    probability_sum = float(probability_a) + float(probability_b)
                    num_behavior_instances = 2
                    behavior_a = temp_a
                    first_seq = False
                # Same behavior again while a run is in progress: extend it
                elif temp_a == temp_b and continuing_behavior:
                    probability_sum += float(probability_b)
                    num_behavior_instances += 1
                    first_seq = False
                # Different behaviors on the first pair after a (re)start
                elif temp_a != temp_b and first_seq:
                    start_time = data.columns[col_index]
                    if num_behavior_instances > 0:
                        # Totals left over from the sequence that was just
                        # emitted: its second behavior becomes the first
                        # behavior of the new sequence.
                        probabilities_a = probability_sum
                        num_behavior_instances_a = num_behavior_instances
                    else:
                        probabilities_a = float(probability_a)
                        num_behavior_instances_a = 1
                    num_behavior_instances = 1
                    probability_sum = float(probability_b)
                    behavior_a = temp_a
                    behavior_b = temp_b
                    behavioral_sequence_name = behavior_a + "_" + behavior_b
                    continuing_behavior = True
                    continuing_sequence = True
                    first_seq = False
                # A run of (a) is followed by a different behavior for the
                # first time: freeze the totals of (a) and start counting (b)
                elif temp_a != temp_b and not continuing_sequence:
                    behavior_b = temp_b
                    probabilities_a = probability_sum
                    num_behavior_instances_a = num_behavior_instances
                    num_behavior_instances = 1
                    probability_sum = float(probability_b)
                    continuing_sequence = True
                    behavioral_sequence_name = behavior_a + "_" + behavior_b
                    first_seq = False
                # A third behavior appears: emit the a_b sequence
                elif temp_a != temp_b and continuing_sequence:
                    probabilities_b = probability_sum
                    num_behavior_instances_b = num_behavior_instances
                    # start_time is the time of first occurrence of a in series
                    # end_time is the column after last occurrence of b in series
                    end_time = data.columns[col_index]
                    sequence_rows.append(_sequence_row(
                        animal_ID, behavioral_sequence_name,
                        probabilities_a, probabilities_b,
                        num_behavior_instances_a, num_behavior_instances_b,
                        start_time, end_time))
                    continuing_sequence = False
                    continuing_behavior = False
                    behavior_a = behavior_b
                    # Step back so the increment below revisits this pair;
                    # with first_seq set it then starts the next sequence.
                    # probability_sum / num_behavior_instances are kept on
                    # purpose so that branch can reuse them.
                    col_index -= 1
                    start_time = end_time
                    first_seq = True
            else:
                # At least one of the two entries is 'NA'
                if continuing_sequence or continuing_behavior:
                    # Emit whatever is in progress. If only a single-behavior
                    # run was in progress, the name comes out as a_a.
                    probabilities_b = probability_sum
                    behavior_b = temp_a
                    behavioral_sequence_name = behavior_a + "_" + behavior_b
                    num_behavior_instances_b = num_behavior_instances
                    end_time = data.columns[col_index]
                    sequence_rows.append(_sequence_row(
                        animal_ID, behavioral_sequence_name,
                        probabilities_a, probabilities_b,
                        num_behavior_instances_a, num_behavior_instances_b,
                        start_time, end_time))
                    # Step back so the increment below revisits this pair
                    # once more with cleared state.
                    col_index -= 1
                    start_time = end_time
                # Clear all sequence state
                continuing_behavior = False
                continuing_sequence = False
                probability_sum = 0
                probabilities_a = 0
                probabilities_b = 0
                num_behavior_instances = 0
                num_behavior_instances_a = 0
                num_behavior_instances_b = 0
                behavior_a = ""
                behavior_b = ""
                behavioral_sequence_name = ""
                first_seq = True
            col_index += 1

        # End of the row: emit anything still in progress, ending at the
        # last column.
        if continuing_sequence or continuing_behavior:
            probabilities_b = probability_sum
            behavior_b = temp_b
            behavioral_sequence_name = behavior_a + "_" + behavior_b
            num_behavior_instances_b = num_behavior_instances
            end_time = data.columns[data.shape[1] - 1]
            sequence_rows.append(_sequence_row(
                animal_ID, behavioral_sequence_name,
                probabilities_a, probabilities_b,
                num_behavior_instances_a, num_behavior_instances_b,
                start_time, end_time))

    # List of behavioral sequences for each ID, including start time and
    # end time of each behavior sequence occurrence
    df = pd.DataFrame(sequence_rows,
                      columns=['animal_ID', 'behavioral_sequence_name', 'mean_confidence',
                               'start_time', 'end_time'])
    # Write to an Excel file
    output_filename = './all_events-4_results.xlsx'
    df.to_excel(output_filename, index=False)