forked from Maxwe11y/DialogDP
-
Notifications
You must be signed in to change notification settings - Fork 1
/
chatgpt_erc.py
199 lines (174 loc) · 8.61 KB
/
chatgpt_erc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from dataloader_ta import MELDDataset
import pickle, json
from llm.llm import get_prompt, gen_response
from torch.utils.data import Dataset
from tqdm import tqdm
import os
from sklearn.metrics import classification_report
def metrics(path=r'./llm/molweni_6/'):
    """Print a classification report for the LLM replies stored under *path*.

    Loads ``emotions.json`` (gold labels, one list per dialogue) and
    ``responses.json`` (raw reply strings, one list per dialogue) from
    *path*, turns every reply into a label id by keyword search and prints
    ``classification_report`` over the flattened label sequences.

    Args:
        path: Directory that contains ``emotions.json`` and ``responses.json``.
    """
    # Alternative label maps (swap in to score a different label set):
    # emos = {'neutral':0, 'surprise':1, 'fear':2, 'sadness':3, 'joy':4, 'disgust':5, 'anger':6}
    # emos = {'joyful': 0, 'mad': 1, 'peaceful': 2, 'neutral': 3, 'sad': 4, 'powerful': 5, 'scared': 6}
    emos = {'neutral': 0, 'positive': 1, 'negative': 2}
    # Replies that mention none of the keywords are counted in this extra class.
    unmatched_label = 3

    with open(os.path.join(path, r'emotions.json'), 'r') as f:
        emotions = json.load(f)
    with open(os.path.join(path, r'responses.json'), 'r') as f:
        responses = json.load(f)

    res = []
    for dial in responses:
        for response in dial:
            lowered = response.lower()  # lower-case once per reply, not once per keyword
            pred_label = unmatched_label
            for emo, label in emos.items():
                # No break on purpose: if several keywords occur in one reply,
                # the last matching entry of `emos` decides the label.
                if emo.lower() in lowered:
                    pred_label = label
            res.append(pred_label)

    true_labels = [gold for dial in emotions for gold in dial]
    result = classification_report(true_labels, res, labels=[0, 1, 2, 3], digits=4)
    print(result)
def emory_to_dp(downstream_data='Emory', ddp_data='molweni'):
    """Load the pickled downstream corpus and convert every split with ``downstream2dp``.

    Args:
        downstream_data: Corpus name; used for the directory and the file name.
        ddp_data: Name of the discourse-parsing corpus; part of the file name.

    Returns:
        Tuple ``(dp_train, dp_valid, dp_test)`` of dictionaries produced by
        ``downstream2dp`` for the train, validation and test ids.
    """
    pkl_path = r'./data/{}_revised/{}_{}_6.pkl'.format(downstream_data, downstream_data, ddp_data)
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    # The context manager closes the handle that the bare open() call used to leak.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    speakers_, emotion_labels_, sentences_, trainId, testId, validId, structure_ = data
    dp_train = downstream2dp(trainId, sentences_, speakers_, structure_, emotion_labels_)
    dp_valid = downstream2dp(validId, sentences_, speakers_, structure_, emotion_labels_)
    dp_test = downstream2dp(testId, sentences_, speakers_, structure_, emotion_labels_)
    return dp_train, dp_valid, dp_test
def meld_to_dp(downstream_data='MELD', ddp_data='molweni'):
    """Load the pickled downstream corpus and convert its test split with ``downstream2dp``.

    Only the test ids are converted; the train ids are unpacked but not used.

    Args:
        downstream_data: Corpus name; used for the directory and the file name.
        ddp_data: Name of the discourse-parsing corpus; part of the file name.

    Returns:
        Dictionary produced by ``downstream2dp`` for the test ids.
    """
    # Alternative input file: r'./data/{}_revised/{}_{}_16.pkl'
    pkl_path = r'./data/{}_revised/{}_{}_6_pred_all_freeze.pkl'.format(
        downstream_data, downstream_data, ddp_data)
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    # The context manager closes the handle that the bare open() call used to leak.
    with open(pkl_path, 'rb') as f:
        data = pickle.load(f)
    (videoIDs_, videoSpeakers_, videoLabels_, videoText_, videoAudio_, videoSentence_,
     trainVid, testVid, videoLabels_3, structure_, action_) = data
    dp_test = downstream2dp(testVid, videoSentence_, videoSpeakers_, structure_, videoLabels_)
    return dp_test
def downstream2dp(dataid, sentences, speakers, structures, emotion_labels_):
    """Re-shape per-dialogue lookups into one record per dialogue id.

    Args:
        dataid: Iterable of dialogue ids to convert.
        sentences: Mapping id -> list of utterance strings.
        speakers: Mapping id -> list of speaker entries; ``entry.index(1)`` is
            taken as the speaker number (presumably one-hot vectors -- confirm
            against the data files).
        structures: Mapping id -> list of relation dicts with at least the
            keys ``'type'``, ``'x'`` and ``'y'``.
        emotion_labels_: Mapping id -> list of per-utterance labels.

    Returns:
        Dict id -> ``{'id', 'edus', 'relations', 'labels'}``. ``'labels'`` is
        the very list stored in ``emotion_labels_`` (not a copy).
    """
    converted = {}
    for dial_id in dataid:
        utts = sentences[dial_id]
        whos = speakers[dial_id]
        labels = emotion_labels_[dial_id]

        edus = []
        # Turns are numbered from 1; zip stops at the shortest of the three lists.
        for turn, (utt, who, label) in enumerate(zip(utts, whos, labels), start=1):
            tokens = utt.split(' ')
            edus.append(dict(
                speaker=str(who.index(1)),
                text=utt,
                text_raw=utt,
                tokens=tokens,
                emo=label,
                turn=turn,
            ))

        # Keep only the three fields of each relation that downstream code reads.
        relations = [
            {'type': link['type'], 'x': link['x'], 'y': link['y']}
            for link in structures[dial_id]
        ]

        converted[dial_id] = {
            'id': dial_id,
            'edus': edus,
            'relations': relations,
            'labels': labels,
        }
    return converted
class LLMData(Dataset):
    """Index dialogues by position and expose per-utterance relation context.

    Each item is the tuple
    ``(his, u_i, rel_head, head_utts, rel_dep, dep_utts, emos)`` built by
    ``__getitem__`` from the dialogue records passed to the constructor.
    """

    def __init__(self, data):
        """Build the lookup tables.

        Args:
            data: Mapping whose values are dialogue dicts with the keys
                ``'relations'`` (dicts with ``'x'``, ``'y'``, ``'type'``),
                ``'edus'`` (dicts with ``'text'``) and ``'labels'``.
        """
        self.utts = {}
        self.his = {}  # created for completeness; nothing in this class fills it
        self.structure_x = {}
        self.structure_y = {}
        self.keys = []
        self.emotions = {}

        # Dialogues are re-keyed by their position in `data`.
        for position, (_, dial) in enumerate(data.items()):
            by_x = {}
            by_y = {}
            for rel in dial['relations']:
                src, dst, kind = rel['x'], rel['y'], rel['type']
                self._register(by_x, src, dst, kind, 'y', 'dep_type')
                self._register(by_y, dst, src, kind, 'x', 'head_type')
            self.structure_x[position] = by_x
            self.structure_y[position] = by_y
            self.utts[position] = [edu['text'] for edu in dial['edus']]
            self.emotions[position] = dial['labels']
            self.keys.append(position)

        self.len = len(self.keys)

    @staticmethod
    def _register(table, anchor, other, kind, other_key, kind_key):
        """Record ``other``/``kind`` under ``anchor`` unless ``other`` is already listed."""
        entry = table.setdefault(anchor, {other_key: [], kind_key: []})
        if other not in entry[other_key]:
            # The two lists stay index-aligned: one relation type per endpoint.
            entry[other_key].append(other)
            entry[kind_key].append(kind)

    def __getitem__(self, index):
        """Return the history string, utterances, relation context and labels of one dialogue.

        ``rel_head`` / ``head_utts`` map each utterance position to the types and
        texts of relations stored with that position as ``'y'``; ``rel_dep`` /
        ``dep_utts`` do the same for relations stored with it as ``'x'``.
        Positions without relations map to empty lists.
        """
        # The list lookup raises IndexError past the end, which is what stops
        # plain `for item in dataset` iteration.
        idx = self.keys[index]
        u_i = self.utts[idx]
        his = "\n".join(u_i)
        as_y = self.structure_y[idx]
        as_x = self.structure_x[idx]

        rel_head, head_utts, rel_dep, dep_utts = {}, {}, {}, {}
        for i in range(len(u_i)):
            heads = as_y.get(i, {})
            deps = as_x.get(i, {})
            rel_head[i] = list(heads['head_type']) if 'head_type' in heads else []
            head_utts[i] = [u_i[j] for j in heads['x']] if 'x' in heads else []
            rel_dep[i] = list(deps['dep_type']) if 'dep_type' in deps else []
            dep_utts[i] = [u_i[j] for j in deps['y']] if 'y' in deps else []

        return his, u_i, rel_head, head_utts, rel_dep, dep_utts, self.emotions[idx]

    def __len__(self):
        """Number of dialogues indexed by the constructor."""
        return self.len
def generate_chatgpt_inp(task='Sentiment Analysis'):
    """Collect one model reply per utterance of the converted test set and save them.

    The dialogues come from ``meld_to_dp`` wrapped in ``LLMData``. For every
    utterance a prompt is built with ``get_prompt`` and passed to
    ``gen_response``; replies are grouped per dialogue. The replies are written
    to ``./llm/responses.json`` and the gold labels to ``./llm/emotions.json``.

    Example of the arguments handed to ``get_prompt`` for one utterance::

        his = "Yes, and it is my dying wish to have that ring.\\nSee, if I'm not buried with that ring then my spirit is going to wander the nether world for all eternity"
        u_i = "Okay, that's enough honey!"
        rel_head = ['Acknowledgement']
        head_utts = ["See, if I'm not buried with that ring then my spirit is going to wander the nether world for all eternity"]
        rel_dep = ['Comment']
        dep_utts = ["I don't know. Let me see the ring."]

    Args:
        task: Task name forwarded unchanged to ``gen_response``.
    """
    dialogues = LLMData(meld_to_dp(downstream_data='MELD', ddp_data='molweni'))
    # Alternative corpus: _, _, data = emory_to_dp(downstream_data='Emory', ddp_data='molweni')

    emotions_ = []
    responses = []
    for his, u_i, rel_head, head_utts, rel_dep, dep_utts, emos in tqdm(dialogues):
        emotions_.append(emos)
        # One request per utterance, issued in dialogue order.
        responses.append([
            gen_response(
                get_prompt(
                    his=his,
                    u_i=utt,
                    rel_head=rel_head[pos],
                    head_utts=head_utts[pos],
                    rel_dep=rel_dep[pos],
                    dep_utts=dep_utts[pos]),
                task=task,
                engine="gpt-3.5-turbo-0301")
            for pos, utt in enumerate(u_i)
        ])

    with open('./llm/responses.json', 'w') as f:
        json.dump(responses, f)
    with open('./llm/emotions.json', 'w') as f:
        json.dump(emotions_, f)
# Script entry: these statements run whenever the module is executed or imported.
# Uncomment the next line to query the model again; it writes responses.json and
# emotions.json into ./llm/, not into the directory scored below.
# generate_chatgpt_inp(task='SA')
# NOTE(review): 'molweni_chagpt_3' looks like a misspelling of "chatgpt" -- confirm
# that it matches the directory name on disk before changing it.
metrics(path='./llm/molweni_chagpt_3/')