-
Notifications
You must be signed in to change notification settings - Fork 7
/
qa_inference.py
152 lines (140 loc) · 6.18 KB
/
qa_inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import json
from argparse import ArgumentParser
from operator import itemgetter
from typing import Dict, List
from tqdm import tqdm
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, QuestionAnsweringPipeline
intent_questions: Dict[str, List[str]] = {
"LookupSong": [
"Is the intent asking about looking up songs ?",
"Is the user asking about looking up songs ?",
"Are there any websites which advertise or advertise songs for free?",
"Is the users question about looking up songs?",
"Is there a way to ask for help with the search of songs?",
"How much time does someone waste searching for songs?",
"Is the user asked about searching up song?",
"Is a user ask about searching up songs?",
"Does the user consider to look up songs?",
],
"PlaySong": [
"Is the intent asking about playing songs ?",
"Is the user asking about playing songs ?",
"Is the user asking about playing songs?",
"Is your user asking about playing songs?",
"Is the user asking about playing music?",
"Why does the user ask about playing a song?",
"Is a user asking about playing songs?",
"Does my iPhone asks about playing songs?",
"Does the user ask about playing songs?",
"Is the user planning to playing songs ?",
],
"LookupMusic": [
"Is the intent asking about looking up music ?",
"Is the user asking about looking up music ?",
"Are you asking people to look up music?",
"Is the user asking about looking up music?",
"Is the user asking about searching for music?",
"Why does it seem that people are obsessed with looking up music?",
"Is the user asking about searching music?",
"How s/he asked about searching up music?",
"Will the user ask about finding other music?",
"Is it helpful when I ask for help about searching for music on a website?",
"Is it the user asking about looking up songs (or saying songs)?",
"Why is the user so interested in looking up music?",
"Does the user want to look up music ?",
],
"FindMovies": [
"Is the intent asking about finding movies ?",
"Is the user asking about finding movies ?",
"Does someone want to find a movie?",
"Does the user ask about finding movies?",
"Why does user ask to find movies?",
"Is the user asking about finding movies?",
"Is the user about looking movies and trawl?",
"Is the user asking about finding movies. Is it true that it is the same question of no different people?",
"When did you start a game and you start asking about movies?",
"What are the users complaints about getting movies?",
"Does the user hope to find movies ?",
],
"GetTimesForMovie": [
"Is the intent asking about getting the time for movies ?",
"Is the user asking about getting the time for movies ?",
"What's your question about getting the time for movies?",
"Is my mom asking about getting time for movies?",
"How can I get the time for movies?",
"Is the user asking about getting the time for movies?",
"Can you fix my time problem for movies?",
"What is the thing the user is asking about getting a time in movie or TV watching?",
"How do you determine if you have enough time to watch movies?",
"Is the user asking about getting time for movies?",
"If you are a movie watcher, would you like to give you a good amount of time for your filmmaking needs?",
"Is getting the time for movies the purpose of the user?",
],
"FindAttractions": [
"Is the intent asking about finding attractions ?",
"Is the user asking about finding attractions ?",
"Is the user asking about finding attractions?",
"Is the user asking about how to find attractions?",
"How can I find an attraction?",
"What are some of the common questions asked by a visitor about how to find an attraction?",
"Is it the user asking about finding attractions?",
"Is the User Asking about Theme parks?",
"Does the user have trouble finding attractions ?",
],
}
sgd_intents: Dict[str, str] = {
f"{intent}-{q}": q
for intent, questions in intent_questions.items()
for q in questions
}
def classify_intent(example: Dict) -> Dict:
instances = [
(idx, intent, f"yes. no. {turn}", question)
for idx, turn in enumerate(example)
for intent, question in sgd_intents.items()
]
results = nlp(
question=list(map(itemgetter(-1), instances)),
context=list(map(itemgetter(-2), instances)),
)
mappings = {i[:2]: r["answer"] for i, r in zip(instances, results)}
new_dialog = [
{
"id": idx,
"text": turn,
"intent": list(
set(
[
intent.split("-")[0]
for intent in sgd_intents
if mappings.get((idx, intent), None) == "yes."
]
)
),
}
for idx, turn in enumerate(example)
]
return new_dialog
parser = ArgumentParser()
parser.add_argument("--device", type=int, default=-1)
parser.add_argument("--data_file", type=str, default="blender.jsonl")
parser.add_argument("--output_file", type=str, default="intent_sample.json")
args = parser.parse_args()
MODEL_NAME = "adamlin/distilbert-base-cased-sgd_qa-step5000"
REVISION = "negative_sample-questions"
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_NAME, revision=REVISION)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, revision=REVISION)
nlp = QuestionAnsweringPipeline(model, tokenizer, device=args.device)
samples = [json.loads(i) for i in open(args.data_file, "r")]
utterances = []
for s in samples:
tempt = []
for d in s["dialog"]:
p1, p2 = d[0]["text"], d[1]["text"]
tempt.append(p1)
tempt.append(p2)
utterances.append(tempt)
intent_samples = []
for e in tqdm(utterances):
intent_samples.append(classify_intent(e))
json.dump(intent_samples, open(args.output_file, "w"))