-
Notifications
You must be signed in to change notification settings - Fork 1
/
squad.py
45 lines (39 loc) · 1.54 KB
/
squad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import json
import pandas as pd
class Squad:
    """Load a SQuAD-style JSON file and flatten it into one row per question.

    The JSON document is expected to be an object with a ``version`` entry
    and a ``data`` list. Each ``data`` item carries a ``title`` and a list of
    ``paragraphs``; each paragraph carries a ``context`` string and a ``qas``
    list of question objects.

    Attributes:
        location: The path that was passed to the constructor.
        version: The value stored under the file's ``version`` key.
        data: The raw (nested) value stored under the file's ``data`` key.
        df: The flattened ``pandas.DataFrame``, one row per question.
    """

    # Column order of the flattened frame. Passing it explicitly keeps the
    # schema stable even when the file contains no questions at all.
    _COLUMNS = [
        'id',
        'wiki_title',
        'context',
        'content',
        'is_impossible',
        'answer',
        'answer_start',
    ]

    def __init__(self, input_location):
        """Read ``input_location`` and build the flattened DataFrame.

        Args:
            input_location: Path of the JSON file to load.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If a required key is missing from the document.
            IndexError: If an answerable question has an empty ``answers``
                list.
        """
        self.location = input_location
        # JSON is UTF-8 by specification; do not depend on the platform's
        # default encoding, and make sure the handle is closed again.
        with open(input_location, encoding='utf-8') as file:
            json_file = json.load(file)
        self.version = json_file['version']
        self.data = json_file['data']
        self.df = pd.DataFrame(self._flatten(self.data), columns=self._COLUMNS)

    @staticmethod
    def _flatten(data):
        """Return one dict per question found in the nested ``data`` list."""
        rows = []
        for sample in data:
            title = sample['title']
            for paragraph in sample['paragraphs']:
                context = paragraph['context']
                for question in paragraph['qas']:
                    # Files without the flag (presumably SQuAD v1.1-style
                    # dumps) are treated as containing answerable questions.
                    is_impossible = question.get('is_impossible', False)
                    row = {
                        'id': question['id'],
                        'wiki_title': title,
                        'context': context,
                        'content': question['question'],
                        'is_impossible': is_impossible,
                    }
                    if is_impossible:
                        # Sentinel values marking "no answer in the context".
                        row['answer'] = ""
                        row['answer_start'] = -1
                    else:
                        # Only the first listed answer is kept.
                        answer = question['answers'][0]
                        row['answer'] = answer['text']
                        row['answer_start'] = answer['answer_start']
                    rows.append(row)
        return rows

    def get_dataframe(self):
        """Return the flattened DataFrame built by the constructor."""
        return self.df