-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
49 lines (40 loc) · 1.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import json
import os

import nltk
import requests
from nltk.tokenize import word_tokenize
# Fetch the tokenizer data that word_tokenize() relies on.
# NOTE(review): newer NLTK releases appear to load the 'punkt_tab' resource
# instead of 'punkt' -- confirm against the installed NLTK version.
nltk.download('punkt')

# Get all bills by last updated date
limit = 3
headers = {'Content-Type': 'application/json'}
url = "https://api.congress.gov/v3/bill"

# The API key is a credential and must not live in source control: read it
# from the environment instead. Any key that was previously committed to this
# file should be treated as leaked and revoked.
api_key = os.environ.get("CONGRESS_API_KEY")
if not api_key:
    raise SystemExit(
        "Set the CONGRESS_API_KEY environment variable to a Congress.gov API key."
    )

# Let requests build and escape the query string. The timeout keeps the script
# from hanging forever on a stalled connection, and raise_for_status() turns
# an HTTP error (bad key, rate limit, ...) into an exception here rather than
# a confusing JSON/KeyError failure further down.
response = requests.get(
    url,
    headers=headers,
    params={"api_key": api_key, "limit": limit},
    timeout=30,
)
response.raise_for_status()
obj = response.json()
json_formatted_str = json.dumps(obj, indent=4)
print(json_formatted_str)

# TOKENIZE TERMS & INDEX DOCUMENTS
terms = []  # unique non-stopword tokens, in first-seen order
seen_terms = set()  # mirrors `terms` for O(1) duplicate checks
stopwords = ["for", "of", "the", ",", "or", "a", "to"]
stopword_set = set(stopwords)
for bill in obj["bills"]:
    title = bill["title"]
    latestActionDate = bill["latestAction"]["actionDate"]
    for token in word_tokenize(title):
        # Stopwords are matched case-insensitively so that a capitalised
        # "To" / "The" at the start of a title is filtered as well; the
        # token itself is stored with its original casing.
        if token.lower() in stopword_set or token in seen_terms:
            continue
        seen_terms.add(token)
        terms.append(token)
    print(latestActionDate, title)
print(terms)
# Stopwords
# Lemmatization AND / OR Stemming
# Tokenization Inverted Index OR other method
# WRITE KEY PERFORMANCE INDICATORS
# Query Time
# Index Time
# Precision
# Recall
# F-Measure
# WRITE CHARTS
# Count of Keywords Monthly Bar Chart
# WRITE QUERY-ABILITY - SINGLE & MULTIPLE TERM
# {"healthcare"} or {"health care"} or {"healthcare", "standards", "consumer"}
# Lemmatization AND / OR Stemming on Query