-
Notifications
You must be signed in to change notification settings - Fork 0
/
addon_scriptv2.py
132 lines (102 loc) · 3.36 KB
/
addon_scriptv2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import pgdb
from pgdb import Connection
from kafka import KafkaProducer
from kafka import KafkaConsumer
from kafka.admin import KafkaAdminClient, NewTopic
import csv
import avro
import datetime
# DDL for the transactions table that load_data() fills and find_data() queries.
create_table_cmd = (
    """
    CREATE TABLE IF NOT EXISTS sample8451Data (
        bask_no varchar(255),
        hshd_no varchar(255),
        purchase varchar(255),
        prod_num varchar(255),
        spend money,
        unit int,
        store varchar(255),
        wk varchar(255),
        yr int
    );
    """)

# Module-level handles shared by load_data(), find_data() and the script body.
conn = None
producer = KafkaProducer(bootstrap_servers=['localhost:9092'], api_version=(0, 10, 1))
print("Successfully connected to KafkaProducer")

# DDL for the single-column table that load_data() appends marker rows to.
insert_table_cmd = (
    """
    CREATE TABLE IF NOT EXISTS updates (
        update varchar(255)
    );
    """)

try:
    # NOTE(review): the documented DB-API entry point is pgdb.connect();
    # confirm that calling the Connection class directly with these keyword
    # arguments is supported by the installed PyGreSQL version.
    conn = pgdb.Connection(database='postgres', host='localhost', user='postgres', password='postgres')
    cur = conn.cursor()
    cur.execute(create_table_cmd)
    cur.execute(insert_table_cmd)
    # Commit the DDL right away.  Without this the CREATE TABLE statements
    # stay inside the open transaction, and the first conn.rollback() issued
    # by load_data() would silently undo them.
    conn.commit()
    print('Connection successful!')
except Exception:
    # Keep the original status message, but re-raise so the real cause is
    # visible here instead of surfacing later as an unrelated error on the
    # undefined cursor / None connection.
    print('Connection unsuccessful!')
    raise
def load_data(filename, max_rows=300):
    """Load the leading data rows of a CSV file into sample8451Data.

    Uses the module-level ``cur``, ``conn`` and ``producer`` objects.  Each
    data row is inserted in its own transaction: the row is inserted, Kafka
    notifications are queued on the 'Added' topic, a marker row is written to
    the ``updates`` table and the transaction is committed.  A row that fails
    at any of those steps is rolled back and skipped (best effort).

    Args:
        filename: Path of the comma-separated file; its first row is a header.
        max_rows: Maximum number of data rows (header excluded) to attempt.

    Prints the number of rows that were actually committed.
    """
    insert_cmd = """INSERT INTO sample8451Data (bask_no, hshd_no, purchase, prod_num, spend, unit, store, wk, yr) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
    # The updates table is declared with a single column named "update"; the
    # previous statement targeted a non-existent "messages" column, which made
    # every row's transaction fail and roll back.  The name is quoted because
    # it is also an SQL keyword.
    # NOTE(review): if an older database already has an "updates" table with
    # a different column name, align the schema with this statement.
    update_cmd = """INSERT INTO updates ("update") VALUES ('updated')"""

    loaded = 0
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(filename, mode='r', newline='') as read_file:
        for row_no, fields in enumerate(csv.reader(read_file, delimiter=',')):
            if row_no == 0:  # exclude header row
                continue
            if row_no > max_rows:
                break
            clean_line = tuple(x.strip(' ') for x in fields)
            try:
                cur.execute(insert_cmd, clean_line)
                # Queues 100 identical notifications per inserted row, as the
                # original code did.
                # NOTE(review): confirm this fan-out is intentional.
                for _ in range(100):
                    producer.send('Added', b'new value')
                cur.execute(update_cmd)
                conn.commit()
            except Exception:
                # Best effort: drop this row's partial work and carry on.
                # (Unlike a bare except, Ctrl-C still interrupts the load.)
                conn.rollback()
                continue
            loaded += 1
    print("{} lines were loaded.".format(loaded))
def _money_to_float(value):
    """Convert one fetched cell (money text or a numeric object) to a float.

    Text is expected in a '$1,234.56' style; a value wrapped in parentheses,
    e.g. '($5.00)', is treated as negative.
    NOTE(review): assumes '.' is the decimal point and ',' the thousands
    separator -- confirm against the server's lc_monetary setting.
    """
    if not isinstance(value, str):
        # Some drivers hand money back as Decimal/float rather than text.
        return float(value)
    text = value.strip().replace('$', '').replace(',', '')
    if text.startswith('('):
        return -float(text.replace('(', '').replace(')', ''))
    return float(text)


def find_data(query, location):
    """Track the amount spent at a particular location.

    Uses the module-level ``cur`` cursor.

    Args:
        query: Column (or SQL expression) to select; it is inserted into the
            statement text, so it must come from trusted code, never from
            user input.
        location: Value matched against the ``store`` column; bound as a
            query parameter.

    Returns:
        The sum of the first selected column over all matching rows, rounded
        to two decimals.  NULL and empty values are ignored; 0.0 is returned
        when nothing matches.
    """
    # Identifiers cannot be bound as parameters, so only the store value is.
    search_cmd = """SELECT {} FROM sample8451Data WHERE store = %s;""".format(str(query))
    cur.execute(search_cmd, (str(location),))

    total = 0.00
    for row in cur.fetchall():
        cell = row[0]
        if cell is None or cell == '':
            continue
        # Round after every addition to keep the running total at cent
        # precision, matching the original accumulation.
        total = round(total + _money_to_float(cell), 2)
    return total
# Script body: optionally load the CSV, optionally run the spend query, then
# release the Kafka and database resources.
filename = "5000_transactions.csv"
table_create = 1  # set to 0 to skip loading the CSV into the database

try:
    if table_create:
        load_data(filename)
    if input("Do you want to run the query to find the amount of money spent at certain store region?(y/n) ") == 'y':
        query = "spend"
        location = "SOUTH"
        # NOTE(review): this topic list is built but never submitted to a
        # KafkaAdminClient anywhere in the visible code -- confirm whether
        # topic creation was meant to happen here.
        topic_list = []
        topic_list.append(NewTopic(name="example_topic", num_partitions=1, replication_factor=1))
        result = find_data(query, location)
        print("The total amount spent at locations in {} was {}".format(location, result))
    else:
        print("Query was aborted!")
finally:
    # Always release resources, even when loading or querying raised.
    try:
        # send() only queues messages; flush so they are delivered before
        # the process exits, then shut the producer down.
        producer.flush()
        producer.close()
    finally:
        cur.close()
        conn.close()
        print("Connection closed.")