forked from GatorIncubator/gatorgrouper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
workbook.py
91 lines (69 loc) · 2.72 KB
/
workbook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""Integrate GatorGrouper with Google Sheets."""
import csv
import math
import logging
import gspread
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from genetic_algorithm import Student
import config
# Column positions inside the CSV written by get(): the 'Email Address'
# answer, the answer whose question contains "prefer", and every answer
# whose question contains "skill". All are unset until get() has run.
EMAIL_COL, PREFERENCES_COL = None, None
SKILLS_COLS = set()

# Results published by get(): the list of Student objects it builds and
# floor(len(STUDENTS) / group_size).
STUDENTS, GROUPING_SIZE = None, None
def get(group_size):
    """Retrieve data from Google Sheets, write it to a CSV file and load it.

    The first worksheet of the workbook named by ``config.WORKBOOK`` is
    downloaded. For every response, the 'Email Address' answer and the
    answers to questions containing "prefer" or "skill" are written to
    ``config.WORKBOOK_CSV``. That file is then read back to build the
    module-level ``STUDENTS`` list.

    Args:
        group_size: Divisor applied to the number of students;
            ``GROUPING_SIZE`` is set to ``floor(len(STUDENTS) / group_size)``.

    Raises:
        ValueError: If no email or preference column could be located
            (for example because the sheet holds no responses).
        ZeroDivisionError: If ``group_size`` is zero.

    Side effects:
        Overwrites ``config.WORKBOOK_CSV`` and updates the module-level
        ``EMAIL_COL``, ``PREFERENCES_COL``, ``SKILLS_COLS``, ``STUDENTS``
        and ``GROUPING_SIZE``.
    """
    # SKILLS_COLS is only mutated in place, so it needs no global statement.
    global EMAIL_COL, PREFERENCES_COL, STUDENTS, GROUPING_SIZE

    logging.info("Authenticating to Google Sheets...")
    scope = [
        'https://spreadsheets.google.com/feeds',
        'https://www.googleapis.com/auth/drive',
    ]
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        'client_secret.json', scope)
    client = gspread.authorize(creds)

    logging.info("Opening spreadsheet...")
    sheet = client.open(config.WORKBOOK).sheet1

    logging.info("Extracting data from spreadsheet...")
    records = sheet.get_all_records()

    # Forget column positions from any earlier call so that a differently
    # shaped sheet cannot be read through stale indices.
    EMAIL_COL = None
    PREFERENCES_COL = None
    SKILLS_COLS.clear()

    formatted_records = []
    for entry in records:
        formatted_entry = []
        for question, response in entry.items():
            # Record where the answer lands in the row that is actually
            # written, so uncollected columns (the timestamp or any other
            # question) never shift the indices used to read the CSV back.
            column = len(formatted_entry)
            if question == 'Email Address':
                EMAIL_COL = column
            elif "prefer" in question:
                PREFERENCES_COL = column
            elif "skill" in question:
                SKILLS_COLS.add(column)
            else:
                continue
            formatted_entry.append(response)
        formatted_records.append(formatted_entry)

    logging.debug("Writing formatted records to %s...", config.WORKBOOK_CSV)
    # newline='' is required by the csv module to avoid doubled line endings;
    # UTF-8 matches the default encoding pandas uses to read the file back.
    with open(config.WORKBOOK_CSV, 'w', newline='', encoding='utf-8') as output:
        csv.writer(output, quoting=csv.QUOTE_ALL).writerows(formatted_records)

    if EMAIL_COL is None or PREFERENCES_COL is None:
        raise ValueError(
            "Spreadsheet has no responses or lacks an 'Email Address' "
            "or preference question column")

    data = pd.read_csv(config.WORKBOOK_CSV, header=None)
    # Sets have no defined order; sort so skills follow the column order.
    skill_cols = sorted(SKILLS_COLS)

    students = []
    for current_row, email in enumerate(data.iloc[:, EMAIL_COL]):
        skills = [data.iat[current_row, col] for col in skill_cols]
        preferences_str = data.iat[current_row, PREFERENCES_COL]
        # pandas reads an empty cell back as a float NaN.
        if isinstance(preferences_str, float) and math.isnan(preferences_str):
            preferences = []
        else:
            preferences = preferences_str.replace(" ", "").split(",")
        students.append(Student(email, skills, preferences))

    STUDENTS = students
    GROUPING_SIZE = math.floor(len(STUDENTS) / group_size)