-
Notifications
You must be signed in to change notification settings - Fork 0
/
schema_creator.py
103 lines (88 loc) · 3.48 KB
/
schema_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import re
import subprocess
from typing import TextIO, Dict
import pandas as pd
from utils.configs import (
sheets_mapping,
additional_keys,
different_templates,
RANDOM_STRING,
GUIDELINE_BLOCKS,
TEMPLATE_FILE,
)
from utils.filler_utils import (
split_sheet,
get_requirements_columns,
get_version_name_for_database,
get_guideline_name_for_database,
read_dataframes
)
# IMPORTANT NOTE: "sheet" means the original file name, "sheet_name" is the mapped one.
# Per-table template overrides. This dict starts empty and is filled by
# generate_template() (one entry per name in different_templates) before
# get_template_for() looks sheets up in it.
additional_templates = {}
# End of configurations
def get_block(f: TextIO) -> str:
    """Read the next brace-terminated block from an open text stream.

    Lines are consumed from the current position of ``f`` up to and
    including the first line whose only non-whitespace content is ``}``,
    or up to end of file when no such line remains.

    Args:
        f: Text stream positioned at the start of the block to read.

    Returns:
        The consumed lines concatenated verbatim (line endings included),
        or an empty string when ``f`` is already at end of file.
    """
    lines = []
    while True:
        line = f.readline()
        if not line:
            # readline() returns "" only at end of file.
            break
        lines.append(line)
        # Compare the stripped line instead of "}" + os.linesep: text-mode
        # reads normally yield "\n" whatever the platform separator is, so
        # the os.linesep comparison never matched on Windows. It also
        # missed a closing brace followed by "\r\n" or trailing blanks.
        if line.strip() == "}":
            break
    return "".join(lines)
def get_template_for(sheet: str, template: str) -> str:
    """Return the template text specialised for ``sheet``.

    The template registered for ``sheet`` in ``additional_templates`` is
    used when there is one, otherwise ``template``. The last line that
    contains ``@relation`` gets the names listed for ``sheet`` in
    ``additional_keys`` inserted after each ``[name`` occurrence, and
    finally every occurrence of ``Sheet`` is replaced by ``sheet``.

    Args:
        sheet: Name substituted for the ``Sheet`` placeholder and used as
            the lookup key in ``additional_templates`` and
            ``additional_keys`` (the caller in this file passes the mapped
            sheet name).
        template: Fallback template text.

    Returns:
        The specialised template text.
    """
    # A template registered for this sheet overrides the one passed in.
    template = additional_templates.get(sheet, template)

    # Remember the last line carrying an @relation attribute ("" if none).
    original_key_ref = ""
    for line in template.splitlines():
        if "@relation" in line:
            original_key_ref = line

    extra_keys = additional_keys.get(sheet, [])
    if original_key_ref and extra_keys:
        # Insert every extra key in a single replacement. Replacing once per
        # key against the unmodified line only ever applied the first key:
        # after that replacement the original line is no longer present in
        # the template, so later replacements matched nothing.
        key_list = "".join(f" {var_name}," for var_name in extra_keys)
        new_key_ref = original_key_ref.replace("[name", f"[name,{key_list}")
        # Drop the trailing comma left when "name" was the only list entry.
        new_key_ref = new_key_ref.replace(",]", "]")
        template = template.replace(original_key_ref, new_key_ref)

    return template.replace("Sheet", sheet)
def generate_template(df: Dict[str, pd.DataFrame]) -> None:
    """Write ``output.prisma`` from TEMPLATE_FILE and push it with the Prisma CLI.

    Steps, in order:

    1. Copy the first GUIDELINE_BLOCKS blocks of TEMPLATE_FILE to
       ``output.prisma`` after removing the placeholder list fields.
    2. Skip one block, read the general template, then read one template per
       entry of ``different_templates`` into ``additional_templates``.
    3. For every sheet of ``df`` that has an entry in ``sheets_mapping``,
       append one filled-in template per requirement column.
    4. Run ``prisma format`` and ``prisma db push`` on the result.

    Args:
        df: Mapping from original sheet name to that sheet's dataframe.
    """
    # The context manager closes the template file even if a step below raises.
    with open(TEMPLATE_FILE, "r") as template_file:
        general_part = "".join(
            get_block(template_file) for _ in range(GUIDELINE_BLOCKS)
        )

        # This part is only needed to make the template compliant with the
        # Prisma guideline. A regex is used because the amount of spacing
        # between the field name and its type is variable.
        general_part = re.sub(
            rf"Sheet{RANDOM_STRING} *Sheet{RANDOM_STRING}\[]", "", general_part
        )
        for sheet in different_templates:
            general_part = re.sub(
                rf"{sheet}{RANDOM_STRING} *{sheet}{RANDOM_STRING}\[]",
                "",
                general_part,
            )
        general_part += "\n// Guidelines tables"

        with open("output.prisma", "w") as f:
            f.write(general_part)

        # Remove the "Sheet" part that is useless
        get_block(template_file)
        # Get the general template
        general_template = get_block(template_file)
        for table in different_templates:
            additional_templates[table] = get_block(template_file)

    # Open the output once for all appended models instead of once per write.
    with open("output.prisma", "a") as output:
        # Iterate the argument, not the module-level ``dataframe`` that only
        # exists when this file is run as a script.
        for sheet in df:
            # sheet_name is the name that will be used in the database
            sheet_name = sheets_mapping.get(sheet)
            if not sheet_name:
                continue
            actual_template = get_template_for(sheet_name, general_template)
            _, protocols_dataframe = split_sheet(df[sheet])
            requirements_columns = get_requirements_columns(
                protocols_dataframe, sheet
            )
            # prepare the guidelines for the next step
            for guideline in requirements_columns:
                # Convert the guideline name once, into its own variable, so
                # the conversion is not re-applied to an already converted
                # name for every further column of the same guideline.
                guideline_name = get_guideline_name_for_database(guideline)
                for column_name in requirements_columns[guideline]:
                    version_name = get_version_name_for_database(column_name)
                    new_name = (guideline_name + version_name).upper()
                    # NOTE(review): "7WJsEz" is presumably the value of
                    # RANDOM_STRING - confirm before replacing the literal.
                    output.write(actual_template.replace("7WJsEz", new_name))

    # The output file is closed (and flushed) before the CLI reads it.
    # The exit statuses of both commands are ignored, as before.
    subprocess.call(["prisma", "format", "--schema=output.prisma"])
    subprocess.call(["prisma", "db", "push", "--schema=output.prisma"])
if __name__ == "__main__":
    # Script entry point: load every sheet, keep the result bound to the
    # module-level name ``dataframe``, then build and push the schema.
    dataframe = read_dataframes()
    generate_template(df=dataframe)