-
Notifications
You must be signed in to change notification settings - Fork 1
/
insert_criminal_records.py
109 lines (100 loc) · 3.85 KB
/
insert_criminal_records.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import sqlite3
import xml.etree.ElementTree as ET
import os
from update_db import get_primary_sql_key
def insert_criminal_records(DB_name):
    """Load the criminal-register XML file and store its extracts in the DB.

    Parses ``data/criminal_records.xml`` below the current working directory,
    hands every child element of the XML root to
    ``process_individual_extracts`` and commits once after all of them have
    been processed.

    Args:
        DB_name: Database location passed to ``sqlite3.connect``.

    Raises:
        Whatever ``ET.parse`` raises for a missing or malformed XML file, and
        whatever the per-extract processing raises; nothing is committed in
        that case.
    """
    print("Inserting the criminal register information")
    conn = sqlite3.connect(DB_name)
    try:
        c = conn.cursor()
        file_address = os.path.join(os.getcwd(), "data", "criminal_records.xml")
        root = ET.parse(file_address).getroot()
        for child in root:
            process_individual_extracts(child, c)
        conn.commit()
    finally:
        # Release the connection even when parsing or an insert fails, so an
        # error does not leave the database handle open.
        conn.close()
def process_individual_extracts(extract, c):
    """Parse a single extract and insert it when it can be tied to a company.

    Args:
        extract: XML element holding one extract.
        c: Database cursor used for the key lookup and the insert.

    Returns:
        None. Nothing is inserted when ``get_ICO`` reports ``-1`` or when
        ``get_primary_sql_key`` returns ``0``.
    """
    company_ico = get_ICO(extract)
    if company_ico == -1:
        # get_ICO signals an unusable identifier with -1.
        return None

    # Parsed in the same order as the insert helper expects its arguments.
    parsed_fields = (
        find_court_records(extract),
        find_relevant_paragraphs(extract),
        find_penalties(extract),
    )

    company_key = get_primary_sql_key(c, company_ico)
    if company_key == 0:
        # NOTE(review): 0 presumably means "no matching company" -- confirm
        # against update_db.get_primary_sql_key.
        return None

    insert_crimnal_data_to_DB(c, company_key, *parsed_fields)
    return None
def get_ICO(extract):
    """Return the ICO found in an extract as an integer, or -1 if unusable.

    The value is read from the fixed position ``extract[0][0][2]``.

    Args:
        extract: XML element holding one extract.

    Returns:
        The ICO converted with ``int``; ``-1`` when the element has no text
        or the text is not made up solely of decimal digits.

    Raises:
        IndexError: If the extract lacks the expected nesting.
    """
    temp_ico = extract[0][0][2].text
    # An empty element yields ``text is None``. ``isdecimal`` is used instead
    # of ``isnumeric`` because the latter also accepts characters such as
    # fractions or superscripts that ``int`` refuses to convert.
    if temp_ico is not None and temp_ico.isdecimal():
        return int(temp_ico)
    return -1
def find_court_records(extract):
    """Collect the court record texts of an extract in document order.

    Walks the children of ``extract[1][0][0]``. The text of every child whose
    tag contains ``spisZnacka`` or ``organizace`` is collected; for a child
    whose tag contains ``odvolaci`` the same is done for its own children.

    Args:
        extract: XML element holding one extract.

    Returns:
        List of the collected element texts.
    """
    # TODO --- Make sure that the records are extracted correctly based on the tags
    wanted_markers = ("spisZnacka", "organizace")

    def matching_texts(node):
        # One entry per marker contained in the tag, in marker order.
        return [node.text for marker in wanted_markers if marker in node.tag]

    collected = []
    for entry in extract[1][0][0]:
        collected.extend(matching_texts(entry))
        if "odvolaci" in entry.tag:
            for nested in entry:
                collected.extend(matching_texts(nested))
    return collected
def find_relevant_paragraphs(extract):
    """Build the textual description of every paragraph entry of an extract.

    Args:
        extract: XML element holding one extract; the entries are the
            children of ``extract[1][0][1]``.

    Returns:
        List with one ``extract_paragraph_info`` result per entry.
    """
    return [extract_paragraph_info(entry) for entry in extract[1][0][1]]
def extract_paragraph_info(individual_paragraph):
    """Format one paragraph entry as a single text starting with the § sign.

    Iterates over the children of ``individual_paragraph[0]`` and appends
    their texts depending on the tag: ``Cislo`` is appended directly,
    ``Pismeno`` is prefixed with ``", odst. "`` and ``zakon`` with ``", "``.

    Args:
        individual_paragraph: XML element of one paragraph entry.

    Returns:
        The assembled description string.
    """
    pieces = ["§"]
    for node in individual_paragraph[0]:
        tag = node.tag
        value = node.text
        if "Cislo" in tag:
            pieces.append(value)
        if "Pismeno" in tag:
            pieces.extend((", odst. ", value))
        if "zakon" in tag:
            pieces.extend((", ", value))
    return "".join(pieces)
def find_penalties(extract):
    """Return the penalties of an extract, or None when there is no section.

    Looks through the children of ``extract[1][0]`` for the first one whose
    tag contains ``tresty`` and passes it to ``extract_penalties_info``.

    Args:
        extract: XML element holding one extract.

    Returns:
        The result of ``extract_penalties_info`` for the first matching
        child, or ``None`` if no child tag contains ``tresty``.
    """
    penalties_node = next(
        (node for node in extract[1][0] if "tresty" in node.tag),
        None,
    )
    if penalties_node is None:
        return None
    return extract_penalties_info(penalties_node)
def extract_penalties_info(extract):
    """Collect the penalty kinds listed in a penalties section.

    For every child of ``extract`` the first sub-element is inspected; when
    its tag contains ``druh`` its text is added to the result.

    Args:
        extract: XML element whose children are the individual penalties.

    Returns:
        List of the collected texts, in document order. A matching element
        without text contributes ``None``.
    """
    penalties = []
    for child in extract:
        # An entry without sub-elements has no first child to inspect;
        # indexing it would raise IndexError, so it is skipped.
        if len(child) == 0:
            continue
        first_field = child[0]
        if "druh" in first_field.tag:
            penalties.append(first_field.text)
    return penalties
def insert_crimnal_data_to_DB(c, primary_sql_key, court_records, relevant_paragraphs, penalties):
    """Insert one row into the ``criminal_records`` table.

    Args:
        c: Database cursor used to execute the INSERT.
        primary_sql_key: Value stored in the ``company_id`` column.
        court_records: Sequence of strings. Items 0 and 1 are combined as
            ``"<item 1>, sp. zn. <item 0>"`` for the first instance; items 2
            and 3 are combined the same way for the second instance.
        relevant_paragraphs: Iterable of strings joined with ``", "``, or
            ``None`` for an empty text.
        penalties: Iterable of strings joined with ``", "``, or ``None`` for
            an empty text.

    Returns:
        Always ``0``.

    Raises:
        IndexError: If ``court_records`` has fewer than two items.
    """
    first_instance = court_records[1] + ", sp. zn. " + court_records[0]
    # The second instance needs both of its items; with only three records
    # indexing item 3 would raise IndexError, so the column is left NULL.
    if len(court_records) >= 4:
        second_instance = court_records[3] + ", sp. zn. " + court_records[2]
    else:
        second_instance = None

    text_paragraphs = ", ".join(relevant_paragraphs) if relevant_paragraphs is not None else ""
    text_penalties = ", ".join(penalties) if penalties is not None else ""

    c.execute("INSERT INTO criminal_records (company_id, first_instance, second_instance, paragraphs, penalties) VALUES (?, ?, ?, ?, ?)", (primary_sql_key, first_instance, second_instance, text_paragraphs, text_penalties,))
    return 0