-
Notifications
You must be signed in to change notification settings - Fork 2
/
packages.py
202 lines (154 loc) · 6.2 KB
/
packages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
from __future__ import absolute_import, division, unicode_literals

import bz2
import datetime
import functools
import io
import os
import pickle
import re
import sqlite3
import uuid
import xml.etree.ElementTree as ElT

import requests

import config
# Architecture path segment appended to a repository's base URL.
REPODATA_ARC_SUFFIX = "x86_64/"
# Location of the repository metadata index, relative to the architecture
# directory of a repository.
METADATA_SUFFIX = "repodata/repomd.xml"
# File holding the pickled timestamp written by set_timestamp_to_now() and
# read back by get_timestamp().
PACKAGE_TIMESTAMP_FILE = config.DATA_DIR + 'packages_timestamp.pickled'
# XML namespace used to qualify element names when searching the repository
# metadata document.
YUM_REPODATA_XML_NAMESPACE = 'http://linux.duke.edu/metadata/repo'
def _find_db_link_in_xml(xml_text):
    """Return the ``href`` of the ``primary_db`` entry in repository metadata.

    Args:
        xml_text: The repository metadata XML document, as text or bytes.

    Returns:
        The value of the ``href`` attribute of the ``location`` child of the
        first ``data`` element whose ``type`` attribute is ``primary_db``.

    Raises:
        ValueError: If no ``primary_db`` entry exists, or if that entry has
            no ``location`` element carrying an ``href`` attribute.
    """
    data_tag = '{' + YUM_REPODATA_XML_NAMESPACE + '}data'
    location_tag = '{' + YUM_REPODATA_XML_NAMESPACE + '}location'

    root = ElT.fromstring(xml_text)
    for data_elmnt in root.iter(data_tag):
        # .get() tolerates <data> elements that carry no "type" attribute.
        if data_elmnt.get('type') != 'primary_db':
            continue
        location = data_elmnt.find(location_tag)
        # Compare against None explicitly: an Element without children is
        # falsy, so a plain truth test would reject a valid element.
        if location is None or 'href' not in location.attrib:
            raise ValueError('primary_db entry has no location href in XML')
        return location.attrib['href']
    raise ValueError('Data not found in XML')
def _download_one(version):
    """Download the primary package database of every active repository.

    For each repository returned by ``config.active_repos()`` the repository
    metadata is fetched, the link to the bz2-compressed primary database is
    looked up in it, and the decompressed database is stored as
    ``<DATA_DIR><repo>_<version>.sqlite``.

    Args:
        version: OS version whose repositories are downloaded.

    Raises:
        IOError: If the metadata file or the database file is not answered
            with HTTP status 200.
        ValueError: If the metadata does not reference a primary database.
    """
    for repo in config.active_repos():
        repo_base_url = config.REPO_BASE_URL + unicode(version) + \
            '/' + repo + '/' + REPODATA_ARC_SUFFIX

        metadata_request_url = repo_base_url + METADATA_SUFFIX
        metadata_request = requests.get(metadata_request_url)
        # Fail early with a clear message instead of trying to parse an
        # error page as XML.
        if metadata_request.status_code != 200:
            raise IOError('Could not get file ' + metadata_request_url)
        # Hand the raw bytes to the XML parser so that the document's own
        # encoding declaration is honoured.
        db_href = _find_db_link_in_xml(metadata_request.content)

        db_request_url = repo_base_url + db_href
        db_request = requests.get(db_request_url)
        if db_request.status_code != 200:
            raise IOError('Could not get file ' + db_request_url)
        database = bz2.decompress(db_request.content)

        # Write to a uniquely named temporary file first and rename it
        # afterwards, so the final file name never refers to a partially
        # written database.
        temp_filename = config.DATA_DIR + unicode(uuid.uuid1())
        final_filename = config.DATA_DIR + repo + '_' + version + '.sqlite'
        with io.open(temp_filename, mode='wb') as temp_file:
            temp_file.write(database)
        os.rename(temp_filename, final_filename)
def download():
    """Download the package databases for every configured OS version."""
    for os_version in config.OS_VERSIONS:
        _download_one(os_version)
def _conn_factory(version, repo):
    """Open the SQLite database stored for one repository and OS version.

    Args:
        version: OS version part of the database file name.
        repo: Repository part of the database file name.

    Returns:
        A ``sqlite3`` connection whose rows are ``sqlite3.Row`` objects.
    """
    database_path = config.DATA_DIR + repo + '_' + version + '.sqlite'
    connection = sqlite3.connect(database_path)
    # sqlite3.Row allows columns to be read by name as well as by index.
    connection.row_factory = sqlite3.Row
    return connection
# Selects every row of the "packages" table; the single placeholder is
# returned unchanged in the "repo" column of each row.
_PRIMARY_QUERY = '''
SELECT name, arch, version, epoch,
? AS repo, "release", summary, description, rpm_sourcerpm,
url, rpm_license AS license, location_href, pkgKey
FROM packages
WHERE 1=1
-- AND name = 'kernel'
--LIMIT 15
'''


def _primary_query_execute(conn, repo):
    """Fetch all package rows from an open primary database.

    Args:
        conn: Open ``sqlite3`` connection to a primary package database.
        repo: Repository name, bound to the query placeholder and returned
            as the ``repo`` column of every row.

    Returns:
        A list with one row per entry of the ``packages`` table.
    """
    # Cursor.execute() returns the cursor itself, so the calls can be chained.
    return conn.cursor().execute(_PRIMARY_QUERY, (repo,)).fetchall()
def _read_from_dbs(version):
    """Collect the package rows of all active repositories for one OS version.

    Args:
        version: OS version whose repository databases are read.

    Returns:
        A single list containing the rows of every active repository, in the
        order in which ``config.active_repos()`` yields the repositories.
    """
    package_list = []
    for repo in config.active_repos():
        conn = _conn_factory(version, repo)
        try:
            # extend() grows the list in place instead of copying it for
            # every repository.
            package_list.extend(_primary_query_execute(conn, repo))
        finally:
            # Close the connection even when the query fails, so no database
            # handle is left open.
            conn.close()
    return package_list
def _prepare(package_list):
    """Group package rows by package name and sort each group by version.

    Args:
        package_list: Iterable of row mappings, each convertible with
            ``dict()`` and containing a ``name`` entry.

    Returns:
        A dict mapping each package name to a list of row dicts, ordered
        ascending according to ``compare_rpm_versions`` (newest entry last).
    """
    prepared = {}
    for row in package_list:
        # Convert first and read the name from the plain dict; this avoids
        # indexing the row object with a bytes key.
        package = dict(row)
        prepared.setdefault(package['name'], []).append(package)

    # cmp_to_key adapts the cmp-style comparison function to the "key"
    # argument of list.sort().
    version_key = functools.cmp_to_key(compare_rpm_versions)
    for versions in prepared.values():
        versions.sort(key=version_key)
    return prepared
def _not_none_epoch(epoch):
    """Return ``epoch`` unchanged, or the string ``'0'`` when it is ``None``."""
    return '0' if epoch is None else epoch
def _is_int(mystring):
    """Tell whether ``int()`` accepts ``mystring``.

    Returns:
        ``True`` if ``int(mystring)`` succeeds, ``False`` if it raises
        ``ValueError``.
    """
    try:
        int(mystring)
    except ValueError:
        return False
    else:
        return True
# Background on the comparison rules:
# http://stackoverflow.com/questions/3206319/how-do-i-compare-rpm-versions-in-python
def _compare_rpm_label_fields(field1, field2):
    """Compare one field (epoch, version or release) of two RPM labels.

    Each field is split into segments consisting either only of digits or
    only of ASCII letters; every other character acts as a separator. The
    segments are compared pairwise from left to right:

    * a numeric segment is newer than an alphabetic one,
    * two numeric segments are compared as integers, so leading zeros are
      irrelevant (``'01'`` equals ``'1'``),
    * two alphabetic segments are compared as strings.

    If all shared segments are equal, the field with more segments is newer.

    Args:
        field1: First field, as a string.
        field2: Second field, as a string.

    Returns:
        ``-1`` if ``field1`` is older than ``field2``, ``1`` if it is newer
        and ``0`` if both are equal.
    """
    segments1 = re.findall(r'[0-9]+|[a-zA-Z]+', field1)
    segments2 = re.findall(r'[0-9]+|[a-zA-Z]+', field2)

    for segment1, segment2 in zip(segments1, segments2):
        numeric1, numeric2 = segment1.isdigit(), segment2.isdigit()

        # Different segment types: the numeric one wins.
        if numeric1 != numeric2:
            return 1 if numeric1 else -1

        # Same segment type: compare as integers or as strings.
        if numeric1:
            value1, value2 = int(segment1), int(segment2)
        else:
            value1, value2 = segment1, segment2
        if value1 < value2:
            return -1
        if value1 > value2:
            return 1

    # All shared segments are equal; decide by the number of segments.
    if len(segments1) < len(segments2):
        return -1
    if len(segments1) > len(segments2):
        return 1
    return 0
def compare_rpm_versions(version_one, version_two):
    """Compare two package entries by epoch, then version, then release.

    Args:
        version_one: Mapping with ``epoch``, ``version`` and ``release``.
        version_two: Mapping with ``epoch``, ``version`` and ``release``.

    Returns:
        ``-1`` if ``version_one`` is older than ``version_two``, ``1`` if it
        is newer and ``0`` if all three components compare equal. The first
        component that differs decides the result.
    """
    # A missing epoch is treated as '0' so that it can be compared like any
    # other field instead of failing on None.
    result = _compare_rpm_label_fields(_not_none_epoch(version_one['epoch']),
                                       _not_none_epoch(version_two['epoch']))
    if result != 0:
        return result

    for component in ('version', 'release'):
        result = _compare_rpm_label_fields(version_one[component],
                                           version_two[component])
        if result != 0:
            return result
    return 0
def get_version(version):
    """Return the packages of one OS version, grouped by name and sorted.

    Args:
        version: OS version whose repository databases are read.

    Returns:
        The result of ``_prepare`` applied to the rows read for ``version``.
    """
    package_rows = _read_from_dbs(version)
    return _prepare(package_rows)
def get_all():
    """Return the prepared packages of every configured OS version.

    Returns:
        A dict mapping each entry of ``config.OS_VERSIONS`` to the result of
        ``get_version`` for that entry.
    """
    return {
        os_version: get_version(os_version)
        for os_version in config.OS_VERSIONS
    }
def minor_os_release(all_packages_dict):
    """Build a ``<version>.<release prefix>`` string from ``centos-release``.

    Args:
        all_packages_dict: Mapping of package names to lists of package
            entries; the last entry of ``centos-release`` is used.

    Returns:
        The ``version`` of that entry, a dot, and the part of its ``release``
        that precedes the first dot.
    """
    newest_entry = all_packages_dict['centos-release'][-1]
    major_part = newest_entry['version']
    # The lazy pattern matches up to and including the first dot of the
    # release string; the slice below drops that dot again.
    release_prefix = re.match(r'.*?\.', newest_entry['release']).group()
    minor_part = release_prefix[:-1]
    return '.'.join([major_part, minor_part])
def set_timestamp_to_now():
    """Store the current local date and time, pickled, in the timestamp file."""
    current_time = datetime.datetime.now()
    serialized = pickle.dumps(current_time)
    with io.open(PACKAGE_TIMESTAMP_FILE, mode='wb') as timestamp_file:
        timestamp_file.write(serialized)
def get_timestamp():
    """Load and return the object pickled in the timestamp file."""
    with io.open(PACKAGE_TIMESTAMP_FILE, mode='rb') as timestamp_file:
        return pickle.load(timestamp_file)
def rpm_download_url(package_version, os_version):
    """Build the download URL of a package file.

    Args:
        package_version: Package entry providing ``repo`` and
            ``location_href``.
        os_version: OS version path segment, as a string.

    Returns:
        The repository base URL followed by the OS version, the repository,
        the architecture suffix and the package's ``location_href``.
    """
    url_parts = [
        config.REPO_BASE_URL,
        os_version,
        '/',
        package_version['repo'],
        '/',
        REPODATA_ARC_SUFFIX,
        package_version['location_href'],
    ]
    return ''.join(url_parts)
def newest_versions_as_list(os_version, all_packages_dict):
    """Return the last entry of every package of one OS version.

    Args:
        os_version: Key selecting the OS version in ``all_packages_dict``.
        all_packages_dict: Mapping of OS versions to mappings of package
            names to lists of package entries.

    Returns:
        A list holding, for each package name, the final element of its
        entry list.
    """
    packages = all_packages_dict[os_version]
    return [packages[package_name][-1] for package_name in packages]