This repository has been archived by the owner on Oct 13, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
/
gitdata.py
264 lines (219 loc) · 10.7 KB
/
gitdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# This file is part of gitdata project <https://github.com/adammhaile/gitdata>
# and released under LGPL v3 <https://www.gnu.org/licenses/lgpl-3.0.en.html>
from future import standard_library
standard_library.install_aliases()
import yaml
import logging
import urllib.parse
import os
import shutil
import io
from . import exectools
from .pushd import Dir
from doozerlib import constants
# URL schemes that GitData.clone_data treats as remote git repositories to be
# cloned (scp-like "user@host:path" addresses are detected separately there).
SCHEMES = ['ssh', 'ssh+git', "http", "https"]
class GitDataException(Exception):
    """A broad exception for errors during GitData operations.

    Base class of the more specific GitData errors defined in this module.
    """
class GitDataBranchException(GitDataException):
    """Raised when an existing local clone is on a different branch than the
    one requested and has local changes that have not been committed/pushed."""
class GitDataPathException(GitDataException):
    """Raised when a requested sub-directory or load path is not a directory
    inside the data checkout."""
class DataObj(object):
    """A parsed data file together with the location it was loaded from.

    :param key: Identifier for this entry (GitData.load_data passes the file
        name without its extension)
    :param str path: Path of the backing file on disk
    :param data: The parsed content of the file

    Attributes ``base_dir`` and ``filename`` are derived from ``path``.
    """

    def __init__(self, key, path, data):
        self.key = key
        self.path = path
        self.base_dir = os.path.dirname(self.path)
        # Use basename rather than str.replace(base_dir, ''): replace() removes
        # *every* occurrence of the directory string, so "data/data.yml" used
        # to yield ".yml" instead of "data.yml".
        self.filename = os.path.basename(self.path)
        self.data = data

    def __repr__(self):
        result = {
            'key': self.key,
            'path': self.path,
            'data': self.data,
        }
        return str(result)

    def reload(self):
        """Re-read ``self.path`` from disk and replace ``self.data`` with the parsed content."""
        with io.open(self.path, 'r', encoding="utf-8") as f:
            # NOTE(review): full_load uses PyYAML's FullLoader, not SafeLoader;
            # consider safe_load if the data files may come from an untrusted source.
            self.data = yaml.full_load(f)

    def save(self):
        """Serialize ``self.data`` as block-style YAML and overwrite ``self.path``."""
        with io.open(self.path, 'w', encoding="utf-8") as f:
            yaml.safe_dump(self.data, f, default_flow_style=False)
class GitData(object):
    """Load structured data files (YAML/JSON) from a git repository or a local directory."""

    def __init__(self, data_path=None, clone_dir='./', commitish='master',
                 sub_dir=None, exts=('yaml', 'yml', 'json'), reclone=False, logger=None):
        """
        Load structured data from a git source.
        :param str data_path: Git url (ssh/ssh+git/http/https or scp-like ``user@host:path``)
                              or local directory path. When falsy, nothing is cloned until
                              clone_data() is called.
        :param str clone_dir: Location to clone data into
        :param str commitish: Repo branch (tag or sha also allowed) to checkout
        :param str sub_dir: Sub dir in data to treat as root
        :param exts: Iterable of valid extensions to search for in data, without period
        :param reclone: If a clone is already present, remove it and reclone latest.
        :param logger: Python logging object to use; when None the root logger is
                       configured at INFO level and used
        :raises GitDataException:
        """
        self.logger = logger
        if logger is None:
            logging.basicConfig(level=logging.INFO)
            self.logger = logging.getLogger()

        self.clone_dir = clone_dir
        self.branch = commitish

        self.remote_path = None
        self.sub_dir = sub_dir
        # Stored with a leading period and lower-cased so they compare directly
        # against os.path.splitext() results.
        self.exts = ['.' + e.lower() for e in exts]
        self.commit_hash = None
        self.origin_url = None
        self.reclone = reclone
        if data_path:
            self.clone_data(data_path)

    def _local_clone_is_stale(self, data_destination):
        """
        Inspect an existing clone and decide whether it must be replaced.
        :param str data_destination: Directory of the existing local clone
        :return: False when the clone is on the requested branch and already contains
                 the remote head; True when it should be deleted and cloned again
        :raises GitDataBranchException: clone is on another branch and has local changes
        :raises GitDataException: a git query failed, or the clone is out of date
                                  and has local changes
        """
        with Dir(data_destination):
            # check the current status of what's local
            rc, out, err = exectools.cmd_gather("git status -sb")
            if rc:
                raise GitDataException('Error getting data repo status: {}'.format(err))

            # "synced" means: only the branch header line is printed (no modified
            # files) and that header reports neither "ahead" nor "behind".
            lines = out.strip().split('\n')
            synced = ('ahead' not in lines[0] and 'behind' not in lines[0] and len(lines) == 1)

            # verify local branch
            rc, out, err = exectools.cmd_gather("git rev-parse --abbrev-ref HEAD")
            if rc:
                raise GitDataException('Error checking local branch name: {}'.format(err))
            branch = out.strip()

            if branch != self.branch:
                if not synced:
                    msg = ('Local branch is `{}`, but requested `{}` and you have uncommitted/pushed changes\n'
                           'You must either clear your local data or manually checkout the correct branch.'
                           ).format(branch, self.branch)
                    raise GitDataBranchException(msg)
                # Wrong branch but nothing local to lose: replace the clone.
                return True

            # Check if local is synced with remote
            rc, out, err = exectools.cmd_gather(["git", "ls-remote", self.data_path, self.branch])
            if rc:
                raise GitDataException('Unable to check remote sha: {}'.format(err))
            remote = out.strip().split('\t')[0]
            try:
                exectools.cmd_assert('git branch --contains {}'.format(remote))
            except Exception as err:
                # Local history does not contain the remote head. Only safe to
                # throw the clone away when there is no local work in it.
                if not synced:
                    msg = ('Local data is out of sync with remote and you have unpushed commits: {}\n'
                           'You must either clear your local data\n'
                           'or manually rebase from latest remote to continue'
                           ).format(data_destination)
                    raise GitDataException(msg) from err
                return True

            self.logger.info('{} is already cloned and latest'.format(self.data_path))
            return False

    def clone_data(self, data_path):
        """
        Clones data for given data_path:
        :param str data_path: Git url (ssh/ssh+git/http/https or scp-like ``user@host:path``)
                              or local directory path (plain path or ``file://`` URL)
        :raises ValueError: data_path uses an unsupported URL scheme
        :raises GitDataBranchException: existing clone is on another branch with local changes
        :raises GitDataPathException: sub_dir does not exist in the data
        :raises GitDataException: other failures while inspecting an existing clone
        """
        import shlex  # local import: quoting of values interpolated into git command strings

        # Remove trailing slash to prevent GitDataBranchException:
        self.data_path = data_path.rstrip('/')

        data_url = urllib.parse.urlparse(self.data_path)
        if data_url.scheme in SCHEMES or (data_url.scheme == '' and ':' in data_url.path):
            # Clone into <clone_dir>/<repo name without extension such as ".git">
            data_name = os.path.splitext(os.path.basename(data_url.path))[0]
            data_destination = os.path.join(self.clone_dir, data_name)

            if self.reclone and os.path.isdir(data_destination):
                shutil.rmtree(data_destination)

            clone_data = True
            if os.path.isdir(data_destination):
                self.logger.info('Data clone directory already exists, checking commit sha')
                clone_data = self._local_clone_is_stale(data_destination)

            if clone_data:
                if os.path.isdir(data_destination):  # delete if already there
                    shutil.rmtree(data_destination)
                self.logger.info('Cloning config data from {}'.format(self.data_path))
                # Clone all branches as we must sometimes reference master /OWNERS for maintainer information
                cmd = "git clone --no-single-branch {} {}".format(
                    shlex.quote(self.data_path), shlex.quote(data_destination))
                exectools.cmd_assert(cmd, set_env=constants.GIT_NO_PROMPTS)
                exectools.cmd_assert(
                    'git -C {} checkout {}'.format(shlex.quote(data_destination), shlex.quote(self.branch)),
                    set_env=constants.GIT_NO_PROMPTS)

            self.remote_path = self.data_path
            self.data_path = data_destination
        elif data_url.scheme in ['', 'file']:
            self.remote_path = None
            # For file:// URLs use the URL's path component; abspath() on the raw
            # URL string would produce "<cwd>/file:/...".
            local_path = data_url.path if data_url.scheme == 'file' else self.data_path
            self.data_path = os.path.abspath(local_path)  # just in case relative path was given
        else:
            raise ValueError(
                'Invalid data_path: {} - invalid scheme: {}'
                .format(self.data_path, data_url.scheme)
            )

        if self.sub_dir:
            self.data_dir = os.path.join(self.data_path, self.sub_dir)
        else:
            self.data_dir = self.data_path

        quoted_path = shlex.quote(self.data_path)
        self.origin_url, _ = exectools.cmd_assert(f'git -C {quoted_path} remote get-url origin', strip=True)
        self.commit_hash, _ = exectools.cmd_assert(f'git -C {quoted_path} rev-parse HEAD', strip=True)

        self.logger.info(f'On commit: {self.commit_hash}')

        if not os.path.isdir(self.data_dir):
            raise GitDataPathException('{} is not a valid sub-directory in the data'.format(self.sub_dir))

    def load_data(self, path='', key=None, keys=None, exclude=None, filter_funcs=None, replace_vars=None):
        """
        Load data files from a directory of the data checkout.
        :param str path: Directory, relative to the data root, to load from
        :param key: Base name (no extension) of a single file to load
        :param keys: Base name or list of base names to load; mutually exclusive with key
        :param exclude: Base name or list of base names to leave out of the result
        :param filter_funcs: Callable or list of callables invoked as func(base_name, data);
                             a file is kept only if every one returns a truthy value
        :param dict replace_vars: When non-empty, each file's raw text is passed through
                                  str.format(**replace_vars) before parsing. A missing
                                  template key is logged as a warning and the text is
                                  parsed unsubstituted.
        :return: dict mapping base name to DataObj. If key is given and was loaded,
                 that single DataObj is returned instead of a dict.
        :raises GitDataPathException: path is given but is not a directory under the data root
        :raises GitDataException: both key and keys were given
        :raises ValueError: a file could not be parsed
        """
        full_path = os.path.join(self.data_dir, path.replace('\\', '/'))
        if path and not os.path.isdir(full_path):
            raise GitDataPathException('Cannot find "{}" under "{}"'.format(path, self.data_dir))

        if filter_funcs is not None and not isinstance(filter_funcs, list):
            filter_funcs = [filter_funcs]

        if exclude is not None and not isinstance(exclude, list):
            exclude = [exclude]

        if key and keys:
            raise GitDataException('Must use key or keys, but not both!')

        if key:
            keys = [key]

        if keys:
            if not isinstance(keys, list):
                keys = [keys]
            files = []
            for k in keys:
                # First configured extension that exists on disk wins for this key.
                for ext in self.exts:
                    candidate = k + ext
                    if os.path.isfile(os.path.join(full_path, candidate)):
                        files.append(candidate)
                        break  # found for this key, move on
        else:
            files = os.listdir(full_path)

        result = {}

        for name in files:
            base_name, ext = os.path.splitext(name)
            if ext.lower() not in self.exts:
                continue
            data_file = os.path.join(full_path, name)
            if not os.path.isfile(data_file):
                continue

            with io.open(data_file, 'r', encoding="utf-8") as f:
                raw_text = f.read()

            if replace_vars:
                try:
                    raw_text = raw_text.format(**replace_vars)
                except KeyError as e:
                    self.logger.warning('{} contains template key `{}` but no value was provided'.format(data_file, e.args[0]))

            try:
                # NOTE(review): full_load uses PyYAML's FullLoader, not SafeLoader;
                # consider safe_load if the data repo may be untrusted.
                data = yaml.full_load(raw_text)
            except Exception as e:
                raise ValueError(f"error parsing file {data_file}: {e}") from e

            if exclude and base_name in exclude:
                continue
            # all() short-circuits on the first rejecting filter and accepts any
            # truthy/falsy return value from the filter callables.
            if filter_funcs and not all(func(base_name, data) for func in filter_funcs):
                continue

            result[base_name] = DataObj(base_name, data_file, data)

        if key and key in result:
            result = result[key]

        return result

    def commit(self, msg):
        """
        Commit outstanding data changes
        :param str msg: Commit message
        """
        import shlex  # local import: only needed to quote the commit message

        self.logger.info('Commit config: {}'.format(msg))
        with Dir(self.data_path):
            exectools.cmd_assert('git add .')
            # shlex.quote keeps messages containing quotes or other shell
            # metacharacters intact as a single -m argument.
            exectools.cmd_assert('git commit --allow-empty -m {}'.format(shlex.quote(msg)))

    def push(self):
        """
        Push changes back to data repo.
        Will of course fail if user does not have write access.
        """
        self.logger.info('Pushing config...')
        with Dir(self.data_path):
            exectools.cmd_assert('git push')