-
Notifications
You must be signed in to change notification settings - Fork 1
/
log_tracker.py
222 lines (174 loc) · 7.58 KB
/
log_tracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import re
import itertools
from collections import defaultdict
from sortedcontainers import SortedDict
class CombinedLog:
def __init__(self):
self.logs = SortedDict()
self.views = dict()
self.last_timestamp = None
def add_commit(self, peer_name, appended_commit):
timestamp = appended_commit['timestamp']
if self.last_timestamp is None or self.last_timestamp < timestamp:
self.last_timestamp = timestamp
if timestamp in self.logs:
orig_commit = self.logs[timestamp]
self.merge_commit(peer_name, orig_commit, appended_commit)
else:
logged_commit = dict()
new_file_dict = dict()
for fname, digest in appended_commit['new_file'].items():
new_file_dict[fname] = (digest, peer_name)
modified_dict = dict()
for fname, digest in appended_commit['modified'].items():
modified_dict[fname] = (digest, peer_name)
copied_dict = dict()
for occurence in appended_commit['copied']:
copied_dict[occurence] = peer_name
deleted_dict = dict()
for fname in appended_commit['deleted']:
deleted_dict[fname] = peer_name
logged_commit = {
'new_file': new_file_dict,
'modified': modified_dict,
'copied': copied_dict,
'deleted': deleted_dict,
'timestamp': appended_commit['timestamp'],
}
self.logs[timestamp] = logged_commit
self.replay_from(timestamp)
def merge_commit(self, peer_name, orig_commit, appended_commit):
assert orig_commit['timestamp'] == appended_commit['timestamp']
orig_dict = orig_commit['new_file']
appended_dict = appended_commit['new_file']
for fname, new_digest in appended_dict.items():
if fname not in orig_dict or orig_dict[fname][0] > new_digest:
orig_dict[fname] = (new_digest, peer_name)
orig_dict = orig_commit['modified']
appended_dict = appended_commit['modified']
for fname, new_digest in appended_dict.items():
if fname not in orig_dict or orig_dict[fname][0] > new_digest:
orig_dict[fname] = (new_digest, peer_name)
orig_dict = orig_commit['copied']
appended_dict = appended_commit['copied']
for occurence in appended_dict:
if occurence not in orig_dict:
orig_dict[occurence] = peer_name
orig_dict = orig_commit['deleted']
appended_dict = appended_commit['deleted']
for occurence in appended_dict:
if occurence not in orig_dict:
orig_dict[occurence] = peer_name
def replay_from(self, timestamp):
assert timestamp in self.logs
lower_ind = self.logs.index(timestamp) - 1
if lower_ind >= 0:
lower_timestamp = self.logs.peekitem(lower_ind)[0]
cur_view = self.views[lower_timestamp]
else:
cur_view = dict()
for stamp in self.logs.irange(timestamp):
commit = self.logs[stamp]
cur_view = cur_view.copy()
# apply copy
for (from_name, to_name), peer in commit['copied'].items():
if from_name in cur_view:
# workaround to copy tuple
digest, peer = cur_view[from_name]
cur_view[to_name] = (digest, peer)
# apply creation
for fname, (digest, peer) in commit['new_file'].items():
if fname not in cur_view:
cur_view[fname] = (digest, peer)
# apply modification
for fname, (digest, peer) in commit['modified'].items():
if fname in cur_view:
cur_view[fname] = (digest, peer)
# apply deletion
for fname, peer_name in commit['deleted'].items():
if fname in cur_view:
del cur_view[fname]
self.views[stamp] = cur_view
def get_logical_view(self):
if self.views:
assert self.last_timestamp is not None
return {
fname: digest
for fname, (digest, peer) in self.views[self.last_timestamp].items()
}
else:
return dict()
def get_combined_history(self):
def transform(logged_commit):
return {
'new_file': {
fname: digest
for fname, (digest, peer) in logged_commit['new_file'].items()
},
'modified': {
fname: digest
for fname, (digest, peer) in logged_commit['modified'].items()
},
'copied': set(
(from_name, to_name) for (from_name, to_name), peer in logged_commit['copied'].items()
),
'deleted': set(
fname for fname, peer in logged_commit['deleted'].items()
),
'timestamp': logged_commit['timestamp'],
}
return list(map(transform, self.logs.values()))
class LogTracker:
def __init__(self):
self.log_table = defaultdict(list)
self.combined_log = CombinedLog()
self.md5_regex = re.compile(br'^[0-9a-fA-F]{32}$')
def update_history(self, name, history):
log = self.log_table[name]
# compare old commits
if len(history) < len(log):
return False, 'New history has less commits than earilier history, current={}, earlier={}'.format(
history,
log,
)
if history[:len(log)] != log:
return False, 'New history diverses from earlier history, current={}, earlier={}'.format(
history,
log,
)
# utility function
def md5_verifier(s):
return self.md5_regex.match(s) is not None
# add new commits
prev_commit = log[-1] if log else None
for commit in history[len(log):]:
assert 'timestamp' in commit
assert 'new_file' in commit
assert 'modified' in commit
assert 'copied' in commit
assert 'deleted' in commit
# validate commit correctness
if not (commit['new_file'] or commit['modified'] or commit['copied'] or commit['deleted']):
return False,'Empty commit detected'
hash_values = itertools.chain(commit['new_file'].values(), commit['modified'].values())
if not all(map(md5_verifier, hash_values)):
return False, 'Invalid MD5 hash format'
if prev_commit is not None and prev_commit['timestamp'] > commit['timestamp']:
return False, 'Timestamp is not correct'
# save commit
log.append(commit)
self.combined_log.add_commit(name, commit)
prev_commit = commit
return True, None
def get_expected_logical_view(self):
return self.combined_log.get_logical_view()
def get_expected_history(self, name):
return self.log_table[name]
def get_expected_combined_history(self):
return self.combined_log.get_combined_history()
def verify_logical_view(self, proposed_view):
expected_view = self.combined_log.get_logical_view()
return expected_view == proposed_view
def verify_combined_history(self, proposed_history):
expected_history = self.combined_log.get_combined_history()
return proposed_history == expected_history