-
Notifications
You must be signed in to change notification settings - Fork 7
/
_range_expr.py
427 lines (337 loc) · 13.8 KB
/
_range_expr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
from __future__ import annotations
from bisect import bisect
from collections.abc import Iterator, Sized
from functools import total_ordering
from itertools import chain
from typing import Tuple
from ._errors import ExpressionError, TokenError
from ._tokenstream import Token, TokenStream, TokenType
class IntRangeExpr(Sized):
    """A set of integers stored as a sorted list of non-overlapping IntRange objects.

    The constructor sorts the given ranges, merges neighbours that form one
    continuous arithmetic progression, and rejects overlapping input.
    Instances support ``len()``, iteration, indexing (including negative
    indices), equality with other IntRangeExpr objects, and ``str()`` which
    yields the comma-separated textual form of the ranges.
    """

    # start of the first range after sorting
    _start: int
    # end of the last range after sorting
    _end: int
    # sorted, merged ranges making up the expression
    _ranges: list[IntRange]
    # total number of integers across all ranges
    _length: int
    # cumulative element counts per range, e.g. [32, 100, 132]; searched by __getitem__
    _range_length_indicies: list[int]

    def __init__(self, ranges: list[IntRange]):
        """Build an expression from ``ranges``.

        Args:
            ranges: The ranges to combine; they may be given in any order.

        Raises:
            ValueError: If ``ranges`` is empty or if any two ranges overlap.
        """
        sorted_ranges = sorted(ranges)
        if not sorted_ranges:
            # Fail with the documented ValueError rather than an IndexError from [0].
            raise ValueError("range expression cannot be empty")

        # Combine neighbouring ranges where possible.
        self._ranges = [sorted_ranges[0]]
        for range_ in sorted_ranges[1:]:
            prev = self._ranges[-1]
            # The nominal end of a stepped range need not be one of its values
            # (1-4:2 is {1, 3}).  Only merge when the previous range really ends
            # on its grid; otherwise the merged range would describe other values.
            prev_last = prev.start + (len(prev) - 1) * prev.step
            if (
                prev.step == range_.step
                and prev_last == prev.end
                and prev.end + range_.step == range_.start
            ):
                self._ranges[-1] = IntRange(prev.start, range_.end, range_.step)
            else:
                self._ranges.append(range_)

        self._start = self._ranges[0].start
        self._end = self._ranges[-1].end

        # Running totals of the range lengths, used to binary search in __getitem__.
        self._range_length_indicies = []
        length = 0
        for range_ in self._ranges:
            length += len(range_)
            self._range_length_indicies.append(length)
        self._length = length

        self._validate()

    @staticmethod
    def from_str(range_str: str) -> IntRangeExpr:
        """Create a range expression by parsing its string form (see Parser.parse)."""
        return Parser().parse(range_str)

    @staticmethod
    def from_list(values: list[int | str]) -> IntRangeExpr:
        """Create a range expression from integers or strings containing integers.

        Duplicates are dropped and the values are sorted.  Runs are then formed
        greedily: the first two values of a run fix its step, and the run is
        extended for as long as the following values keep that step.

        Raises:
            ValueError: If ``values`` is empty or a value cannot be converted
                with ``int()``.
        """
        unique_values: list[int] = sorted({int(value) for value in values})

        ranges: list[IntRange] = []
        if unique_values:
            # `end` always tracks the last value of the run being built, so a
            # run holding a single value is emitted as IntRange(v, v).
            start = end = unique_values[0]
            step: int | None = None
            for value in unique_values[1:]:
                if step is None:
                    end = value
                    step = end - start
                elif value - end == step:
                    end = value
                else:
                    ranges.append(IntRange(start, end, step))
                    start = end = value
                    step = None
            ranges.append(IntRange(start, end, step or 1))

        # An empty list is rejected by the constructor with ValueError.
        return IntRangeExpr(ranges)

    def __len__(self) -> int:
        return self._length

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, IntRangeExpr):
            # Let Python try the reflected comparison / fall back to identity.
            return NotImplemented
        return self._ranges == other._ranges

    def __str__(self) -> str:
        return ",".join(str(range_) for range_ in self._ranges)

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._ranges})"

    def __iter__(self) -> Iterator[int]:
        return chain.from_iterable(self._ranges)

    def __getitem__(self, index: int) -> int:
        """Return the value at position ``index``; negative indices count from the end.

        The containing range is found with a binary search over the cumulative
        range lengths, so the lookup is O(log(n)) in the number of ranges.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        position = index + self._length if index < 0 else index
        if not 0 <= position < self._length:
            raise IndexError(f"index {index} is out of range")

        # bisect (right) returns the index of the first cumulative length that
        # is strictly greater than `position`, i.e. the range holding the item.
        range_index = bisect(self._range_length_indicies, position)
        preceding = self._range_length_indicies[range_index - 1] if range_index else 0
        return self._ranges[range_index][position - preceding]

    @property
    def start(self) -> int:
        """The start of the first range; the smallest value when all ranges ascend."""
        return self._start

    @property
    def end(self) -> int:
        """The end of the last range; the largest value when all ranges ascend."""
        return self._end

    @property
    def ranges(self) -> list[IntRange]:
        """Read-only view: a copy of the sorted list of ranges."""
        return self._ranges.copy()

    def _validate(self) -> None:
        """Check the invariants of the expression.

        Raises:
            ValueError: If the expression is empty or two ranges overlap.
        """
        if self._length <= 0:
            raise ValueError("range expression cannot be empty")

        # The ranges are already sorted, so it is enough to check that each
        # range lies completely below its successor; min/max make the check
        # independent of ascending vs. descending ranges.
        prev_range: IntRange | None = None
        for range_ in self._ranges:
            if prev_range and max(prev_range.start, prev_range.end) >= min(
                range_.start, range_.end
            ):
                raise ValueError(
                    f"Range expression is not valid due to overlapping ranges:\n"
                    f"\t{prev_range} overlaps with {range_}"
                )
            prev_range = range_
@total_ordering
class IntRange(Sized):
    """A stepped sequence of integers that includes both its start and end value.

    ``IntRange(1, 5)`` holds 1, 2, 3, 4, 5 and ``IntRange(5, 1, -2)`` holds
    5, 3, 1.  Ranges compare and sort by the tuple ``(start, end, step)``.
    """

    _start: int
    _end: int
    _step: int
    # built-in range covering the same values (end made inclusive)
    _range: range

    def __init__(self, start: int, end: int, step: int = 1):
        """Create the range ``start..end`` (inclusive) advancing by ``step``.

        Raises:
            ValueError: If ``step`` is zero or its sign does not match the
                direction from ``start`` to ``end``.
        """
        self._start = start
        self._end = end
        self._step = step

        # Validate before calling range(): range() rejects a zero step with its
        # own, less specific, error message.
        self._validate()

        # range() excludes its stop value, so move the stop one past `end` in
        # the direction of travel to make the end inclusive.
        offset = 1 if step > 0 else -1
        self._range = range(start, end + offset, step)

        # Defensive: the checks in _validate() already guarantee one element.
        if len(self._range) <= 0:
            raise ValueError("Range: cannot be empty")

    def __str__(self) -> str:
        if len(self) == 1:
            return str(self._start)
        if self._step == 1:
            return f"{self._start}-{self._end}"
        return f"{self._start}-{self._end}:{self._step}"

    def __repr__(self) -> str:
        return f"{type(self).__name__}(start={self._start}, end={self._end}, step={self._step})"

    def __len__(self) -> int:
        return len(self._range)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, IntRange):
            # Returning NotImplemented (instead of raising) lets `==` / `!=`
            # against unrelated types evaluate to False / True as usual.
            return NotImplemented
        return (self._start, self._end, self._step) == (other._start, other._end, other._step)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, IntRange):
            # Python turns this into a TypeError for unsupported operand types.
            return NotImplemented
        return (self._start, self._end, self._step) < (other._start, other._end, other._step)

    def __iter__(self) -> Iterator[int]:
        return iter(self._range)

    def __getitem__(self, index: int) -> int:
        """Return the value at ``index``; negative indices count from the end.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        if not -len(self) <= index < len(self):
            raise IndexError(f"index {index} is out of range")
        return self._range[index]

    @property
    def start(self) -> int:
        """First value of the range (read-only)."""
        return self._start

    @property
    def end(self) -> int:
        """Inclusive end bound of the range (read-only)."""
        return self._end

    @property
    def step(self) -> int:
        """Difference between consecutive values (read-only)."""
        return self._step

    def _validate(self) -> None:
        """Check start, end and step for consistency.

        Raises:
            ValueError: If the step is zero or points away from ``end``.
        """
        if self._step == 0:
            raise ValueError("Range: step must not be zero")
        if self._start < self._end and self._step < 0:
            raise ValueError("Range: an ascending range must have a positive step")
        if self._start > self._end and self._step > 0:
            raise ValueError("Range: a descending range must have a negative step")
class PosIntToken(Token):
    """Token carrying an unsigned integer literal.

    A leading minus sign is not part of this token; the parser reads it as a
    separate HyphenToken.
    """
class HyphenToken(Token):
    """Token for the '-' character (range separator or minus sign)."""
class ColonToken(Token):
    """Token for the ':' character, which introduces the step of a range."""
class CommaToken(Token):
    """Token for the ',' character, which separates elements of an expression."""
# Map of each lexical TokenType to the Token subclass that represents it.
# Handed to the TokenStream (as `supported_tokens`) by Parser.parse so that
# lexical tokens are mapped to the correct token class.
_tokenmap = {
    TokenType.POSINT: PosIntToken,
    TokenType.HYPHEN: HyphenToken,
    TokenType.COLON: ColonToken,
    TokenType.COMMA: CommaToken,
}
class Parser:
    """Recursive-descent parser for range expressions.

    Full Grammar:
        <RangeExpr> ::= <Element> | <Element>,<RangeExpr>
        <Element>   ::= <WS>*<Number><WS>* | <WS>*<Range><WS>* | <WS>*<StepRange><WS>*
        <Range>     ::= <Number><WS>*-<WS>*<Number>
        <StepRange> ::= <Range>:<Step>
        <Number>    ::= Any numeric base-10 value (int)
        <Step>      ::= base-10 non-zero number
        <WS>        ::= whitespace character: tabs or spaces

    The token stream being parsed is kept on the instance (``self._tokens``)
    for the duration of a ``parse`` call.
    """

    def parse(self, expr: str) -> IntRangeExpr:
        """Generate an IntRangeExpr for the given string range expression.

        Args:
            expr: The textual range expression, e.g. ``"1-5,8,10-20:2"``.

        Returns:
            The parsed range expression.

        Raises:
            TokenError: If an unexpected token is encountered.
            ExpressionError: If the expression is empty or malformed.
            ValueError: Propagated from IntRange when a range written without
                a step is descending (e.g. ``"5-1"``); that path is not
                converted to ExpressionError.
        """
        # Raises: TokenError
        self._tokens = TokenStream(expr, supported_tokens=_tokenmap)

        if self._tokens.at_end():
            raise ExpressionError("Empty expression")

        result = self._expression()

        # Anything left over after a complete expression is an error.
        if not self._tokens.at_end():
            token = self._tokens.next()
            raise TokenError(self._tokens.expr, token.value, token.start)

        return result

    def _integer(self) -> Tuple[str, PosIntToken]:
        """Match one (optionally negative) integer within a range expression.

        Grammar:
            <Number> ::= Any numeric base-10 value (int)

        Returns:
            A ``(sign, token)`` pair where ``sign`` is ``"+"`` or ``"-"`` and
            ``token`` holds the digits; ``int(sign + token.value)`` is the value.

        Raises:
            ExpressionError: If the next token is not an integer or the
                expression ends prematurely.
        """
        num_sign = "+"  # positive/negative 0 is still 0
        try:
            # A hyphen directly before the number marks it as negative.
            if isinstance(self._tokens.lookahead(0), HyphenToken):
                num_sign = "-"
                self._tokens.next()

            token = self._tokens.next()
            if not isinstance(token, PosIntToken):
                raise ExpressionError(f"Expected {PosIntToken}, received {token}")
        except IndexError as e:
            # NOTE(review): the token stream presumably raises IndexError when
            # read past its end - confirm against ._tokenstream.
            raise ExpressionError(
                "Unexpectedly reached end of expression when parsing an integer"
            ) from e

        return num_sign, token

    def _range(self) -> IntRange:
        """Match one element within a range expression.

        Grammar:
            <Element>   ::= <WS>*<Number><WS>* | <WS>*<Range><WS>* | <WS>*<StepRange><WS>*
            <StepRange> ::= <Range>:<Step>
            <Range>     ::= <Number><WS>*-<WS>*<Number>
            <Number>    ::= Any numeric base-10 value (int)
            <Step>      ::= base-10 non-zero number
            <WS>        ::= whitespace character: tabs or spaces

        Returns:
            The IntRange described by the element; a lone number ``n`` becomes
            ``IntRange(n, n, 1)``.

        Raises:
            ExpressionError: If the expression is malformed, or if a range
                with an explicit step is rejected by IntRange.
            ValueError: Propagated from IntRange for an invalid range written
                without a step.
        """
        # get the start integer
        start_sign, start = self._integer()
        start_value = int(start_sign + start.value)

        # Just a start number? Technically a Range
        if self._tokens.at_end() or isinstance(self._tokens.lookahead(0), CommaToken):
            return IntRange(start=start_value, end=start_value, step=1)

        token = self._tokens.next()
        if not isinstance(token, HyphenToken):
            raise ExpressionError(f"Expected {HyphenToken}, received {token}")

        # get the end integer
        end_sign, end = self._integer()
        end_value = int(end_sign + end.value)

        # Check if we're done with this range, or if there's a step to handle
        if self._tokens.at_end() or isinstance(self._tokens.lookahead(0), CommaToken):
            return IntRange(start=start_value, end=end_value, step=1)

        # Not done (and not at the end, per the check above): a colon must
        # follow to introduce the step.
        token = self._tokens.next()
        if not isinstance(token, ColonToken):
            raise ExpressionError(f"Expected {ColonToken}, received {token}")

        # get the step integer
        step_sign, step = self._integer()

        try:
            return IntRange(
                start=start_value,
                end=end_value,
                step=int(step_sign + step.value),
            )
        except ValueError as error:
            # Chain from the caught instance so the actual reason is preserved.
            raise ExpressionError("Failed to create Range") from error

    def _expression(self) -> IntRangeExpr:
        """Match a complete range expression.

        Grammar:
            <RangeExpr> ::= <Element> | <Element>,<RangeExpr>
            <Element>   ::= <WS>*<Number><WS>* | <WS>*<Range><WS>* | <WS>*<StepRange><WS>*
            <Range>     ::= <Number><WS>*-<WS>*<Number>
            <StepRange> ::= <Range>:<Step>
            <Number>    ::= Any numeric base-10 value (int)
            <Step>      ::= base-10 non-zero number
            <WS>        ::= whitespace character: tabs or spaces

        Raises:
            TokenError: If an unexpected token is encountered
            ExpressionError: If the expression is malformed.

        Returns:
            IntRangeExpr: The full range expression parsed
        """
        ranges: list[IntRange] = [self._range()]

        try:
            # Every further element must be introduced by a comma.
            while isinstance(self._tokens.lookahead(0), CommaToken):
                self._tokens.next()
                ranges.append(self._range())

            if not self._tokens.at_end():
                token = self._tokens.next()
                raise ExpressionError(f"Expected {CommaToken}, received {token}")
        except IndexError:
            # Deliberately ignored: looking ahead at the end of the stream
            # simply means there are no more elements to read.
            pass

        try:
            return IntRangeExpr(ranges)
        except ValueError as error:
            raise ExpressionError("Failed to create IntRangeExpr") from error