Skip to content

Commit

Permalink
Refactor set_jds methods into mixin class
Browse files Browse the repository at this point in the history
  • Loading branch information
taldcroft committed Sep 25, 2020
1 parent 104a2ce commit d9ddab3
Showing 1 changed file with 103 additions and 54 deletions.
157 changes: 103 additions & 54 deletions cxotime/cxotime.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,94 @@ def now(cls):
now.__doc__ = Time.now.__doc__


class FastDateParserMixin:
    """Mixin that parses arrays of time strings and sets ``jd1``/``jd2``.

    ``set_jds_fast_or_python`` is the entry point: it tries the C parser
    from ``libpt`` and falls back to the pure-Python parser.  The host class
    must supply ``in_subfmt``, ``scale``, ``_select_subfmts`` and
    ``parse_string``, plus the C-parser layout attributes ``delims``,
    ``starts``, ``stops``, ``break_allowed`` and ``has_day_of_year``.
    """

    def set_jds_fast_or_python(self, val1, val2):
        """Parse the time strings contained in val1 and set jd1, jd2.

        The fast C parser is used unless a specific input subformat was
        requested (``in_subfmt != '*'``).  If the fast parser raises for any
        reason, the input is re-parsed with the Python parser, whose own
        exception (if any) is what the caller sees.
        """
        # A specific input subformat is only handled by the Python parser.
        if self.in_subfmt != '*':
            self.set_jds_python(val1, val2)
            return

        try:
            self.set_jds_fast(val1)
        except Exception:
            # Deliberately broad: anything the fast parser cannot handle is
            # retried with the Python parser.
            self.set_jds_python(val1, val2)

    def set_jds_fast(self, val1):
        """Use fast C parser to parse time strings in val1 and set jd1, jd2.

        Raises
        ------
        ValueError
            If unicode input is not pure ASCII, or the C parser returns a
            non-zero status.
        """
        # Handle bytes or str input and flatten down to a single array of
        # uint8.  Numpy unicode ('U') stores 4 bytes per character.
        char_size = 4 if val1.dtype.kind == 'U' else 1
        val1_str_len = int(val1.dtype.itemsize // char_size)
        chars = val1.ravel().view(np.uint8)

        if char_size == 4:
            # Check that this is pure ASCII
            status = libpt.check_unicode(chars, len(chars) // 4)
            if status != 0:
                raise ValueError('input is not pure ASCII')
            # Keep one byte per character.  It might be possible to avoid
            # this copy with cleverness in parse_times.c but leave that for
            # another day.
            chars = np.ascontiguousarray(chars[::4])

        # Pre-allocate output components
        n_times = len(chars) // val1_str_len
        year = np.zeros(n_times, dtype=np.intc)
        month = np.zeros(n_times, dtype=np.intc)
        day = np.zeros(n_times, dtype=np.intc)
        hour = np.zeros(n_times, dtype=np.intc)
        minute = np.zeros(n_times, dtype=np.intc)
        second = np.zeros(n_times, dtype=np.double)

        # Set up parser parameters as numpy arrays for passing to C parser
        delims = np.array(self.delims, dtype=np.uint8)
        starts = np.array(self.starts, dtype=np.intc)
        stops = np.array(self.stops, dtype=np.intc)
        break_allowed = np.array(self.break_allowed, dtype=np.intc)

        # Call C parser; the component arrays are filled in place.
        status = libpt.parse_ymdhms_times(chars, n_times, val1_str_len, self.has_day_of_year,
                                          delims, starts, stops, break_allowed,
                                          year, month, day, hour, minute, second)
        if status != 0:
            msgs = {1: 'time string ends at beginning of component where break is not allowed',
                    2: 'time string ends in middle of component',
                    3: 'required delimiter character not found',
                    4: 'non-digit found where digit (0-9) required',
                    5: 'bad day of year (1 <= doy <= 365 or 366 for leap year'}
            # An unrecognised code still yields a ValueError, not a KeyError.
            msg = msgs.get(status, f'unknown status code {status}')
            raise ValueError(f'fast C time string parser failed: {msg}')

        # All went well, finish the job
        jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
                              year, month, day, hour, minute, second)
        # Restore the shape of the input, which was flattened for parsing.
        jd1 = jd1.reshape(val1.shape)
        jd2 = jd2.reshape(val1.shape)
        self.jd1, self.jd2 = day_frac(jd1, jd2)

    def set_jds_python(self, val1, val2):
        """Parse the time strings contained in val1 and set jd1, jd2.

        Each element is parsed with ``self.parse_string`` against the
        subformats selected by ``self.in_subfmt``.  ``val2`` is not used.
        """
        # Select subformats based on current self.in_subfmt
        subfmts = self._select_subfmts(self.in_subfmt)
        # Be liberal in what we accept: convert bytes to ascii.
        # Here .item() is needed for arrays with entries of unequal length,
        # to strip trailing 0 bytes.
        to_string = (str if val1.dtype.kind == 'U' else
                     lambda x: str(x.item(), encoding='ascii'))
        # The six ``None`` operands are outputs allocated by nditer with the
        # dtypes given in op_dtypes (five C ints and one double).
        iterator = np.nditer([val1, None, None, None, None, None, None],
                             flags=['zerosize_ok'],
                             op_dtypes=[None] + 5 * [np.intc] + [np.double])
        for val, iy, im, iday, ihr, imin, dsec in iterator:
            val = to_string(val)
            iy[...], im[...], iday[...], ihr[...], imin[...], dsec[...] = (
                self.parse_string(val, subfmts))

        jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
                              *iterator.operands[1:])
        self.jd1, self.jd2 = day_frac(jd1, jd2)


class TimeSecs(TimeCxcSec):
"""
Chandra X-ray Center seconds from 1998-01-01 00:00:00 TT.
Expand All @@ -170,7 +258,7 @@ class TimeSecs(TimeCxcSec):
name = 'secs'


class TimeDate(TimeYearDayTime):
class TimeDate(TimeYearDayTime, FastDateParserMixin):
"""
Year, day-of-year and time as "YYYY:DOY:HH:MM:SS.sss..." in UTC.
Expand All @@ -194,6 +282,14 @@ class TimeDate(TimeYearDayTime):
"""
name = 'date'

# Class attributes for fast C-parsing.  Each tuple has one entry per time
# component, in the order (year, month, day, hour, minute, second, fraction).
# FastDateParserMixin.set_jds_fast converts them to numpy arrays and passes
# them to the C parser.
# Delimiter character code associated with each component.
# NOTE(review): 0 presumably means "no delimiter" -- confirm in parse_times.c.
delims = (0, 0, ord(':'), ord(':'), ord(':'), ord(':'), ord('.'))
# Start/stop character indices of each component within a string of the form
# "YYYY:DOY:HH:MM:SS.sss...".
# NOTE(review): -1 appears to mark an unused component (month, since this
# format uses day-of-year) or an open-ended stop (fraction) -- confirm.
starts = (0, -1, 4, 8, 11, 14, 17)
stops = (3, -1, 7, 10, 13, 16, -1)
# Break before: y m d h m s f
# (1 where the time string is allowed to end before that component.)
break_allowed = (0, 0, 0, 1, 0, 1, 1)
# Flag passed to the C parser; this format uses day-of-year, not month/day.
has_day_of_year = 1

def to_value(self, parent=None, **kwargs):
if self.scale == 'utc':
return super().value
Expand All @@ -202,6 +298,10 @@ def to_value(self, parent=None, **kwargs):

value = property(to_value)

def set_jds(self, val1, val2):
    """Set jd1 and jd2 from the time strings in val1.

    All work is delegated to ``set_jds_fast_or_python``.
    """
    self.set_jds_fast_or_python(val1, val2)


class TimeFracYear(TimeDecimalYear):
"""
Expand All @@ -222,7 +322,7 @@ def to_value(self, parent=None, **kwargs):
value = property(to_value)


class TimeGreta(TimeDate):
class TimeGreta(TimeDate, FastDateParserMixin):
"""
Date in format YYYYDDD.hhmmsssss, where sssss is number of milliseconds.
Expand Down Expand Up @@ -273,58 +373,7 @@ def set_jds(self, val1, val2):
val1 = np.array(['{:.9f}'.format(x) for x in val1.flat])
val1.shape = shape

self.set_jds_fast(val1)

def set_jds_fast(self, val1):
    """Parse the time strings in val1 with the C parser and set jd1, jd2.

    Raises ValueError if unicode input is not pure ASCII or if the C parser
    returns a non-zero status.
    """
    # Numpy unicode ('U') stores 4 bytes per character; bytes input stores 1.
    is_unicode = val1.dtype.kind == 'U'
    bytes_per_char = 4 if is_unicode else 1
    str_len = int(val1.dtype.itemsize // bytes_per_char)
    raw = val1.ravel().view(np.uint8)

    if is_unicode:
        # Reject anything that is not pure ASCII.
        if libpt.check_unicode(raw, len(raw) // 4) != 0:
            raise ValueError('input is not pure ASCII')
        # Keep one byte per character, copied into a contiguous buffer.
        raw = np.ascontiguousarray(raw[::4])

    # Output buffers, filled in place by the C parser.
    n_times = len(raw) // str_len
    year, month, day, hour, minute = (
        np.zeros(n_times, dtype=np.intc) for _ in range(5))
    second = np.zeros(n_times, dtype=np.double)

    # Parser layout parameters, converted to numpy arrays for the C call.
    delim_arr = np.array(self.delims, dtype=np.uint8)
    start_arr = np.array(self.starts, dtype=np.intc)
    stop_arr = np.array(self.stops, dtype=np.intc)
    break_arr = np.array(self.break_allowed, dtype=np.intc)

    status = libpt.parse_ymdhms_times(
        raw, n_times, str_len, self.has_day_of_year,
        delim_arr, start_arr, stop_arr, break_arr,
        year, month, day, hour, minute, second)

    if status != 0:
        msgs = {
            1: 'time string ends at beginning of component where break is not allowed',
            2: 'time string ends in middle of component',
            3: 'required delimiter character not found',
            4: 'non-digit found where digit (0-9) required',
            5: 'bad day of year (1 <= doy <= 365 or 366 for leap year',
        }
        raise ValueError(f'fast C time string parser failed: {msgs[status]}')

    # Success: convert the components to a two-part JD in the input's shape.
    scale = self.scale.upper().encode('ascii')
    jd1, jd2 = erfa.dtf2d(scale, year, month, day, hour, minute, second)
    jd1.shape = val1.shape
    jd2.shape = val1.shape
    self.jd1, self.jd2 = day_frac(jd1, jd2)
self.set_jds_fast_or_python(val1, val2)

def to_value(self, parent=None, **kwargs):
if self.scale == 'utc':
Expand Down

0 comments on commit d9ddab3

Please sign in to comment.