Refactor set_jds methods into mixin class

sot · Sep 25, 2020 · d9ddab3 · d9ddab3
1 parent 104a2ce
commit d9ddab3
Showing 1 changed file with 103 additions and 54 deletions.
diff --git a/cxotime/cxotime.py b/cxotime/cxotime.py
@@ -162,6 +162,94 @@ def now(cls):
     now.__doc__ = Time.now.__doc__
 
 
+class FastDateParserMixin:
+    def set_jds_fast_or_python(self, val1, val2):
+        """Parse the time strings in val1 and set jd1, jd2 (val2 is only forwarded)"""
+        # If specific input subformat is required (in_subfmt is not '*') then
+        # use the Python parser.  Otherwise try the fast C parser first and
+        # fall back to the Python parser if the fast parser raises.
+        if self.in_subfmt != '*':
+            self.set_jds_python(val1, val2)
+        else:
+            try:
+                self.set_jds_fast(val1)
+            except Exception:
+                # Best-effort fallback: the Python parser handles or reports it.
+                self.set_jds_python(val1, val2)
+
+    def set_jds_fast(self, val1):
+        """Parse time strings in val1 with the fast C parser; set jd1, jd2 or raise ValueError"""
+        # Flatten str ('U', 4 bytes per char) or bytes input down to a single array of uint8.
+        char_size = 4 if val1.dtype.kind == 'U' else 1
+        val1_str_len = int(val1.dtype.itemsize // char_size)
+        chars = val1.ravel().view(np.uint8)
+
+        if char_size == 4:
+            # Check that this is pure ASCII (a non-zero status means it is not)
+            status = libpt.check_unicode(chars, len(chars) // 4)
+            if status != 0:
+                raise ValueError('input is not pure ASCII')
+            # Keep every 4th byte, i.e. one byte per character.  It might be possible to
+            # avoid this copy with cleverness in parse_times.c but leave that for another day.
+            chars = chars[::4]
+        chars = np.ascontiguousarray(chars)
+
+        # Pre-allocate output components, one element per time string
+        n_times = len(chars) // val1_str_len
+        year = np.zeros(n_times, dtype=np.intc)
+        month = np.zeros(n_times, dtype=np.intc)
+        day = np.zeros(n_times, dtype=np.intc)
+        hour = np.zeros(n_times, dtype=np.intc)
+        minute = np.zeros(n_times, dtype=np.intc)
+        second = np.zeros(n_times, dtype=np.double)
+
+        # Set up parser parameters (attributes of self) as numpy arrays for the C parser
+        delims = np.array(self.delims, dtype=np.uint8)
+        starts = np.array(self.starts, dtype=np.intc)
+        stops = np.array(self.stops, dtype=np.intc)
+        break_allowed = np.array(self.break_allowed, dtype=np.intc)
+
+        # Call C parser; a zero status means success, non-zero is looked up in msgs below
+        status = libpt.parse_ymdhms_times(chars, n_times, val1_str_len, self.has_day_of_year,
+                                          delims, starts, stops, break_allowed,
+                                          year, month, day, hour, minute, second)
+        if status == 0:
+            # All went well: convert components to jd1, jd2 and restore the shape of val1
+            jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
+                                  year, month, day, hour, minute, second)
+            jd1.shape = val1.shape
+            jd2.shape = val1.shape
+            self.jd1, self.jd2 = day_frac(jd1, jd2)
+        else:
+            msgs = {1: 'time string ends at beginning of component where break is not allowed',
+                    2: 'time string ends in middle of component',
+                    3: 'required delimiter character not found',
+                    4: 'non-digit found where digit (0-9) required',
+                    5: 'bad day of year (1 <= doy <= 365 or 366 for leap year'}
+            raise ValueError(f'fast C time string parser failed: {msgs[status]}')
+
+    def set_jds_python(self, val1, val2):
+        """Parse time strings in val1 one by one in Python and set jd1, jd2 (val2 is unused)"""
+        # Select subformats based on current self.in_subfmt
+        subfmts = self._select_subfmts(self.in_subfmt)
+        # Be liberal in what we accept: convert bytes to ascii.
+        # Here .item() is needed for arrays with entries of unequal length,
+        # to strip trailing 0 bytes.
+        to_string = (str if val1.dtype.kind == 'U' else
+                     lambda x: str(x.item(), encoding='ascii'))
+        iterator = np.nditer([val1, None, None, None, None, None, None],
+                             flags=['zerosize_ok'],
+                             op_dtypes=[None] + 5 * [np.intc] + [np.double])
+        for val, iy, im, id, ihr, imin, dsec in iterator:
+            val = to_string(val)
+            iy[...], im[...], id[...], ihr[...], imin[...], dsec[...] = (
+                self.parse_string(val, subfmts))
+
+        jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
+                              *iterator.operands[1:])  # the six output arrays
+        self.jd1, self.jd2 = day_frac(jd1, jd2)
+
+
 class TimeSecs(TimeCxcSec):
     """
     Chandra X-ray Center seconds from 1998-01-01 00:00:00 TT.
@@ -170,7 +258,7 @@ class TimeSecs(TimeCxcSec):
     name = 'secs'
 
 
-class TimeDate(TimeYearDayTime):
+class TimeDate(TimeYearDayTime, FastDateParserMixin):
     """
     Year, day-of-year and time as "YYYY:DOY:HH:MM:SS.sss..." in UTC.
 
@@ -194,6 +282,14 @@ class TimeDate(TimeYearDayTime):
     """
     name = 'date'
 
+    # Class attributes for fast C-parsing, one entry per component (y, m, d, h, m, s, f)
+    delims = (0, 0, ord(':'), ord(':'), ord(':'), ord(':'), ord('.'))  # char before component
+    starts = (0, -1, 4, 8, 11, 14, 17)  # NOTE(review): looks like index of that delimiter
+    stops = (3, -1, 7, 10, 13, 16, -1)  # NOTE(review): looks like last index; -1 absent/open
+    # Break before:  y  m  d  h  m  s  f
+    break_allowed = (0, 0, 0, 1, 0, 1, 1)
+    has_day_of_year = 1  # passed to the C parser; this format has DOY, no month
+
     def to_value(self, parent=None, **kwargs):
         if self.scale == 'utc':
             return super().value
@@ -202,6 +298,10 @@ def to_value(self, parent=None, **kwargs):
 
     value = property(to_value)
 
+    def set_jds(self, val1, val2):
+        """Parse the time strings in val1 and set jd1, jd2 via set_jds_fast_or_python"""
+        self.set_jds_fast_or_python(val1, val2)
+
 
 class TimeFracYear(TimeDecimalYear):
     """
@@ -222,7 +322,7 @@ def to_value(self, parent=None, **kwargs):
     value = property(to_value)
 
 
-class TimeGreta(TimeDate):
+class TimeGreta(TimeDate, FastDateParserMixin):
     """
     Date in format YYYYDDD.hhmmsssss, where sssss is number of milliseconds.
 
@@ -273,58 +373,7 @@ def set_jds(self, val1, val2):
             val1 = np.array(['{:.9f}'.format(x) for x in val1.flat])
             val1.shape = shape
 
-        self.set_jds_fast(val1)
-
-    def set_jds_fast(self, val1):
-        """Use fast C parser to parse time strings in val1 and set jd1, jd2"""
-        # Handle bytes or str input and flatten down to a single array of uint8.
-        char_size = 4 if val1.dtype.kind == 'U' else 1
-        val1_str_len = int(val1.dtype.itemsize // char_size)
-        chars = val1.ravel().view(np.uint8)
-
-        if char_size == 4:
-            # Check that this is pure ASCII
-            status = libpt.check_unicode(chars, len(chars) // 4)
-            if status != 0:
-                raise ValueError('input is not pure ASCII')
-            # It might be possible to avoid this copy with cleverness in
-            # parse_times.c but leave that for another day.
-            chars = chars[::4]
-        chars = np.ascontiguousarray(chars)
-
-        # Pre-allocate output components
-        n_times = len(chars) // val1_str_len
-        year = np.zeros(n_times, dtype=np.intc)
-        month = np.zeros(n_times, dtype=np.intc)
-        day = np.zeros(n_times, dtype=np.intc)
-        hour = np.zeros(n_times, dtype=np.intc)
-        minute = np.zeros(n_times, dtype=np.intc)
-        second = np.zeros(n_times, dtype=np.double)
-
-        # Set up parser parameters as numpy arrays for passing to C parser
-        delims = np.array(self.delims, dtype=np.uint8)
-        starts = np.array(self.starts, dtype=np.intc)
-        stops = np.array(self.stops, dtype=np.intc)
-        break_allowed = np.array(self.break_allowed, dtype=np.intc)
-
-        # Call C parser
-        status = libpt.parse_ymdhms_times(chars, n_times, val1_str_len, self.has_day_of_year,
-                                          delims, starts, stops, break_allowed,
-                                          year, month, day, hour, minute, second)
-        if status == 0:
-            # All went well, finish the job
-            jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
-                                  year, month, day, hour, minute, second)
-            jd1.shape = val1.shape
-            jd2.shape = val1.shape
-            self.jd1, self.jd2 = day_frac(jd1, jd2)
-        else:
-            msgs = {1: 'time string ends at beginning of component where break is not allowed',
-                    2: 'time string ends in middle of component',
-                    3: 'required delimiter character not found',
-                    4: 'non-digit found where digit (0-9) required',
-                    5: 'bad day of year (1 <= doy <= 365 or 366 for leap year'}
-            raise ValueError(f'fast C time string parser failed: {msgs[status]}')
+        self.set_jds_fast_or_python(val1, val2)
 
     def to_value(self, parent=None, **kwargs):
         if self.scale == 'utc':