From 9147cb13a96b29dbb2eb1b575dc0cfccd36f976f Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Mon, 2 Jan 2023 16:07:35 -0500 Subject: [PATCH 1/3] Use astropy fast time parsing not cxotime fast parsing --- cxotime/cxotime.py | 173 +++----------------- cxotime/parse_times.c | 358 ------------------------------------------ setup.py | 32 +--- 3 files changed, 24 insertions(+), 539 deletions(-) delete mode 100644 cxotime/parse_times.c diff --git a/cxotime/cxotime.py b/cxotime/cxotime.py index fae7f42..c42b906 100644 --- a/cxotime/cxotime.py +++ b/cxotime/cxotime.py @@ -33,41 +33,6 @@ # allow auto downloads. iers.conf.auto_download = False -# Input types in the parse_times.c code -array_1d_char = npct.ndpointer(dtype=np.uint8, ndim=1, flags="C_CONTIGUOUS") -array_1d_double = npct.ndpointer(dtype=np.double, ndim=1, flags="C_CONTIGUOUS") -array_1d_int = npct.ndpointer(dtype=np.intc, ndim=1, flags="C_CONTIGUOUS") - -# load the library, using numpy mechanisms -libpt = npct.load_library("parse_times", Path(__file__).parent) - -# Set up the return types and argument types for parse_ymdhms_times() -# int parse_ymdhms_times(char *times, int n_times, int max_str_len, -# char *delims, int *starts, int *stops, int *break_allowed, -# int *years, int *months, int *days, int *hours, -# int *minutes, double *seconds) -libpt.parse_ymdhms_times.restype = c_int -libpt.parse_ymdhms_times.argtypes = [ - array_1d_char, - c_int, - c_int, - c_int, - array_1d_char, - array_1d_int, - array_1d_int, - array_1d_int, - array_1d_int, - array_1d_int, - array_1d_int, - array_1d_int, - array_1d_int, - array_1d_double, -] -libpt.check_unicode.restype = c_int - -# Set up returns types and args for the unicode checker -libpt.check_unicode.argtypes = [array_1d_char, c_int] - def print_time_conversions(): """Interface to entry_point script ``cxotime`` to print time conversions""" @@ -399,84 +364,6 @@ def print_conversions(self): print("\n".join(lines)) -class FastDateParserMixin: - def set_jds_fast(self, val1): - """Use fast C parser to parse time strings in val1 and set jd1, jd2""" - # Handle bytes or str input and flatten down to a single array of uint8. - char_size = 4 if val1.dtype.kind == "U" else 1 - val1_str_len = int(val1.dtype.itemsize // char_size) - chars = val1.ravel().view(np.uint8) - - if char_size == 4: - # Check that this is pure ASCII - status = libpt.check_unicode(chars, len(chars) // 4) - if status != 0: - raise ValueError("input is not pure ASCII") - # It might be possible to avoid this copy with cleverness in - # parse_times.c but leave that for another day. - chars = chars[::4] - chars = np.ascontiguousarray(chars) - - # Pre-allocate output components - n_times = len(chars) // val1_str_len - year = np.zeros(n_times, dtype=np.intc) - month = np.zeros(n_times, dtype=np.intc) - day = np.zeros(n_times, dtype=np.intc) - hour = np.zeros(n_times, dtype=np.intc) - minute = np.zeros(n_times, dtype=np.intc) - second = np.zeros(n_times, dtype=np.double) - - # Set up parser parameters as numpy arrays for passing to C parser - delims = np.array(self.delims, dtype=np.uint8) - starts = np.array(self.starts, dtype=np.intc) - stops = np.array(self.stops, dtype=np.intc) - break_allowed = np.array(self.break_allowed, dtype=np.intc) - - # Call C parser - status = libpt.parse_ymdhms_times( - chars, - n_times, - val1_str_len, - self.has_day_of_year, - delims, - starts, - stops, - break_allowed, - year, - month, - day, - hour, - minute, - second, - ) - if status == 0: - # All went well, finish the job - jd1, jd2 = erfa.dtf2d( - self.scale.upper().encode("ascii"), - year, - month, - day, - hour, - minute, - second, - ) - jd1.shape = val1.shape - jd2.shape = val1.shape - self.jd1, self.jd2 = day_frac(jd1, jd2) - else: - msgs = { - 1: ( - "time string ends at beginning of component where break is not" - " allowed" - ), - 2: "time string ends in middle of component", - 3: "required delimiter character not found", - 4: "non-digit found where digit (0-9) required", - 5: "bad day of year (1 <= doy <= 365 or 366 for leap year", - } - raise ValueError(f"fast C time string parser failed: {msgs[status]}") - - class TimeSecs(TimeCxcSec): """ Chandra X-ray Center seconds from 1998-01-01 00:00:00 TT. @@ -486,7 +373,7 @@ class TimeSecs(TimeCxcSec): name = "secs" -class TimeDate(TimeYearDayTime, FastDateParserMixin): +class TimeDate(TimeYearDayTime): """ Year, day-of-year and time as "YYYY:DOY:HH:MM:SS.sss..." in UTC. @@ -511,14 +398,6 @@ class TimeDate(TimeYearDayTime, FastDateParserMixin): name = "date" - # Class attributes for fast C-parsing - delims = (0, 0, ord(":"), ord(":"), ord(":"), ord(":"), ord(".")) - starts = (0, -1, 4, 8, 11, 14, 17) - stops = (3, -1, 7, 10, 13, 16, -1) - # Break before: y m d h m s f - break_allowed = (0, 0, 0, 1, 0, 1, 1) - has_day_of_year = 1 - def to_value(self, parent=None, **kwargs): if self.scale == "utc": return super().value @@ -531,7 +410,7 @@ def set_jds(self, val1, val2): """Parse the time strings contained in val1 and set jd1, jd2""" if val2 is not None: raise ValueError(f"cannot supply val2 for {self.name} format") - self.set_jds_fast(val1) + self.jd1, self.jd2 = self.get_jds_fast(val1, val2) class TimeFracYear(TimeDecimalYear): @@ -554,7 +433,7 @@ def to_value(self, parent=None, **kwargs): value = property(to_value) -class TimeGreta(TimeDate, FastDateParserMixin): +class TimeGreta(TimeDate): """ Date as a string in format 'YYYYDDD.hhmmsssss', where sssss is number of milliseconds. @@ -585,12 +464,15 @@ class TimeGreta(TimeDate, FastDateParserMixin): # stops: position where component ends (-1 => continue to end of string) # Before: yr mon doy hour minute second frac - delims = (0, 0, 0, ord("."), 0, 0, 0) - starts = (0, -1, 4, 7, 10, 12, 14) - stops = (3, -1, 6, 9, 11, 13, -1) - # Break before: y m d h m s f - break_allowed = (0, 0, 0, 1, 0, 1, 1) - has_day_of_year = 1 + + fast_parser_pars = dict( + delims=(0, 0, 0, ord("."), 0, 0, 0), + starts=(0, -1, 4, 7, 10, 12, 14), + stops=(3, -1, 6, 9, 11, 13, -1), + # Break before: y m d h m s f + break_allowed=(0, 0, 0, 1, 0, 1, 1), + has_day_of_year=1, + ) def _check_val_type(self, val1, val2): if val2 is not None: @@ -600,18 +482,11 @@ def _check_val_type(self, val1, val2): raise TypeError( "Input values for {0} class must be string or number".format(self.name) ) - return val1, None - def set_jds(self, val1, val2): - """Parse the time strings contained in val1 and set jd1, jd2""" - # If specific input subformat is required then use the Python parser. - # Also do this if Time format class does not define `use_fast_parser` - # or if the fast parser is entirely disabled. - # Allow for float input if val1.dtype.kind in ("f", "i"): val1 = np.array(["{:.9f}".format(x) for x in val1.flat]).reshape(val1.shape) - self.set_jds_fast(val1) + return val1, None def to_value(self, parent=None, **kwargs): if self.scale == "utc": @@ -625,7 +500,7 @@ def to_value(self, parent=None, **kwargs): value = property(to_value) -class TimeMaude(TimeDate, FastDateParserMixin): +class TimeMaude(TimeDate): """ Date as a 64-bit integer in format YYYYDDDhhmmsss, where sss is number of milliseconds. @@ -655,13 +530,15 @@ class TimeMaude(TimeDate, FastDateParserMixin): # stops: position where component ends (-1 => continue to end of string) # Before: yr mon doy hour minute second frac - use_fast_parser = True - delims = (0, 0, 0, 0, 0, 0, 0) - starts = (0, -1, 4, 7, 9, 11, 13) - stops = (3, -1, 6, 8, 10, 12, -1) - # Break before: y m d h m s f - break_allowed = (0, 0, 0, 1, 0, 1, 1) - has_day_of_year = 1 + fast_parser_pars = dict( + use_fast_parser=True, + delims=(0, 0, 0, 0, 0, 0, 0), + starts=(0, -1, 4, 7, 9, 11, 13), + stops=(3, -1, 6, 8, 10, 12, -1), + # Break before: y m d h m s f, + break_allowed=(0, 0, 0, 1, 0, 1, 1), + has_day_of_year=1, + ) def _check_val_type(self, val1, val2): if val2 is not None: @@ -677,10 +554,6 @@ def _check_val_type(self, val1, val2): return val1, None - def set_jds(self, val1, val2): - """Parse the time strings contained in val1 and set jd1, jd2""" - self.set_jds_fast(val1) - def to_value(self, parent=None, **kwargs): if self.scale == "utc": out = super().value diff --git a/cxotime/parse_times.c b/cxotime/parse_times.c deleted file mode 100644 index 2fe052a..0000000 --- a/cxotime/parse_times.c +++ /dev/null @@ -1,358 +0,0 @@ -#include -#include - -// ASCII codes for '0' and '9' -const char char_zero = 48; -const char char_nine = 57; - -// Distutils on Windows automatically exports ``PyInit_parse_times``, -// create dummy to prevent linker complaining about missing symbol. -// Based on convolution/src/convolve.c. -#if defined(_MSC_VER) -void PyInit_parse_times(void) -{ - return; -} -#endif - -int parse_int_from_char_array(char *chars, int str_len, - char delim, int idx0, int idx1, - int *val) -// Parse integer from positions idx0:idx1 (inclusive) within chars, optionally -// starting with a delimiter. -// -// Example: "2020-01-24" -// ^^^ -// 0123456789 -// -// int day, status; -// status = parse_int_from_char_array("2020-01-24", &day, 10, '-', 7, 9); -// -// Inputs: -// char *chars: time string -// int str_len: length of *chars string -// char delim: optional character at position idx0 when delim > 0 -// int idx0: start index for parsing integer -// int idx1: stop index (inclusive) for parsing integer -// -// Output: -// int *val: output value -// -// Returns: -// int status: -// 0: OK -// 1: String ends at the beginning of requested value -// 2: String ends in the middle of requested value -// 3: Required delimiter character not found -// 4: Non-digit found where digit (0-9) required -{ - int mult = 1; - char digit; - char ch; - int ii; - - // Check if string ends (has 0x00) before str_len. Require that this segment - // of the string is entirely contained in the string (idx1 < str_len), - // remembering that idx1 is inclusive and counts from 0. - if (idx1 < str_len) { - for (ii = idx0; ii <= idx1; ii++) { - if (chars[ii] == 0) { - str_len = ii; - break; - } - } - } - // String ends before the beginning of requested value, - // e.g. "2000-01" (str_len=7) for day (idx0=7). This is OK in some - // cases, e.g. before hour (2000-01-01). - if (idx0 >= str_len) { - return 1; - } - - // String ends in the middle of requested value. This implies a badly - // formatted time. - if (idx1 >= str_len) { - return 2; - } - - // Look for optional delimiter character, e.g. ':' before minute. If delim == 0 - // then no character is required. - if (delim > 0) { - // Required start character not found. - if (chars[idx0] != delim) { - return 3; - } - idx0 += 1; - } - - // Build up the value using reversed digits - *val = 0; - for (ii = idx1; ii >= idx0; ii--) - { - ch = chars[ii]; - if (ch < char_zero || ch > char_nine) { - // Not a digit, implying badly formatted time. - return 4; - } - digit = ch - char_zero; - *val += digit * mult; - mult *= 10; - } - - return 0; -} - -int parse_frac_from_char_array(char *chars, int str_len, char delim, int idx0, - double *val) -// Parse trailing fraction starting from position idx0 in chars. -// -// Example: "2020-01-24T12:13:14.5556" -// ^^^^^ -// 012345678901234567890123 -// -// int status; -// float frac; -// status = parse_frac_from_char_array("2020-01-24T12:13:14.5556", &frac, 24, '.', 19); -// -// Inputs: -// char *chars: time string -// int str_len: length of *chars string -// char delim: optional character at position idx0 when delim > 0 -// int idx0: start index for parsing integer -// -// Output: -// double *val: output value -// -// Returns: -// int status: -// 0: OK -// 1: String ends at the beginning of requested value -// 3: Required delimiter character not found -// 4: Non-digit found where digit (0-9) required -{ - double mult = 0.1; - char digit; - char ch; - int ii; - - *val = 0.0; - - // String ends at exactly before the beginning of requested fraction. - // e.g. "2000-01-01 12:13:14". Fraction value is zero. - if (idx0 == str_len) { - return 1; - } - - // Look for optional delimiter character, e.g. '.' before fraction. If delim == 0 - // then no character is required. This can happen for unusual formats like - // Chandra GRETA time yyyyddd.hhmmssfff. - if (delim > 0) { - // Required start character not found. - if (chars[idx0] != delim) { - return 3; - } - idx0 += 1; - } - - for (ii = idx0; ii < str_len; ii++) - { - ch = chars[ii]; - if (ch < char_zero || ch > char_nine) { - // Not a digit, implying badly formatted time. - return 4; - } - digit = ch - char_zero; - *val += digit * mult; - mult /= 10.0; - } - return 0; -} - -static inline int is_leap_year (int year) -// Determine if year is a leap year. -// Inspired by from https://stackoverflow.com/questions/17634282 -{ - return ((year & 3) == 0) - && ((year % 100 != 0) - || (((year / 100) & 3) == 0)); -} - -int convert_day_of_year_to_month_day(int year, int day_of_year, int *month, int *day_of_month) -// Convert year and day_of_year into month, day_of_month -// Inspired by from https://stackoverflow.com/questions/17634282, determine -{ - int leap_year = is_leap_year(year) ? 1 : 0; - int days_in_year = leap_year ? 366 : 365; - const unsigned short int _mon_yday_normal[13] = - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }; - const unsigned short int _mon_yday_leap[13] = - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }; - const unsigned short int *mon_yday = leap_year ? _mon_yday_leap :_mon_yday_normal; - int mon; - - if (day_of_year < 1 || day_of_year > days_in_year) { - // Error in day_of_year - return 5; - } - - for (mon = 1; mon <= 12; mon++) { - if (day_of_year <= mon_yday[mon]) { - *month = mon; - *day_of_month = day_of_year - mon_yday[mon - 1]; - break; - } - } - - return 0; -} - -int parse_ymdhms_times(char *times, int n_times, int max_str_len, int has_day_of_year, - char *delims, int *starts, int *stops, int *break_allowed, - int *years, int *months, int *days, int *hours, - int *minutes, double *seconds) -// Parse a string time in `chars` which has year, (month, day | day_of_year), -// hour, minute, seconds components. -// -// Examples: "2020-01-24T12:13:14.5556", "2020:123:12:13:14.5556" -// -// Inputs: -// char *times: time characters (flattened n_times x max_str_len array) -// int n_times: number of time strings (each max_str_len long) -// int max_str_len: max length of string (may be null-terminated before this) -// int has_day_of_year: time includes day-of-year instead of month, day-of-month -// char *delims: array of delimiters preceding yr, mon, day, hr, min, isec, frac -// components. Value of 0 means no preceding delimiter. -// int *starts, *stop: arrays of start/stop indexes into time string. -// int *break_allowed: if true (1) then the time string can legally end just -// before the corresponding component (e.g. "2000-01-01" is a valid time but -// "2000-01-01 12" is not). -// -// Outputs: -// int *year, *month, *day, *hour, *minute: output components (n_times long) -// double *second: output seconds (n_times long) -// -// Returns: -// int status: -// 0: OK -// 1: String ends at the beginning of requested value -// 2: String ends in the middle of requested value -// 3: Required delimiter character not found -// 4: Non-digit found where digit (0-9) required -// 5: Bad day of year -{ - int str_len; - int status = 0; - int isec; - double frac; - char *time; - int *year, *month, *day, *hour, *minute; - double *second; - int i, ii; - - for (ii = 0; ii < n_times; ii++) - { - time = times + ii * max_str_len; - year = years + ii; - month = months + ii; - day = days + ii; - hour = hours + ii; - minute = minutes + ii; - second = seconds + ii; - - // Initialize default values - *month = 1; - *day = 1; - *hour = 0; - *minute = 0; - *second = 0.0; - - // Parse "2000-01-12 13:14:15.678" - // 01234567890123456789012 - - // Check for null termination before max_str_len. If called using a contiguous - // numpy 2-d array of chars there may or may not be null terminations. - str_len = max_str_len; - for (i = 0; i < max_str_len; i++) { - if (time[i] == 0) { - str_len = i; - break; - } - } - - // Get each time component year, month, day, hour, minute, isec, frac - status = parse_int_from_char_array(time, str_len, delims[0], starts[0], stops[0], year); - if (status) { - if (status == 1 && break_allowed[0]) { continue; } - else { return status; } - } - - // Optionally parse month - if (! has_day_of_year) { - status = parse_int_from_char_array(time, str_len, delims[1], starts[1], stops[1], month); - if (status) { - if (status == 1 && break_allowed[1]) { continue; } - else { return status; } - } - } - - // This might be day-of-month or day-of-year - status = parse_int_from_char_array(time, str_len, delims[2], starts[2], stops[2], day); - if (status) { - if (status == 1 && break_allowed[2]) { continue; } - else { return status; } - } - - if (has_day_of_year) { - // day contains day of year at this point, but convert it to day of month - status = convert_day_of_year_to_month_day(*year, *day, month, day); - if (status) { - return status; - } - } - - status = parse_int_from_char_array(time, str_len, delims[3], starts[3], stops[3], hour); - if (status) { - if (status == 1 && break_allowed[3]) { continue; } - else { return status; } - } - - status = parse_int_from_char_array(time, str_len, delims[4], starts[4], stops[4], minute); - if (status) { - if (status == 1 && break_allowed[4]) { continue; } - else { return status; } - } - - status = parse_int_from_char_array(time, str_len, delims[5], starts[5], stops[5], &isec); - if (status) { - if (status == 1 && break_allowed[5]) { continue; } - else { return status; } - } - - status = parse_frac_from_char_array(time, str_len, delims[6], starts[6], &frac); - if (status) { - if (status != 1 || ! break_allowed[6]) { return status; } - } - - *second = isec + frac; - } - - return 0; -} - -int check_unicode(char *chars, int n_unicode_char) -// Check if *chars is pure ASCII, assuming input is UTF-32 -{ - char *ch; - int ii; - - ch = chars; - for (ii = 0; ii < n_unicode_char; ii++) - { - ch++; - if (*ch++) return 1; - if (*ch++) return 1; - if (*ch++) return 1; - } - return 0; - -} diff --git a/setup.py b/setup.py index 77790e6..c4369ed 100644 --- a/setup.py +++ b/setup.py @@ -1,40 +1,11 @@ # Licensed under a 3-clause BSD style license - see LICENSE.rst -import sys - -from setuptools import Extension, setup +from setuptools import setup try: from testr.setup_helper import cmdclass except ImportError: cmdclass = {} -# NOTE: add '-Rpass-missed=.*' to ``extra_compile_args`` when compiling with clang -# to report missed optimizations. -if sys.platform.startswith("win"): - extra_compile_args = [] - extra_link_args = ["/EXPORT:parse_ymdhms_times", "/EXPORT:check_unicode"] -else: - extra_compile_args = ["-fPIC"] - extra_link_args = [] - -# Set up extension for C-based time parser. Numpy is required for build but is -# optional for other things like `python setup.py --version`. -try: - import numpy - - ext_modules = [ - Extension( - name="cxotime.parse_times", - sources=["cxotime/parse_times.c"], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - include_dirs=[numpy.get_include()], - language="c", - ) - ] -except ImportError: - ext_modules = [] - entry_points = { "console_scripts": [ "cxotime = cxotime.cxotime:print_time_conversions", @@ -47,7 +18,6 @@ description="Chandra Time class base on astropy Time", author_email="taldcroft@cfa.harvard.edu", use_scm_version=True, - ext_modules=ext_modules, setup_requires=["setuptools_scm", "setuptools_scm_git_archive"], zip_safe=False, packages=["cxotime", "cxotime.tests"], From b1ef81368254161122b83e784d81bf6d0c8518fd Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Mon, 2 Jan 2023 16:27:17 -0500 Subject: [PATCH 2/3] Remove unused imports --- cxotime/cxotime.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cxotime/cxotime.py b/cxotime/cxotime.py index c42b906..d25f0a1 100644 --- a/cxotime/cxotime.py +++ b/cxotime/cxotime.py @@ -3,15 +3,11 @@ import sys import warnings from copy import copy -from ctypes import c_int -from pathlib import Path from typing import Union import numpy as np -import numpy.ctypeslib as npct import numpy.typing as npt from astropy.time import Time, TimeCxcSec, TimeDecimalYear, TimeYearDayTime -from astropy.time.utils import day_frac from astropy.utils import iers # in astropy versions < 4.2, erfa was an astropy private package: From 6190952d219d2252af2b32056d9e714fff2893e6 Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Mon, 2 Jan 2023 16:33:24 -0500 Subject: [PATCH 3/3] Clean up import cruft from old astropy --- cxotime/cxotime.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cxotime/cxotime.py b/cxotime/cxotime.py index d25f0a1..c3f1b41 100644 --- a/cxotime/cxotime.py +++ b/cxotime/cxotime.py @@ -5,17 +5,12 @@ from copy import copy from typing import Union +import erfa import numpy as np import numpy.typing as npt from astropy.time import Time, TimeCxcSec, TimeDecimalYear, TimeYearDayTime from astropy.utils import iers -# in astropy versions < 4.2, erfa was an astropy private package: -try: - import erfa -except ModuleNotFoundError: - from astropy import _erfa as erfa - # TODO: use npt.NDArray with numpy 1.21 CxoTimeLike = Union["CxoTime", str, float, int, np.ndarray, npt.ArrayLike, None]