From 4f0aca536c9fb598bb318eafd733684fbab39772 Mon Sep 17 00:00:00 2001 From: Maxime Carbonneau-Leclerc Date: Fri, 6 Jan 2023 12:29:37 -0500 Subject: [PATCH] =?UTF-8?q?[ISSUE=20#20322]=20add=20datetime=5Fgranularity?= =?UTF-8?q?=20logic=20to=20DatetimeStreamSlicer=E2=80=A6=20(#20717)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [ISSUE #20322] add datetime_granularity logic to DatetimeStreamSlicer and migrate duration to ISO8601 * [ISSUE #20322] fix tests * [ISSUE #20322] code review based on clnoll's comments and fixed tests * [ISSUE #20322] fix flake8 error * [ISSUE #20322] fix source tests * [ISSUE #20322] fixing yet another error in source * [ISSUE #20322] code review * [ISSUE #20322] adding new sources using datetime slicer * [ISSUE #20322] fixing source-datascope and increasing version * [ISSUE #20322] regenerate component schema * [ISSUE #20322] fixing source-datascope * [ISSUE #20322] extra field error --- airbyte-cdk/python/CHANGELOG.md | 3 + .../declarative_component_schema.yaml | 3 + .../models/declarative_component_schema.py | 1 + .../stream_slicers/datetime_stream_slicer.py | 49 ++-- airbyte-cdk/python/setup.py | 3 +- .../retrievers/test_simple_retriever.py | 4 +- .../test_cartesian_product_stream_slicer.py | 6 +- .../test_datetime_stream_slicer.py | 257 ++++++++++-------- .../sources/declarative/test_factory.py | 10 +- .../tests/test_incremental.py | 2 +- .../source-braze/source_braze/braze.yaml | 5 +- .../unit_tests/test_datetime_slicer.py | 3 +- .../source_callrail/callrail.yaml | 3 +- .../source_close_com/close_com.yaml | 9 +- .../source_coin_api/coin_api.yaml | 3 +- .../coingecko_coins.yaml | 3 +- .../acceptance-test-config.yml | 1 + .../source_datascope/datascope.yaml | 5 +- .../source_delighted/delighted.yaml | 3 +- .../source-gnews/source_gnews/gnews.yaml | 3 +- .../source_mailersend/mailersend.yaml | 3 +- .../source-nytimes/acceptance-test-config.yml | 4 +- .../source_nytimes/nytimes.yaml | 3 +- .../source_posthog/posthog.yaml | 12 +- .../unit_tests/test_components.py | 41 +-- .../source_prestashop/prestashop.yaml | 3 +- .../source_punk_api/punk_api.yaml | 3 +- .../source_sendgrid/sendgrid.yaml | 4 +- .../source_senseforce/senseforce.yaml | 3 +- .../source-square/source_square/square.yaml | 13 +- .../unit_tests/test_component.py | 7 +- .../source-tempo/source_tempo/tempo.yaml | 3 +- .../acceptance-test-config.yml | 3 +- .../the_guardian_api.yaml | 3 +- .../tvmaze_schedule.yaml | 3 +- .../source_twitter/twitter.yaml | 3 +- .../wikipedia_pageviews.yaml | 6 +- .../source_woocommerce/components.py | 22 -- .../source_woocommerce/woocommerce.yaml | 9 +- .../source_zenloop/zenloop.yaml | 3 +- .../stream-slicers.md | 19 +- 41 files changed, 305 insertions(+), 241 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-woocommerce/source_woocommerce/components.py diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 44e2a40ac743..d45f18a2f16f 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.18.0 +Adding `cursor_granularity` to the declarative API of DatetimeStreamSlicer + ## 0.17.0 Add utility class to infer schemas from real records diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 1eef180598d0..b9d9eb37a5b2 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -330,6 +330,7 @@ definitions: - cursor_field - end_datetime - datetime_format + - cursor_granularity - start_datetime - step properties: @@ -340,6 +341,8 @@ definitions: type: string datetime_format: type: string + cursor_granularity: + type: string end_datetime: anyOf: - type: string diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index e2153536819c..4a44dfaf4349 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -321,6 +321,7 @@ class DatetimeStreamSlicer(BaseModel): type: Literal["DatetimeStreamSlicer"] cursor_field: str datetime_format: str + cursor_granularity: str end_datetime: Union[str, MinMaxDatetime] start_datetime: Union[str, MinMaxDatetime] step: str diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py index 64c3be3a0dfb..9b475f822379 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py @@ -3,7 +3,6 @@ # import datetime -import re from dataclasses import InitVar, dataclass, field from typing import Any, Iterable, Mapping, Optional, Union @@ -16,7 +15,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState from dataclasses_jsonschema import JsonSchemaMixin -from dateutil.relativedelta import relativedelta +from isodate import Duration, parse_duration @dataclass @@ -27,16 +26,7 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): Given a start time, end time, a step function, and an optional lookback window, the stream slicer will partition the date range from start time - lookback window to end time. - The step function is defined as a string of the form: - `""` - - where unit can be one of - - years, y - - months, m - - weeks, w - - days, d - - For example, "1d" will produce windows of 1 day, and "2w" windows of 2 weeks. + The step function is defined as a string of the form ISO8601 duration The timestamp format accepts the same format codes as datetime.strfptime, which are all the format codes required by the 1989 C standard. @@ -45,15 +35,16 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): Attributes: start_datetime (Union[MinMaxDatetime, str]): the datetime that determines the earliest record that should be synced end_datetime (Union[MinMaxDatetime, str]): the datetime that determines the last record that should be synced - step (str): size of the timewindow + step (str): size of the timewindow (ISO8601 duration) cursor_field (Union[InterpolatedString, str]): record's cursor field datetime_format (str): format of the datetime + cursor_granularity (str): smallest increment the datetime_format has (ISO 8601 duration) that will be used to ensure that the start of a slice does not overlap with the end of the previous one config (Config): connection config start_time_option (Optional[RequestOption]): request option for start time end_time_option (Optional[RequestOption]): request option for end time stream_state_field_start (Optional[str]): stream slice start time field stream_state_field_end (Optional[str]): stream slice end time field - lookback_window (Optional[InterpolatedString]): how many days before start_datetime to read data for + lookback_window (Optional[InterpolatedString]): how many days before start_datetime to read data for (ISO8601 duration) """ start_datetime: Union[MinMaxDatetime, str] @@ -61,6 +52,7 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): step: str cursor_field: Union[InterpolatedString, str] datetime_format: str + cursor_granularity: str config: Config options: InitVar[Mapping[str, Any]] _cursor: dict = field(repr=False, default=None) # tracks current datetime @@ -71,10 +63,6 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin): stream_state_field_end: Optional[str] = None lookback_window: Optional[Union[InterpolatedString, str]] = None - timedelta_regex = re.compile( - r"((?P[\.\d]+?)y)?" r"((?P[\.\d]+?)m)?" r"((?P[\.\d]+?)w)?" r"((?P[\.\d]+?)d)?$" - ) - def __post_init__(self, options: Mapping[str, Any]): if not isinstance(self.start_datetime, MinMaxDatetime): self.start_datetime = MinMaxDatetime(self.start_datetime, options) @@ -85,6 +73,7 @@ def __post_init__(self, options: Mapping[str, Any]): self._interpolation = JinjaInterpolation() self._step = self._parse_timedelta(self.step) + self._cursor_granularity = self._parse_timedelta(self.cursor_granularity) self.cursor_field = InterpolatedString.create(self.cursor_field, options=options) self.stream_slice_field_start = InterpolatedString.create(self.stream_state_field_start or "start_time", options=options) self.stream_slice_field_end = InterpolatedString.create(self.stream_state_field_end or "end_time", options=options) @@ -144,7 +133,7 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> stream_state = stream_state or {} kwargs = {"stream_state": stream_state} end_datetime = min(self.end_datetime.get_datetime(self.config, **kwargs), datetime.datetime.now(tz=self._timezone)) - lookback_delta = self._parse_timedelta(self.lookback_window.eval(self.config, **kwargs) if self.lookback_window else "0d") + lookback_delta = self._parse_timedelta(self.lookback_window.eval(self.config, **kwargs) if self.lookback_window else "P0D") earliest_possible_start_datetime = min(self.start_datetime.get_datetime(self.config, **kwargs), end_datetime) cursor_datetime = self._calculate_cursor_datetime_from_state(stream_state) @@ -154,18 +143,18 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) -> def _calculate_cursor_datetime_from_state(self, stream_state: Mapping[str, Any]) -> datetime.datetime: if self.cursor_field.eval(self.config, stream_state=stream_state) in stream_state: - return self.parse_date(stream_state[self.cursor_field.eval(self.config)]) + datetime.timedelta(days=1) + return self.parse_date(stream_state[self.cursor_field.eval(self.config)]) return datetime.datetime.min.replace(tzinfo=datetime.timezone.utc) def _format_datetime(self, dt: datetime.datetime): return self._parser.format(dt, self.datetime_format) - def _partition_daterange(self, start, end, step: datetime.timedelta): + def _partition_daterange(self, start: datetime.datetime, end: datetime.datetime, step: Union[datetime.timedelta, Duration]): start_field = self.stream_slice_field_start.eval(self.config) end_field = self.stream_slice_field_end.eval(self.config) dates = [] while start <= end: - end_date = self._get_date(start + step - datetime.timedelta(days=1), end, min) + end_date = self._get_date(start + step - self._cursor_granularity, end, min) dates.append({start_field: self._format_datetime(start), end_field: self._format_datetime(end_date)}) start += step return dates @@ -178,19 +167,13 @@ def parse_date(self, date: str) -> datetime.datetime: return self._parser.parse(date, self.datetime_format, self._timezone) @classmethod - def _parse_timedelta(cls, time_str): + def _parse_timedelta(cls, time_str) -> Union[datetime.timedelta, Duration]: """ - Parse a time string e.g. (2h13m) into a timedelta object. - Modified from virhilo's answer at https://stackoverflow.com/a/4628148/851699 - :param time_str: A string identifying a duration. (eg. 2h13m) - :return relativedelta: A relativedelta object + :return Parses an ISO 8601 durations into datetime.timedelta or Duration objects. """ - parts = cls.timedelta_regex.match(time_str) - - assert parts is not None - - time_params = {name: float(param) for name, param in parts.groupdict().items() if param} - return relativedelta(**time_params) + if not time_str: + return datetime.timedelta(0) + return parse_duration(time_str) def get_request_params( self, diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 4be162a0e630..66ce7c067620 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.17.0", + version="0.18.0", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", @@ -48,6 +48,7 @@ # pinned to the last working version for us temporarily while we fix "dataclasses-jsonschema==2.15.1", "dpath~=2.0.1", + "isodate~=0.6.1", "jsonschema~=3.2.0", "jsonref~=0.2", "pendulum", diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index 5a4f4b45fd9f..abb33bf3ca37 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -122,7 +122,7 @@ def test_simple_retriever_with_request_response_logs(mock_http_stream): paginator = MagicMock() record_selector = MagicMock() iterator = DatetimeStreamSlicer( - start_datetime="", end_datetime="", step="1d", cursor_field="id", datetime_format="", config={}, options={} + start_datetime="", end_datetime="", step="P1D", cursor_field="id", datetime_format="", cursor_granularity="P1D", config={}, options={} ) retriever = SimpleRetriever( @@ -153,7 +153,7 @@ def test_simple_retriever_with_request_response_log_last_records(mock_http_strea record_selector.select_records.return_value = request_response_logs response = requests.Response() iterator = DatetimeStreamSlicer( - start_datetime="", end_datetime="", step="1d", cursor_field="id", datetime_format="", config={}, options={} + start_datetime="", end_datetime="", step="P1D", cursor_field="id", datetime_format="", cursor_granularity="P1D", config={}, options={} ) retriever = SimpleRetriever( diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py index eced8061f340..fa361ef4fc38 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_cartesian_product_stream_slicer.py @@ -42,9 +42,10 @@ DatetimeStreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01", datetime_format="%Y-%m-%d", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-03", datetime_format="%Y-%m-%d", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString.create("", options={}), datetime_format="%Y-%m-%d", + cursor_granularity="P1D", config={}, options={}, ), @@ -87,9 +88,10 @@ def test_update_cursor(test_name, stream_slice, expected_state): DatetimeStreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01", datetime_format="%Y-%m-%d", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-03", datetime_format="%Y-%m-%d", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString(string="date", options={}), datetime_format="%Y-%m-%d", + cursor_granularity="P1D", config={}, options={}, ), diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py index e86216c809c8..9d53297b2dc1 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py @@ -13,6 +13,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.datetime_stream_slicer import DatetimeStreamSlicer datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z" +cursor_granularity = "PT0.000001S" FAKE_NOW = datetime.datetime(2022, 1, 1, tzinfo=datetime.timezone.utc) config = {"start_date": "2021-01-01T00:00:00.000000+0000", "start_date_ymd": "2021-01-01"} @@ -33,27 +34,28 @@ def mock_datetime_now(monkeypatch): @pytest.mark.parametrize( - "test_name, stream_state, start, end, step, cursor_field, lookback_window, datetime_format, expected_slices", + "test_name, stream_state, start, end, step, cursor_field, lookback_window, datetime_format, cursor_granularity, expected_slices", [ ( "test_1_day", None, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "1d", + "P1D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T00:00:00.000000+0000"}, - {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T00:00:00.000000+0000"}, - {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T00:00:00.000000+0000"}, - {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"}, - {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, - {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, - {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T00:00:00.000000+0000"}, - {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T23:59:59.999999+0000"}, + {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T23:59:59.999999+0000"}, + {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T23:59:59.999999+0000"}, + {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T23:59:59.999999+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T23:59:59.999999+0000"}, + {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T23:59:59.999999+0000"}, + {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, + {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T23:59:59.999999+0000"}, {"start_time": "2021-01-10T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), @@ -62,15 +64,16 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "2d", + "P2D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-02T00:00:00.000000+0000"}, - {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"}, - {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, - {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-08T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-02T23:59:59.999999+0000"}, + {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-04T23:59:59.999999+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), @@ -79,16 +82,17 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-02-10T00:00:00.000000+0000", options={}), - "1w", + "P1W", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-14T00:00:00.000000+0000"}, - {"start_time": "2021-01-15T00:00:00.000000+0000", "end_time": "2021-01-21T00:00:00.000000+0000"}, - {"start_time": "2021-01-22T00:00:00.000000+0000", "end_time": "2021-01-28T00:00:00.000000+0000"}, - {"start_time": "2021-01-29T00:00:00.000000+0000", "end_time": "2021-02-04T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-07T23:59:59.999999+0000"}, + {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-14T23:59:59.999999+0000"}, + {"start_time": "2021-01-15T00:00:00.000000+0000", "end_time": "2021-01-21T23:59:59.999999+0000"}, + {"start_time": "2021-01-22T00:00:00.000000+0000", "end_time": "2021-01-28T23:59:59.999999+0000"}, + {"start_time": "2021-01-29T00:00:00.000000+0000", "end_time": "2021-02-04T23:59:59.999999+0000"}, {"start_time": "2021-02-05T00:00:00.000000+0000", "end_time": "2021-02-10T00:00:00.000000+0000"}, ], ), @@ -97,16 +101,17 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-06-10T00:00:00.000000+0000", options={}), - "1m", + "P1M", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-31T00:00:00.000000+0000"}, - {"start_time": "2021-02-01T00:00:00.000000+0000", "end_time": "2021-02-28T00:00:00.000000+0000"}, - {"start_time": "2021-03-01T00:00:00.000000+0000", "end_time": "2021-03-31T00:00:00.000000+0000"}, - {"start_time": "2021-04-01T00:00:00.000000+0000", "end_time": "2021-04-30T00:00:00.000000+0000"}, - {"start_time": "2021-05-01T00:00:00.000000+0000", "end_time": "2021-05-31T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-31T23:59:59.999999+0000"}, + {"start_time": "2021-02-01T00:00:00.000000+0000", "end_time": "2021-02-28T23:59:59.999999+0000"}, + {"start_time": "2021-03-01T00:00:00.000000+0000", "end_time": "2021-03-31T23:59:59.999999+0000"}, + {"start_time": "2021-04-01T00:00:00.000000+0000", "end_time": "2021-04-30T23:59:59.999999+0000"}, + {"start_time": "2021-05-01T00:00:00.000000+0000", "end_time": "2021-05-31T23:59:59.999999+0000"}, {"start_time": "2021-06-01T00:00:00.000000+0000", "end_time": "2021-06-10T00:00:00.000000+0000"}, ], ), @@ -115,12 +120,13 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2022-06-10T00:00:00.000000+0000", options={}), - "1y", + "P1Y", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-12-31T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-12-31T23:59:59.999999+0000"}, {"start_time": "2022-01-01T00:00:00.000000+0000", "end_time": "2022-01-01T00:00:00.000000+0000"}, ], ), @@ -129,16 +135,17 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="{{ stream_state['date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "1d", + "P1D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, - {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, - {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T00:00:00.000000+0000"}, - {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T00:00:00.000000+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T23:59:59.999999+0000"}, + {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T23:59:59.999999+0000"}, + {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, + {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T23:59:59.999999+0000"}, {"start_time": "2021-01-10T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), @@ -147,10 +154,11 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "12d", + "P12D", cursor_field, None, datetime_format, + cursor_granularity, [ {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], @@ -160,15 +168,16 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="2021-12-28T00:00:00.000000+0000", options={}), MinMaxDatetime(datetime=f"{(FAKE_NOW + datetime.timedelta(days=1)).strftime(datetime_format)}", options={}), - "1d", + "P1D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-12-28T00:00:00.000000+0000", "end_time": "2021-12-28T00:00:00.000000+0000"}, - {"start_time": "2021-12-29T00:00:00.000000+0000", "end_time": "2021-12-29T00:00:00.000000+0000"}, - {"start_time": "2021-12-30T00:00:00.000000+0000", "end_time": "2021-12-30T00:00:00.000000+0000"}, - {"start_time": "2021-12-31T00:00:00.000000+0000", "end_time": "2021-12-31T00:00:00.000000+0000"}, + {"start_time": "2021-12-28T00:00:00.000000+0000", "end_time": "2021-12-28T23:59:59.999999+0000"}, + {"start_time": "2021-12-29T00:00:00.000000+0000", "end_time": "2021-12-29T23:59:59.999999+0000"}, + {"start_time": "2021-12-30T00:00:00.000000+0000", "end_time": "2021-12-30T23:59:59.999999+0000"}, + {"start_time": "2021-12-31T00:00:00.000000+0000", "end_time": "2021-12-31T23:59:59.999999+0000"}, {"start_time": "2022-01-01T00:00:00.000000+0000", "end_time": "2022-01-01T00:00:00.000000+0000"}, ], ), @@ -177,10 +186,11 @@ def mock_datetime_now(monkeypatch): None, MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), MinMaxDatetime(datetime="2021-01-05T00:00:00.000000+0000", options={}), - "1d", + "P1D", cursor_field, None, datetime_format, + cursor_granularity, [ {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, ], @@ -190,16 +200,17 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="{{ stream_state['date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "1d", + "P1D", InterpolatedString(string="{{ stream_state['date'] }}", options={}), None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, - {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, - {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T00:00:00.000000+0000"}, - {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T00:00:00.000000+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T23:59:59.999999+0000"}, + {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T23:59:59.999999+0000"}, + {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, + {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T23:59:59.999999+0000"}, {"start_time": "2021-01-10T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), @@ -208,14 +219,15 @@ def mock_datetime_now(monkeypatch): {cursor_field: "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="2021-01-03T00:00:00.000000+0000", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "2d", + "P2D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-09T00:00:00.000000+0000"}, - {"start_time": "2021-01-10T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, + {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), ( @@ -223,16 +235,17 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="{{ config['start_date'] }}", min_datetime="{{ stream_state['date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "1d", + "P1D", InterpolatedString(string="{{ stream_state['date'] }}", options={}), None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, - {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, - {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T00:00:00.000000+0000"}, - {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T00:00:00.000000+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T23:59:59.999999+0000"}, + {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T23:59:59.999999+0000"}, + {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, + {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T23:59:59.999999+0000"}, {"start_time": "2021-01-10T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), @@ -241,15 +254,16 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", max_datetime="{{ stream_state['date'] }}", options={}), - "1d", + "P1D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T00:00:00.000000+0000"}, - {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T00:00:00.000000+0000"}, - {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T00:00:00.000000+0000"}, - {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T23:59:59.999999+0000"}, + {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T23:59:59.999999+0000"}, + {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T23:59:59.999999+0000"}, + {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T23:59:59.999999+0000"}, {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, ], ), @@ -258,10 +272,11 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05"}, MinMaxDatetime(datetime="{{ config['start_date_ymd'] }}", options={}), MinMaxDatetime(datetime="2021-01-10", max_datetime="{{ stream_state['date'] }}", options={}), - "1d", + "P1D", cursor_field, None, "%Y-%m-%d", + "P1D", [ {"start_time": "2021-01-01", "end_time": "2021-01-01"}, {"start_time": "2021-01-02", "end_time": "2021-01-02"}, @@ -275,18 +290,19 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05"}, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10", max_datetime="{{ stream_state['date'] }}", datetime_format="%Y-%m-%d", options={}), - "1d", + "P1D", cursor_field, - "3d", + "P3D", datetime_format, + cursor_granularity, [ - {"start_time": "2020-12-29T00:00:00.000000+0000", "end_time": "2020-12-29T00:00:00.000000+0000"}, - {"start_time": "2020-12-30T00:00:00.000000+0000", "end_time": "2020-12-30T00:00:00.000000+0000"}, - {"start_time": "2020-12-31T00:00:00.000000+0000", "end_time": "2020-12-31T00:00:00.000000+0000"}, - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T00:00:00.000000+0000"}, - {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T00:00:00.000000+0000"}, - {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T00:00:00.000000+0000"}, - {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"}, + {"start_time": "2020-12-29T00:00:00.000000+0000", "end_time": "2020-12-29T23:59:59.999999+0000"}, + {"start_time": "2020-12-30T00:00:00.000000+0000", "end_time": "2020-12-30T23:59:59.999999+0000"}, + {"start_time": "2020-12-31T00:00:00.000000+0000", "end_time": "2020-12-31T23:59:59.999999+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T23:59:59.999999+0000"}, + {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T23:59:59.999999+0000"}, + {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T23:59:59.999999+0000"}, + {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T23:59:59.999999+0000"}, {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, ], ), @@ -295,14 +311,16 @@ def mock_datetime_now(monkeypatch): {cursor_field: "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), MinMaxDatetime(datetime="2021-01-06T00:00:00.000000+0000", options={}), - "1d", + "P1D", cursor_field, - "3d", + "P3D", datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T00:00:00.000000+0000"}, - {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"}, - {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, + {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T23:59:59.999999+0000"}, + {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T23:59:59.999999+0000"}, + {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T23:59:59.999999+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T23:59:59.999999+0000"}, {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, ], ), @@ -311,15 +329,16 @@ def mock_datetime_now(monkeypatch): {"date": "2021-01-05"}, MinMaxDatetime(datetime="{{ config['start_date'] }}", options={}), MinMaxDatetime(datetime="2021-01-10", max_datetime="{{ stream_state['date'] }}", datetime_format="%Y-%m-%d", options={}), - "1d", + "P1D", cursor_field, "{{ config['does_not_exist'] }}", datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T00:00:00.000000+0000"}, - {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T00:00:00.000000+0000"}, - {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T00:00:00.000000+0000"}, - {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T00:00:00.000000+0000"}, + {"start_time": "2021-01-01T00:00:00.000000+0000", "end_time": "2021-01-01T23:59:59.999999+0000"}, + {"start_time": "2021-01-02T00:00:00.000000+0000", "end_time": "2021-01-02T23:59:59.999999+0000"}, + {"start_time": "2021-01-03T00:00:00.000000+0000", "end_time": "2021-01-03T23:59:59.999999+0000"}, + {"start_time": "2021-01-04T00:00:00.000000+0000", "end_time": "2021-01-04T23:59:59.999999+0000"}, {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T00:00:00.000000+0000"}, ], ), @@ -328,22 +347,34 @@ def mock_datetime_now(monkeypatch): {cursor_field: "2021-01-05T00:00:00.000000+0000"}, MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - "1d", + "P1D", cursor_field, None, datetime_format, + cursor_granularity, [ - {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T00:00:00.000000+0000"}, - {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T00:00:00.000000+0000"}, - {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T00:00:00.000000+0000"}, - {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T00:00:00.000000+0000"}, + {"start_time": "2021-01-05T00:00:00.000000+0000", "end_time": "2021-01-05T23:59:59.999999+0000"}, + {"start_time": "2021-01-06T00:00:00.000000+0000", "end_time": "2021-01-06T23:59:59.999999+0000"}, + {"start_time": "2021-01-07T00:00:00.000000+0000", "end_time": "2021-01-07T23:59:59.999999+0000"}, + {"start_time": "2021-01-08T00:00:00.000000+0000", "end_time": "2021-01-08T23:59:59.999999+0000"}, + {"start_time": "2021-01-09T00:00:00.000000+0000", "end_time": "2021-01-09T23:59:59.999999+0000"}, {"start_time": "2021-01-10T00:00:00.000000+0000", "end_time": "2021-01-10T00:00:00.000000+0000"}, ], ), ], ) def test_stream_slices( - mock_datetime_now, test_name, stream_state, start, end, step, cursor_field, lookback_window, datetime_format, expected_slices + mock_datetime_now, + test_name, + stream_state, + start, + end, + step, + cursor_field, + lookback_window, + datetime_format, + cursor_granularity, + expected_slices, ): lookback_window = InterpolatedString(string=lookback_window, options={}) if lookback_window else None slicer = DatetimeStreamSlicer( @@ -352,13 +383,14 @@ def test_stream_slices( step=step, cursor_field=cursor_field, datetime_format=datetime_format, + cursor_granularity=cursor_granularity, lookback_window=lookback_window, config=config, options={}, ) stream_slices = slicer.stream_slices(SyncMode.incremental, stream_state) - assert expected_slices == stream_slices + assert stream_slices == expected_slices @pytest.mark.parametrize( @@ -406,10 +438,11 @@ def test_update_cursor(test_name, previous_cursor, stream_slice, last_record, ex slicer = DatetimeStreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString(string=cursor_field, options={}), datetime_format=datetime_format, - lookback_window=InterpolatedString(string="0d", options={}), + cursor_granularity=cursor_granularity, + lookback_window=InterpolatedString(string="P0D", options={}), config=config, options={}, ) @@ -478,10 +511,11 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, DatetimeStreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString(string=cursor_field, options={}), datetime_format=datetime_format, - lookback_window=InterpolatedString(string="0d", options={}), + cursor_granularity=cursor_granularity, + lookback_window=InterpolatedString(string="P0D", options={}), start_time_option=start_request_option, end_time_option=end_request_option, config=config, @@ -496,10 +530,11 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, slicer = DatetimeStreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString(string=cursor_field, options={}), datetime_format=datetime_format, - lookback_window=InterpolatedString(string="0d", options={}), + cursor_granularity=cursor_granularity, + lookback_window=InterpolatedString(string="P0D", options={}), start_time_option=start_request_option, end_time_option=end_request_option, config=config, @@ -516,31 +551,34 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params, @pytest.mark.parametrize( - "test_name, input_date, date_format, expected_output_date", + "test_name, input_date, date_format, date_format_granularity, expected_output_date", [ ( "test_parse_date_iso", "2021-01-01T00:00:00.000000+0000", "%Y-%m-%dT%H:%M:%S.%f%z", + "PT0.000001S", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), ( "test_parse_timestamp", "1609459200", "%s", + "PT1S", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), ), - ("test_parse_date_number", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)), + ("test_parse_date_number", "20210101", "%Y%m%d", "P1D", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)), ], ) -def test_parse_date(test_name, input_date, date_format, expected_output_date): +def test_parse_date(test_name, input_date, date_format, date_format_granularity, expected_output_date): slicer = DatetimeStreamSlicer( start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000", options={}), end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString(cursor_field, options={}), datetime_format=date_format, - lookback_window=InterpolatedString("0d", options={}), + cursor_granularity=date_format_granularity, + lookback_window=InterpolatedString("P0D", options={}), config=config, options={}, ) @@ -549,21 +587,22 @@ def test_parse_date(test_name, input_date, date_format, expected_output_date): @pytest.mark.parametrize( - "test_name, input_dt, datetimeformat, expected_output", + "test_name, input_dt, datetimeformat, datetimeformat_granularity, expected_output", [ - ("test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "1609459200"), - ("test_format_string", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "2021-01-01"), - ("test_format_to_number", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y%m%d", "20210101"), + ("test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "PT1S", "1609459200"), + ("test_format_string", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "P1D", "2021-01-01"), + ("test_format_to_number", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y%m%d", "P1D", "20210101"), ], ) -def test_format_datetime(test_name, input_dt, datetimeformat, expected_output): +def test_format_datetime(test_name, input_dt, datetimeformat, datetimeformat_granularity, expected_output): slicer = DatetimeStreamSlicer( start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000", options={}), end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000", options={}), - step="1d", + step="P1D", cursor_field=InterpolatedString(cursor_field, options={}), datetime_format=datetimeformat, - lookback_window=InterpolatedString("0d", options={}), + cursor_granularity=datetimeformat_granularity, + lookback_window=InterpolatedString("P0D", options={}), config=config, options={}, ) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py index bb47c97097fc..6265b0e638f0 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/test_factory.py @@ -41,7 +41,6 @@ from airbyte_cdk.sources.declarative.transformations import AddFields, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource -from dateutil.relativedelta import relativedelta from jsonschema import ValidationError factory = DeclarativeComponentFactory() @@ -236,14 +235,15 @@ def test_datetime_stream_slicer(): type: DatetimeStreamSlicer $options: datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + cursor_granularity: "PT0.000001S" start_datetime: type: MinMaxDatetime datetime: "{{ config['start_time'] }}" min_datetime: "{{ config['start_time'] + day_delta(2) }}" end_datetime: "{{ config['end_time'] }}" - step: "10d" + step: "P10D" cursor_field: "created" - lookback_window: "5d" + lookback_window: "P5D" start_time_option: inject_into: request_parameter field_name: created[gte] @@ -263,9 +263,9 @@ def test_datetime_stream_slicer(): assert stream_slicer.start_datetime.datetime.string == "{{ config['start_time'] }}" assert stream_slicer.start_datetime.min_datetime.string == "{{ config['start_time'] + day_delta(2) }}" assert stream_slicer.end_datetime.datetime.string == "{{ config['end_time'] }}" - assert stream_slicer._step == relativedelta(days=10) + assert stream_slicer._step == datetime.timedelta(days=10) assert stream_slicer.cursor_field.string == "created" - assert stream_slicer.lookback_window.string == "5d" + assert stream_slicer.lookback_window.string == "P5D" assert stream_slicer.start_time_option.inject_into == RequestOptionType.request_parameter assert stream_slicer.start_time_option.field_name == "created[gte]" diff --git a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py index 5ca4e1932fb8..4eee25161a3f 100644 --- a/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py +++ b/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_incremental.py @@ -118,7 +118,7 @@ def _parse_date_value(value) -> datetime: return value if isinstance(value, (int, float)): return pendulum.from_timestamp(value / 1000) - return pendulum.parse(value) + return pendulum.parse(value, strict=False) record_date_value = _parse_date_value(record_value) state_date_value = _parse_date_value(state_value) diff --git a/airbyte-integrations/connectors/source-braze/source_braze/braze.yaml b/airbyte-integrations/connectors/source-braze/source_braze/braze.yaml index 85f011f4d8a4..8566f9a1fa0d 100644 --- a/airbyte-integrations/connectors/source-braze/source_braze/braze.yaml +++ b/airbyte-integrations/connectors/source-braze/source_braze/braze.yaml @@ -94,11 +94,12 @@ definitions: field_name: "length" inject_into: "request_parameter" datetime_format: "%Y-%m-%d" - step: "100d" + step: "P100D" + cursor_granularity: "P1D" datetime_14d_slicer: $ref: "*ref(definitions.datetime_100d_slicer)" - step: "14d" + step: "P14D" # ----- Sliced streams ----- sliced_retriever: diff --git a/airbyte-integrations/connectors/source-braze/unit_tests/test_datetime_slicer.py b/airbyte-integrations/connectors/source-braze/unit_tests/test_datetime_slicer.py index 38cc6f40ab52..681f105370a7 100644 --- a/airbyte-integrations/connectors/source-braze/unit_tests/test_datetime_slicer.py +++ b/airbyte-integrations/connectors/source-braze/unit_tests/test_datetime_slicer.py @@ -18,9 +18,10 @@ def test_datetime_slicer(): slicer = DatetimeStreamSlicerComponent( start_datetime="2022-12-01", end_datetime="2022-12-08", - step="3d", + step="P3D", cursor_field="time", datetime_format="%Y-%m-%d", + cursor_granularity="P1D", config={}, options={}, step_option=RequestOption(field_name="step", inject_into=RequestOptionType.request_parameter, options={}) diff --git a/airbyte-integrations/connectors/source-callrail/source_callrail/callrail.yaml b/airbyte-integrations/connectors/source-callrail/source_callrail/callrail.yaml index 1952b75b63d9..65f2eaabcd2b 100644 --- a/airbyte-integrations/connectors/source-callrail/source_callrail/callrail.yaml +++ b/airbyte-integrations/connectors/source-callrail/source_callrail/callrail.yaml @@ -2,7 +2,7 @@ version: "0.1.0" definitions: page_size: 100 - step: "100d" + step: "P100D" schema_loader: type: JsonSchema @@ -31,6 +31,7 @@ definitions: field_name: "start_date" inject_into: "request_parameter" datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + cursor_granularity: "PT0.000001S" retriever: type: SimpleRetriever diff --git a/airbyte-integrations/connectors/source-close-com/source_close_com/close_com.yaml b/airbyte-integrations/connectors/source-close-com/source_close_com/close_com.yaml index ce8f4ca3cbed..811c44fe4299 100644 --- a/airbyte-integrations/connectors/source-close-com/source_close_com/close_com.yaml +++ b/airbyte-integrations/connectors/source-close-com/source_close_com/close_com.yaml @@ -55,9 +55,10 @@ definitions: end_datetime: datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%S.%f%z') }}" datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" # ISO8601 - step: "365d" + step: "P365D" cursor_field: "date_updated" datetime_format: "%Y-%m-%d" + cursor_granularity: "P1D" stream_slicer__cursor_date_created: # type: DatetimeStreamSlicer @@ -74,9 +75,10 @@ definitions: end_time_option: field_name: "date_created__lte" inject_into: "request_parameter" - step: "365d" + step: "P365D" cursor_field: "date_created" datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + cursor_granularity: "PT0.000001S" stream_slicer__cursor_date_updated: # type: DatetimeStreamSlicer @@ -93,9 +95,10 @@ definitions: end_time_option: field_name: "date_updated__lte" inject_into: "request_parameter" - step: "365d" + step: "P365D" cursor_field: "date_updated" datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + cursor_granularity: "PT0.000001S" retriever: record_selector: $ref: "*ref(definitions.selector)" diff --git a/airbyte-integrations/connectors/source-coin-api/source_coin_api/coin_api.yaml b/airbyte-integrations/connectors/source-coin-api/source_coin_api/coin_api.yaml index cb41f7aa499c..740c3218755b 100644 --- a/airbyte-integrations/connectors/source-coin-api/source_coin_api/coin_api.yaml +++ b/airbyte-integrations/connectors/source-coin-api/source_coin_api/coin_api.yaml @@ -26,7 +26,8 @@ definitions: datetime: "{{ config['end_time'] }}" datetime_format: "%Y-%m-%dT%H:%M:%S" datetime_format: "%Y-%m-%dT%H:%M:%SZ" - step: 1d + cursor_granularity: "PT1S" + step: "P1D" start_time_option: field_name: time_start inject_into: request_parameter diff --git a/airbyte-integrations/connectors/source-coingecko-coins/source_coingecko_coins/coingecko_coins.yaml b/airbyte-integrations/connectors/source-coingecko-coins/source_coingecko_coins/coingecko_coins.yaml index 4e020fb7037a..3b2ec2de740d 100644 --- a/airbyte-integrations/connectors/source-coingecko-coins/source_coingecko_coins/coingecko_coins.yaml +++ b/airbyte-integrations/connectors/source-coingecko-coins/source_coingecko_coins/coingecko_coins.yaml @@ -20,8 +20,9 @@ definitions: end_datetime: datetime: "{{ config['end_date'] or now_utc().strftime('%d-%m-%Y') }}" datetime_format: "%d-%m-%Y" - step: 1d + step: "P1D" datetime_format: "%d-%m-%Y" + cursor_granularity: "P1D" cursor_field: "date" start_time_option: field_name: "date" diff --git a/airbyte-integrations/connectors/source-datascope/acceptance-test-config.yml b/airbyte-integrations/connectors/source-datascope/acceptance-test-config.yml index 5879b9065fcb..5667fa1776cd 100644 --- a/airbyte-integrations/connectors/source-datascope/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-datascope/acceptance-test-config.yml @@ -19,6 +19,7 @@ tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" future_state_path: "integration_tests/abnormal_state.json" + threshold_days: 1 full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-datascope/source_datascope/datascope.yaml b/airbyte-integrations/connectors/source-datascope/source_datascope/datascope.yaml index 2b86a3b98421..bfa005685edf 100644 --- a/airbyte-integrations/connectors/source-datascope/source_datascope/datascope.yaml +++ b/airbyte-integrations/connectors/source-datascope/source_datascope/datascope.yaml @@ -19,8 +19,11 @@ definitions: end_datetime: datetime: "{{ now_utc().strftime('%d/%m/%Y %H:%M') }}" datetime_format: "%d/%m/%Y %H:%M" - step: "1d" + step: "P1D" datetime_format: "%d/%m/%Y %H:%M" + # There is a discrepancy between the format of the cursor `created_at` and the granularity of the API hence "P1D" + # here instead of "PT1M" if we were to check the datetime_format + cursor_granularity: "P1D" cursor_field: "{{ options['stream_cursor_field'] }}" start_time_option: field_name: "start" diff --git a/airbyte-integrations/connectors/source-delighted/source_delighted/delighted.yaml b/airbyte-integrations/connectors/source-delighted/source_delighted/delighted.yaml index b03df349553c..dd522e2c0904 100644 --- a/airbyte-integrations/connectors/source-delighted/source_delighted/delighted.yaml +++ b/airbyte-integrations/connectors/source-delighted/source_delighted/delighted.yaml @@ -22,13 +22,14 @@ definitions: stream_slicer: cursor_field: "{{ options['stream_cursor_field'] }}" datetime_format: "%s" + cursor_granularity: "PT1S" start_datetime: datetime: "{{ config['since'] }}" datetime_format: "%Y-%m-%d %H:%M:%S" end_datetime: datetime: "{{ today_utc() }}" datetime_format: "%Y-%m-%d" - step: "1w" + step: "P1W" end_time_option: field_name: "until" inject_into: "request_parameter" diff --git a/airbyte-integrations/connectors/source-gnews/source_gnews/gnews.yaml b/airbyte-integrations/connectors/source-gnews/source_gnews/gnews.yaml index 44fe25ea7068..cd7c642eb5fa 100644 --- a/airbyte-integrations/connectors/source-gnews/source_gnews/gnews.yaml +++ b/airbyte-integrations/connectors/source-gnews/source_gnews/gnews.yaml @@ -136,8 +136,9 @@ definitions: end_datetime: datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d %H:%M:%S') }}" datetime_format: "%Y-%m-%d %H:%M:%S" - step: 1w + step: "P1W" datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" cursor_field: "{{ options['stream_cursor_field'] }}" common_parameters: token: "{{ config['api_key'] }}" diff --git a/airbyte-integrations/connectors/source-mailersend/source_mailersend/mailersend.yaml b/airbyte-integrations/connectors/source-mailersend/source_mailersend/mailersend.yaml index 11dd18976de7..15734d3679dd 100644 --- a/airbyte-integrations/connectors/source-mailersend/source_mailersend/mailersend.yaml +++ b/airbyte-integrations/connectors/source-mailersend/source_mailersend/mailersend.yaml @@ -21,12 +21,13 @@ definitions: end_datetime: datetime: "{{ now_utc().strftime('%s') }}" datetime_format: "%s" - step: "1d" + step: "P1D" cursor_field: "{{ options['stream_cursor_field'] }}" start_time_option: field_name: "date_from" inject_into: "request_parameter" datetime_format: "%s" + cursor_granularity: "PT1S" paginator: type: "DefaultPaginator" page_size_option: diff --git a/airbyte-integrations/connectors/source-nytimes/acceptance-test-config.yml b/airbyte-integrations/connectors/source-nytimes/acceptance-test-config.yml index d0e1d6a5606b..7c4bc83849c3 100644 --- a/airbyte-integrations/connectors/source-nytimes/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-nytimes/acceptance-test-config.yml @@ -23,7 +23,9 @@ acceptance_tests: tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" - future_state_path: "integration_tests/abnormal_state.json" + threshold_days: 31 + future_state: + future_state_path: "integration_tests/abnormal_state.json" full_refresh: tests: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-nytimes/source_nytimes/nytimes.yaml b/airbyte-integrations/connectors/source-nytimes/source_nytimes/nytimes.yaml index a2c008b48750..c3a7bdb8c73e 100644 --- a/airbyte-integrations/connectors/source-nytimes/source_nytimes/nytimes.yaml +++ b/airbyte-integrations/connectors/source-nytimes/source_nytimes/nytimes.yaml @@ -26,8 +26,9 @@ definitions: end_datetime: datetime: "{{ config['end_date'] or today_utc().strftime('%Y-%m') }}" datetime_format: "%Y-%m" - step: "1m" + step: "P1M" datetime_format: "%Y-%m-%dT%H:%M:%S%z" + cursor_granularity: "PT1S" cursor_field: "{{ options['stream_cursor_field'] }}" $options: name: "archive" diff --git a/airbyte-integrations/connectors/source-posthog/source_posthog/posthog.yaml b/airbyte-integrations/connectors/source-posthog/source_posthog/posthog.yaml index fbd270572ac3..673950a2034b 100644 --- a/airbyte-integrations/connectors/source-posthog/source_posthog/posthog.yaml +++ b/airbyte-integrations/connectors/source-posthog/source_posthog/posthog.yaml @@ -1,7 +1,6 @@ version: "0.1.0" definitions: - schema_loader: file_path: "./source_posthog/schemas/{{ options['name'] }}.json" @@ -9,7 +8,7 @@ definitions: type: RecordSelector extractor: type: DpathExtractor - field_pointer: ['results'] + field_pointer: ["results"] requester: type: HttpRequester @@ -120,9 +119,9 @@ definitions: stream_slicers: - type: SubstreamSlicer parent_stream_configs: - - stream: "*ref(definitions.projects_stream)" - parent_key: "id" - stream_slice_field: "project_id" + - stream: "*ref(definitions.projects_stream)" + parent_key: "id" + stream_slice_field: "project_id" - type: DatetimeStreamSlicer start_datetime: datetime: "{{ config['start_date'] }}" @@ -131,7 +130,8 @@ definitions: datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%S%z') }}" datetime_format: "%Y-%m-%dT%H:%M:%S%z" datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" - step: 30d + cursor_granularity: "PT0.000001S" + step: "P30D" cursor_field: timestamp start_time_option: field_name: after diff --git a/airbyte-integrations/connectors/source-posthog/unit_tests/test_components.py b/airbyte-integrations/connectors/source-posthog/unit_tests/test_components.py index 6951aeaa04e2..56377d3e857a 100644 --- a/airbyte-integrations/connectors/source-posthog/unit_tests/test_components.py +++ b/airbyte-integrations/connectors/source-posthog/unit_tests/test_components.py @@ -15,9 +15,10 @@ DatetimeStreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.00+0000", datetime_format="%Y-%m-%dT%H:%M:%S.%f%z", options={}), end_datetime=MinMaxDatetime(datetime="2021-02-01T00:00:00.00+0000", datetime_format="%Y-%m-%dT%H:%M:%S.%f%z", options={}), - step="10d", + step="P10D", cursor_field="timestamp", datetime_format="%Y-%m-%dT%H:%M:%S.%f%z", + cursor_granularity="PT0.000001S", start_time_option=RequestOption(inject_into="request_parameter", field_name="after", options={}), end_time_option=RequestOption(inject_into="request_parameter", field_name="before", options={}), config={}, @@ -85,42 +86,48 @@ def test_update_cursor(test_name, initial_state, stream_slice, last_record, expe [ ("test_empty_state", {}, - [{'end_time': '2021-01-10T00:00:00.000000+0000', + [{'end_time': '2021-01-10T23:59:59.999999+0000', 'project_id': '2331', 'start_time': '2021-01-01T00:00:00.000000+0000'}, - {'end_time': '2021-01-20T00:00:00.000000+0000', + {'end_time': '2021-01-20T23:59:59.999999+0000', 'project_id': '2331', - 'start_time': '2021-01-10T00:00:00.000000+0000'}, - {'end_time': '2021-01-30T00:00:00.000000+0000', + 'start_time': '2021-01-10T23:59:59.999999+0000'}, + {'end_time': '2021-01-30T23:59:59.999999+0000', 'project_id': '2331', - 'start_time': '2021-01-20T00:00:00.000000+0000'}, + 'start_time': '2021-01-20T23:59:59.999999+0000'}, {'end_time': '2021-02-01T00:00:00.000000+0000', 'project_id': '2331', - 'start_time': '2021-01-30T00:00:00.000000+0000'}] + 'start_time': '2021-01-30T23:59:59.999999+0000'}] ), ("test_state", {'2331': {"timestamp": "2021-01-01T17:00:00.000000+0000"}}, - [{'end_time': '2021-01-11T17:00:00.000000+0000', + [{'end_time': '2021-01-11T16:59:59.999999+0000', 'project_id': '2331', 'start_time': '2021-01-01T17:00:00.000000+0000'}, - {'end_time': '2021-01-21T17:00:00.000000+0000', + {'end_time': '2021-01-21T16:59:59.999999+0000', 'project_id': '2331', - 'start_time': '2021-01-11T17:00:00.000000+0000'}, - {'end_time': '2021-01-31T17:00:00.000000+0000', + 'start_time': '2021-01-11T16:59:59.999999+0000'}, + {'end_time': '2021-01-31T16:59:59.999999+0000', 'project_id': '2331', - 'start_time': '2021-01-21T17:00:00.000000+0000'}] + 'start_time': '2021-01-21T16:59:59.999999+0000'}, + {'end_time': '2021-02-01T00:00:00.000000+0000', + 'project_id': '2331', + 'start_time': '2021-01-31T16:59:59.999999+0000'}] ), ("test_old_stype_state", {"timestamp": "2021-01-01T17:00:00.000000+0000"}, - [{'end_time': '2021-01-11T17:00:00.000000+0000', + [{'end_time': '2021-01-11T16:59:59.999999+0000', 'project_id': '2331', 'start_time': '2021-01-01T17:00:00.000000+0000'}, - {'end_time': '2021-01-21T17:00:00.000000+0000', + {'end_time': '2021-01-21T16:59:59.999999+0000', 'project_id': '2331', - 'start_time': '2021-01-11T17:00:00.000000+0000'}, - {'end_time': '2021-01-31T17:00:00.000000+0000', + 'start_time': '2021-01-11T16:59:59.999999+0000'}, + {'end_time': '2021-01-31T16:59:59.999999+0000', + 'project_id': '2331', + 'start_time': '2021-01-21T16:59:59.999999+0000'}, + {'end_time': '2021-02-01T00:00:00.000000+0000', 'project_id': '2331', - 'start_time': '2021-01-21T17:00:00.000000+0000'}] + 'start_time': '2021-01-31T16:59:59.999999+0000'}] ), ("test_state_for_one_slice", {'2331': {"timestamp": "2021-01-27T17:00:00.000000+0000"}}, diff --git a/airbyte-integrations/connectors/source-prestashop/source_prestashop/prestashop.yaml b/airbyte-integrations/connectors/source-prestashop/source_prestashop/prestashop.yaml index 9b743fea3b85..b7bdfe9367e7 100644 --- a/airbyte-integrations/connectors/source-prestashop/source_prestashop/prestashop.yaml +++ b/airbyte-integrations/connectors/source-prestashop/source_prestashop/prestashop.yaml @@ -44,8 +44,9 @@ definitions: end_datetime: datetime: "{{ now_utc() }}" datetime_format: "%Y-%m-%d %H:%M:%S.%f+00:00" - step: "1y" + step: "P1Y" datetime_format: "%Y-%m-%d %H:%M:%S" + cursor_granularity: "PT1S" requester: $ref: "*ref(definitions.requester)" request_options_provider: diff --git a/airbyte-integrations/connectors/source-punk-api/source_punk_api/punk_api.yaml b/airbyte-integrations/connectors/source-punk-api/source_punk_api/punk_api.yaml index fa6ea890b71a..3997c0c17dbe 100644 --- a/airbyte-integrations/connectors/source-punk-api/source_punk_api/punk_api.yaml +++ b/airbyte-integrations/connectors/source-punk-api/source_punk_api/punk_api.yaml @@ -27,8 +27,9 @@ definitions: # end_datetime: # datetime: "{{ now_local() }}" # format: "%Y-%m-%d %H:%M:%S.%f" - step: "1m" + step: "P1M" datetime_format: "mm-YYYY" + cursor_granularity: "P1M" cursor_field: airdate # start_time_option: # field_name: "date" diff --git a/airbyte-integrations/connectors/source-sendgrid/source_sendgrid/sendgrid.yaml b/airbyte-integrations/connectors/source-sendgrid/source_sendgrid/sendgrid.yaml index d964a1724f4e..690950916dd3 100644 --- a/airbyte-integrations/connectors/source-sendgrid/source_sendgrid/sendgrid.yaml +++ b/airbyte-integrations/connectors/source-sendgrid/source_sendgrid/sendgrid.yaml @@ -1,7 +1,7 @@ version: "0.1.0" definitions: page_size: 50 - step: "30d" + step: "P30D" schema_loader: type: JsonFileSchemaLoader @@ -61,6 +61,7 @@ definitions: field_name: "end_time" inject_into: "request_parameter" datetime_format: "%s" + cursor_granularity: "PT1S" messages_stream_slicer: type: "DatetimeStreamSlicer" start_datetime: @@ -72,6 +73,7 @@ definitions: step: "*ref(definitions.step)" cursor_field: "{{ options.stream_cursor_field }}" datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z" + cursor_granularity: "PT0.000001S" base_stream: type: DeclarativeStream diff --git a/airbyte-integrations/connectors/source-senseforce/source_senseforce/senseforce.yaml b/airbyte-integrations/connectors/source-senseforce/source_senseforce/senseforce.yaml index d1a6db08a29e..a9576169789d 100644 --- a/airbyte-integrations/connectors/source-senseforce/source_senseforce/senseforce.yaml +++ b/airbyte-integrations/connectors/source-senseforce/source_senseforce/senseforce.yaml @@ -31,8 +31,9 @@ definitions: end_datetime: datetime: "{{ now_utc() }}" datetime_format: "%Y-%m-%d %H:%M:%S.%f+00:00" - step: "100d" #TODO: Add {{ config['slice_range'] ~ d }} here, once it's possible to use config-values for step definition + step: "P100D" #TODO: Add {{ config['slice_range'] ~ d }} here, once it's possible to use config-values for step definition datetime_format: "%s" + cursor_granularity: "PT1S" cursor_field: "{{ options['stream_cursor_field'] }}" retriever: diff --git a/airbyte-integrations/connectors/source-square/source_square/square.yaml b/airbyte-integrations/connectors/source-square/source_square/square.yaml index c97a43ebbd40..4b8d832b5941 100644 --- a/airbyte-integrations/connectors/source-square/source_square/square.yaml +++ b/airbyte-integrations/connectors/source-square/source_square/square.yaml @@ -29,8 +29,8 @@ definitions: request_options_provider: request_headers: - Square-Version: '2022-10-19' - Content-Type: 'application/json' + Square-Version: "2022-10-19" + Content-Type: "application/json" retriever: record_selector: $ref: "*ref(definitions.selector)" @@ -89,8 +89,9 @@ definitions: end_datetime: datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%S.%fZ') }}" datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" - step: 30d + step: "P30D" datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" + cursor_granularity: "PT0.000001S" cursor_field: "created_at" start_time_option: field_name: begin_time @@ -136,8 +137,9 @@ definitions: end_datetime: datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%S.%fZ') }}" datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" - step: 30d + step: "P30D" datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" + cursor_granularity: "PT0.000001S" cursor_field: "updated_at" start_time_option: field_name: begin_time @@ -285,8 +287,9 @@ definitions: end_datetime: datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%S.%fZ') }}" datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" - step: 30d + step: P30D datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" + cursor_granularity: "PT0.000001S" parent_stream: "*ref(definitions.locations_stream)" cursor_field: "updated_at" parent_key: "id" diff --git a/airbyte-integrations/connectors/source-square/unit_tests/test_component.py b/airbyte-integrations/connectors/source-square/unit_tests/test_component.py index 7c7f5e911880..bca70ca89d6e 100644 --- a/airbyte-integrations/connectors/source-square/unit_tests/test_component.py +++ b/airbyte-integrations/connectors/source-square/unit_tests/test_component.py @@ -16,6 +16,7 @@ from source_square.source import SourceSquare DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" +CURSOR_GRANULARITY = "PT0.000001S" @pytest.fixture @@ -81,9 +82,10 @@ def test_sub_slicer(last_record, expected, records): slicer = SquareSubstreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000+0000", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000+0000", options={}), - step="1d", + step="P1D", cursor_field="updated_at", datetime_format=DATETIME_FORMAT, + cursor_granularity=CURSOR_GRANULARITY, options=None, config={"start_date": "2021-01-01T00:00:00.000000+0000"}, parent_key="id", @@ -116,9 +118,10 @@ def test_sub_slicer_request_body(last_record, records, expected_data): slicer = SquareSubstreamSlicer( start_datetime=MinMaxDatetime(datetime="2021-01-01T00:00:00.000000Z", options={}), end_datetime=MinMaxDatetime(datetime="2021-01-10T00:00:00.000000Z", options={}), - step="1d", + step="P1D", cursor_field="updated_at", datetime_format=DATETIME_FORMAT, + cursor_granularity=CURSOR_GRANULARITY, options=None, config={"start_date": "2021-01-01T00:00:00.000000Z"}, parent_key="id", diff --git a/airbyte-integrations/connectors/source-tempo/source_tempo/tempo.yaml b/airbyte-integrations/connectors/source-tempo/source_tempo/tempo.yaml index a1384d3b322d..b2266d468722 100644 --- a/airbyte-integrations/connectors/source-tempo/source_tempo/tempo.yaml +++ b/airbyte-integrations/connectors/source-tempo/source_tempo/tempo.yaml @@ -66,13 +66,14 @@ definitions: stream_slicer: cursor_field: "startDate" datetime_format: "%Y-%m-%d" + cursor_granularity: "P1D" start_datetime: datetime: "2020-01-01" datetime_format: "%Y-%m-%d" end_datetime: datetime: "{{ today_utc() }}" datetime_format: "%Y-%m-%d" - step: "1w" + step: "P1W" end_time_option: field_name: "to" inject_into: "request_parameter" diff --git a/airbyte-integrations/connectors/source-the-guardian-api/acceptance-test-config.yml b/airbyte-integrations/connectors/source-the-guardian-api/acceptance-test-config.yml index 0648031a84aa..3fd41494874c 100644 --- a/airbyte-integrations/connectors/source-the-guardian-api/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-the-guardian-api/acceptance-test-config.yml @@ -23,7 +23,8 @@ acceptance_tests: tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" - future_state_path: "integration_tests/abnormal_state.json" + future_state: + future_state_path: "integration_tests/abnormal_state.json" full_refresh: tests: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-the-guardian-api/source_the_guardian_api/the_guardian_api.yaml b/airbyte-integrations/connectors/source-the-guardian-api/source_the_guardian_api/the_guardian_api.yaml index 83a87e1d49cd..dde7985571b7 100644 --- a/airbyte-integrations/connectors/source-the-guardian-api/source_the_guardian_api/the_guardian_api.yaml +++ b/airbyte-integrations/connectors/source-the-guardian-api/source_the_guardian_api/the_guardian_api.yaml @@ -26,8 +26,9 @@ definitions: end_datetime: datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" datetime_format: "%Y-%m-%d" - step: "7d" + step: "P7D" datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" cursor_field: "{{ options['stream_cursor_field'] }}" start_time_option: field_name: "from-date" diff --git a/airbyte-integrations/connectors/source-tvmaze-schedule/source_tvmaze_schedule/tvmaze_schedule.yaml b/airbyte-integrations/connectors/source-tvmaze-schedule/source_tvmaze_schedule/tvmaze_schedule.yaml index 0b39d36dece0..da85625c6d32 100644 --- a/airbyte-integrations/connectors/source-tvmaze-schedule/source_tvmaze_schedule/tvmaze_schedule.yaml +++ b/airbyte-integrations/connectors/source-tvmaze-schedule/source_tvmaze_schedule/tvmaze_schedule.yaml @@ -25,8 +25,9 @@ definitions: end_datetime: datetime: "{{ config['end_date'] or today_utc() }}" format: "%Y-%m-%d" - step: 1d + step: "P1D" datetime_format: "%Y-%m-%d" + cursor_granularity: "P1D" cursor_field: "airdate" start_time_option: field_name: "date" diff --git a/airbyte-integrations/connectors/source-twitter/source_twitter/twitter.yaml b/airbyte-integrations/connectors/source-twitter/source_twitter/twitter.yaml index 0c4dc189f94f..0fa6a933bdd0 100644 --- a/airbyte-integrations/connectors/source-twitter/source_twitter/twitter.yaml +++ b/airbyte-integrations/connectors/source-twitter/source_twitter/twitter.yaml @@ -25,7 +25,8 @@ definitions: datetime: "{{ config['end_time'] }}" datetime_format: "%Y-%m-%dT%H:%M:%S" datetime_format: "%Y-%m-%dT%H:%M:%SZ" - step: 1d + cursor_granularity: "PT1S" + step: "P1D" start_time_option: field_name: start_time inject_into: request_parameter diff --git a/airbyte-integrations/connectors/source-wikipedia-pageviews/source_wikipedia_pageviews/wikipedia_pageviews.yaml b/airbyte-integrations/connectors/source-wikipedia-pageviews/source_wikipedia_pageviews/wikipedia_pageviews.yaml index 91e0e9dcd83f..d5e7d1b36b7f 100755 --- a/airbyte-integrations/connectors/source-wikipedia-pageviews/source_wikipedia_pageviews/wikipedia_pageviews.yaml +++ b/airbyte-integrations/connectors/source-wikipedia-pageviews/source_wikipedia_pageviews/wikipedia_pageviews.yaml @@ -18,16 +18,18 @@ definitions: end_datetime: datetime: "{{config.start}}" datetime_format: "%Y%m%d" - step: "1d" + step: "P1D" cursor_field: "timestamp" datetime_format: "%Y/%m/%d" + cursor_granularity: "P1D" per_article_stream_slicer: type: DatetimeStreamSlicer start_datetime: "{{config.start}}" end_datetime: "{{config.end}}" - step: "1d" + step: "P1D" cursor_field: "timestamp" datetime_format: "%Y%m%d" + cursor_granularity: "P1D" per_article_requester: $options: diff --git a/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/components.py b/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/components.py deleted file mode 100644 index c1e29416354c..000000000000 --- a/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/components.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# Copyright (c) 2022 Airbyte, Inc., all rights reserved. -# - -import datetime - -from airbyte_cdk.sources.declarative.stream_slicers import DatetimeStreamSlicer - - -class DatetimeStreamSlicerWoocommerce(DatetimeStreamSlicer): - - # TODO: remove whole method after solving https://github.com/airbytehq/airbyte/issues/20322 - def _partition_daterange(self, start, end, step: datetime.timedelta): - start_field = self.stream_slice_field_start.eval(self.config) - end_field = self.stream_slice_field_end.eval(self.config) - dates = [] - while start <= end: - # interval hardcoded to 1 second, as we have datetime_format: "%Y-%m-%dT%H:%M:%S" - end_date = self._get_date(start + step - datetime.timedelta(seconds=1), end, min) - dates.append({start_field: self._format_datetime(start), end_field: self._format_datetime(end_date)}) - start += step - return dates diff --git a/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/woocommerce.yaml b/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/woocommerce.yaml index 4936b31218aa..b3ba88eaa988 100644 --- a/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/woocommerce.yaml +++ b/airbyte-integrations/connectors/source-woocommerce/source_woocommerce/woocommerce.yaml @@ -3,7 +3,7 @@ version: "0.1.0" definitions: selector: extractor: - field_pointer: [ ] + field_pointer: [] schema_loader: type: JsonFileSchemaLoader file_path: "./source_woocommerce/schemas/{{ options['name'] }}.json" @@ -20,8 +20,8 @@ definitions: order: "asc" dates_are_gmt: "true" date_stream_slicer: - class_name: source_woocommerce.components.DatetimeStreamSlicerWoocommerce - lookback_window: "{{ config.get('conversion_window_days', 0) }}d" + type: DatetimeStreamSlicer + lookback_window: "P{{ config.get('conversion_window_days', 0) }}D" start_datetime: datetime: "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S') }}" datetime_format: "%Y-%m-%dT%H:%M:%S" @@ -29,7 +29,8 @@ definitions: datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%S') }}" datetime_format: "%Y-%m-%dT%H:%M:%S" datetime_format: "%Y-%m-%dT%H:%M:%S" - step: 30d + cursor_granularity: PT1S + step: P30D start_time_option: field_name: modified_after inject_into: request_parameter diff --git a/airbyte-integrations/connectors/source-zenloop/source_zenloop/zenloop.yaml b/airbyte-integrations/connectors/source-zenloop/source_zenloop/zenloop.yaml index e6ba77cf302b..d45a0d7cc19a 100644 --- a/airbyte-integrations/connectors/source-zenloop/source_zenloop/zenloop.yaml +++ b/airbyte-integrations/connectors/source-zenloop/source_zenloop/zenloop.yaml @@ -80,13 +80,14 @@ definitions: type: DatetimeStreamSlicer cursor_field: "inserted_at" datetime_format: "%Y-%m-%dT%H:%M:%S.%fZ" + cursor_granularity: "PT0.000001S" start_datetime: datetime: "{{ config['date_from'] }}" datetime_format: "%Y-%m-%d" end_datetime: datetime: "{{ today_utc() }}" datetime_format: "%Y-%m-%d" - step: "1m" + step: "P1M" end_time_option: field_name: "date_to" inject_into: "request_parameter" diff --git a/docs/connector-development/config-based/understanding-the-yaml-file/stream-slicers.md b/docs/connector-development/config-based/understanding-the-yaml-file/stream-slicers.md index 4dfc1f9cc29a..9adfe6922ec5 100644 --- a/docs/connector-development/config-based/understanding-the-yaml-file/stream-slicers.md +++ b/docs/connector-development/config-based/understanding-the-yaml-file/stream-slicers.md @@ -55,6 +55,7 @@ Schema: - step - cursor_field - datetime_format + - cursor_granularity additional_properties: false properties: "$options": @@ -69,6 +70,8 @@ Schema: type: string datetime_format: type: string + cursor_granularity: + type: string start_time_option: "$ref": "#/definitions/RequestOption" end_time_option: @@ -103,10 +106,12 @@ Example: stream_slicer: start_datetime: "2021-02-01T00:00:00.000000+0000", end_datetime: "2021-03-01T00:00:00.000000+0000", - step: "1d" + datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z", + cursor_granularity: "PT0.000001S" + step: "P1D" ``` -will create one slice per day for the interval `2021-02-01` - `2021-03-01`. +will create one slice per day for the interval `2021-02-01` - `2021-03-01`. The first slice will start from the `start_datetime` and end at `start_datetime + step - granularity` like this: `{"start_time": "2021-02-01T00:00:00.000000+0000", "end_time": "2021-02-01T23:59:59.999999+0000"}`. The `DatetimeStreamSlicer` also supports an optional lookback window, specifying how many days before the start_datetime to read data for. @@ -114,13 +119,15 @@ The `DatetimeStreamSlicer` also supports an optional lookback window, specifying stream_slicer: start_datetime: "2021-02-01T00:00:00.000000+0000", end_datetime: "2021-03-01T00:00:00.000000+0000", - lookback_window: "31d" - step: "1d" + datetime_format: "%Y-%m-%dT%H:%M:%S.%f%z", + cursor_granularity: "PT0.000001S" + lookback_window: "P31D" + step: "P1D" ``` will read data from `2021-01-01` to `2021-03-01`. -The stream slices will be of the form `{"start_date": "2021-02-01T00:00:00.000000+0000", "end_date": "2021-02-01T00:00:00.000000+0000"}` +The stream slices will be of the form `{"start_date": "2021-02-01T00:00:00.000000+0000", "end_date": "2021-02-02T23:59:59.999999+0000"}` The stream slices' field names can be customized through the `stream_state_field_start` and `stream_state_field_end` parameters. The `datetime_format` can be used to specify the format of the start and end time. It is [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339#section-5.6) by default. @@ -128,6 +135,8 @@ The `datetime_format` can be used to specify the format of the start and end tim The Stream's state will be derived by reading the record's `cursor_field`. If the `cursor_field` is `created`, and the record is `{"id": 1234, "created": "2021-02-02T00:00:00.000000+0000"}`, then the state after reading that record is `"created": "2021-02-02T00:00:00.000000+0000"`. [^1] +Note that all durations are expressed as [ISO 8601 durations](https://en.wikipedia.org/wiki/ISO_8601#Durations). + #### Cursor update When reading data from the source, the cursor value will be updated to the max datetime between