diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile b/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile
index 09f7dd1b491a..3a2b5695e869 100644
--- a/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile
+++ b/airbyte-integrations/connectors/source-google-analytics-data-api/Dockerfile
@@ -28,5 +28,5 @@ COPY source_google_analytics_data_api ./source_google_analytics_data_api
 ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
 ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
 
-LABEL io.airbyte.version=0.2.2
+LABEL io.airbyte.version=0.2.3
 LABEL io.airbyte.name=airbyte/source-google-analytics-data-api
diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/metadata.yaml b/airbyte-integrations/connectors/source-google-analytics-data-api/metadata.yaml
index 827699b06e41..91b4b657d9df 100644
--- a/airbyte-integrations/connectors/source-google-analytics-data-api/metadata.yaml
+++ b/airbyte-integrations/connectors/source-google-analytics-data-api/metadata.yaml
@@ -7,7 +7,7 @@ data:
   connectorSubtype: api
   connectorType: source
   definitionId: 3cc2eafd-84aa-4dca-93af-322d9dfeec1a
-  dockerImageTag: 0.2.2
+  dockerImageTag: 0.2.3
   dockerRepository: airbyte/source-google-analytics-data-api
   githubIssueLabel: source-google-analytics-data-api
   icon: google-analytics.svg
diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/defaults/custom_reports_schema.json b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/defaults/custom_reports_schema.json
index 777a8403bafe..2c24dabc7244 100644
--- a/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/defaults/custom_reports_schema.json
+++ b/airbyte-integrations/connectors/source-google-analytics-data-api/source_google_analytics_data_api/defaults/custom_reports_schema.json
@@ -21,6 +21,17 @@
         "type": "string"
       }
     },
+    "offset": {
+      "type": ["integer", "string"],
+      "minimum": 0,
+      "pattern": "^[0-9]+$"
+    },
+    "limit": {
+      "type": ["integer", "string"],
+      "minimum": 0,
+      "maximum": 250000,
+      "pattern": "^([0-9]|[1-9][0-9]{1,4}|1[0-9]{5}|2[0-4][0-9]{4}|250000)$",
+    },
    "cohortSpec": {
      "type": ["null", "object"],
      "properties": {
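
For context on the schema change above, the following standalone sketch (not connector code; the sample report values are illustrative) shows how the new `offset`/`limit` fragment behaves under `jsonschema` validation: integer values are bounded by `minimum`/`maximum`, while string values must satisfy the regex patterns — exactly the split the new unit tests below exercise.

```python
# Standalone sketch: validate sample custom-report values against the new
# "offset"/"limit" fragment of custom_reports_schema.json (fragment copied from the diff).
from jsonschema import ValidationError, validate

REPORT_SCHEMA = {
    "type": "object",
    "properties": {
        "offset": {"type": ["integer", "string"], "minimum": 0, "pattern": "^[0-9]+$"},
        "limit": {
            "type": ["integer", "string"],
            "minimum": 0,
            "maximum": 250000,
            "pattern": "^([0-9]|[1-9][0-9]{1,4}|1[0-9]{5}|2[0-4][0-9]{4}|250000)$",
        },
    },
}

for report in (
    {"limit": 250000, "offset": 0},      # integers: checked by minimum/maximum
    {"limit": "250000", "offset": "0"},  # strings: checked by the regex patterns
    {"limit": "1.2", "offset": "1"},     # rejected: "1.2" does not match the limit pattern
):
    try:
        validate(instance=report, schema=REPORT_SCHEMA)
        print(report, "-> valid")
    except ValidationError as err:
        print(report, "->", err.message)
```

The `limit` pattern accepts the string forms of 0 through 250000, mirroring the cap that `maximum` enforces for integer values.
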
r["rowCount"] + if "next_page_offset" in self.config and self.config["next_page_offset"] is not None: + offset = int(self.config["next_page_offset"]) + else: + offset = int(self.config["offset"]) - if total_rows <= offset: + if total_rows <= offset + limit: + self.config["next_page_offset"] = None return None - return {"limit": limit, "offset": offset + limit} + self.config["next_page_offset"] = str(offset + limit) + + return {"limit": str(limit), "next_page_offset": str(offset + limit)} def path( self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None @@ -201,11 +209,17 @@ def request_body_json( stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, ) -> Optional[Mapping]: + + self.config["offset"] = str(self.config.get("offset", "0")) + self.config["limit"] = str(self.config.get("limit", "10000")) + payload = { "metrics": [{"name": m} for m in self.config["metrics"]], "dimensions": [{"name": d} for d in self.config["dimensions"]], "dateRanges": [stream_slice], "returnPropertyQuota": True, + "offset": self.config['next_page_offset'] if next_page_token else self.config["offset"], + "limit": self.config["limit"] } return payload @@ -398,7 +412,13 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: def instantiate_report_class(report: dict, config: Mapping[str, Any]) -> GoogleAnalyticsDataApiBaseStream: cohort_spec = report.get("cohortSpec") pivots = report.get("pivots") - stream_config = {"metrics": report["metrics"], "dimensions": report["dimensions"], **config} + stream_config = { + "metrics": report["metrics"], + "dimensions": report["dimensions"], + "offset": report.get("offset", "0"), + "limit": report.get("limit", "10000"), + **config + } report_class_tuple = (GoogleAnalyticsDataApiBaseStream,) if pivots: stream_config["pivots"] = pivots diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_source.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_source.py index 21dfdcfe2810..eff03ec7dc66 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_source.py @@ -93,6 +93,24 @@ def inner(**kwargs): "\"2020-09-15\" }], \"dimensions\": [\"browser\", \"country\", \"language\"], \"metrics\": [\"sessions\"], " "\"pivots\": {}}]"}, Status.FAILED, '"custom_reports.0.pivots: {} is not of type \'null\', \'array\'"'), + ({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], " + "\"limit\": \"1\", \"offset\": \"1\"}]"}, + Status.SUCCEEDED, None), + ({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], " + "\"limit\": 1, \"offset\": 1}]"}, + Status.SUCCEEDED, None), + ({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], " + "\"limit\": 1.2, \"offset\": 1}]"}, + Status.FAILED, '"custom_reports.0.limit: 1.2 is not of type \'integer\', \'string\'"'), + ({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], " + "\"limit\": 1, \"offset\": 1.2}]"}, + Status.FAILED, '"custom_reports.0.offset: 1.2 is not of type \'integer\', \'string\'"'), + ({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], " + "\"limit\": \"1.2\", \"offset\": \"1\"}]"}, + 
Status.FAILED, '"custom_reports.0.limit: \'1.2\' does not match \'^([0-9]|[1-9][0-9]{1,4}|1[0-9]{5}|2[0-4][0-9]{4}|250000)$\'"'), + ({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], " + "\"limit\": \"1\", \"offset\": \"1.2\"}]"}, + Status.FAILED, '"custom_reports.0.offset: \'1.2\' does not match \'^[0-9]+$\'"') ], ) def test_check(requests_mock, config_gen, config_values, status, message): diff --git a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py index dfc64ac79f49..b9e303db4fc6 100644 --- a/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-google-analytics-data-api/unit_tests/test_streams.py @@ -88,6 +88,8 @@ def test_request_body_json(patch_base_class): ], "dateRanges": [request_body_params["stream_slice"]], "returnPropertyQuota": True, + "offset": "0", + "limit": "10000" } request_body_json = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_body_json(**request_body_params) @@ -95,23 +97,27 @@ def test_request_body_json(patch_base_class): def test_next_page_token_equal_chunk(patch_base_class): - stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]) + additional_config = { + "offset": "0", + "limit": "10000" + } + stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"] | additional_config) response = MagicMock() response.json.side_effect = [ - {"limit": 100000, "offset": 0, "rowCount": 200000}, - {"limit": 100000, "offset": 100000, "rowCount": 200000}, - {"limit": 100000, "offset": 200000, "rowCount": 200000}, + {"rowCount": 30000}, + {"rowCount": 30000}, + {"rowCount": 30000}, ] inputs = {"response": response} expected_tokens = [ { - "limit": 100000, - "offset": 100000, + "limit": "10000", + "next_page_offset": "10000", }, { - "limit": 100000, - "offset": 200000, + "limit": "10000", + "next_page_offset": "20000", }, None, ] @@ -121,28 +127,37 @@ def test_next_page_token_equal_chunk(patch_base_class): def test_next_page_token(patch_base_class): - stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]) + additional_config = { + "offset": "0", + "limit": "100000" + } + stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"] | additional_config) response = MagicMock() response.json.side_effect = [ - {"limit": 100000, "offset": 0, "rowCount": 250000}, - {"limit": 100000, "offset": 100000, "rowCount": 250000}, - {"limit": 100000, "offset": 200000, "rowCount": 250000}, - {"limit": 100000, "offset": 300000, "rowCount": 250000}, + {"rowCount": 450000}, + {"rowCount": 450000}, + {"rowCount": 450000}, + {"rowCount": 450000}, + {"rowCount": 450000}, ] inputs = {"response": response} expected_tokens = [ { - "limit": 100000, - "offset": 100000, + "limit": "100000", + "next_page_offset": "100000", + }, + { + "limit": "100000", + "next_page_offset": "200000", }, { - "limit": 100000, - "offset": 200000, + "limit": "100000", + "next_page_offset": "300000", }, { - "limit": 100000, - "offset": 300000, + "limit": "100000", + "next_page_offset": "400000", }, None, ] diff --git a/docs/integrations/sources/google-analytics-data-api.md b/docs/integrations/sources/google-analytics-data-api.md 
diff --git a/docs/integrations/sources/google-analytics-data-api.md b/docs/integrations/sources/google-analytics-data-api.md
index 9f9eccae1918..f2c8aba53e81 100644
--- a/docs/integrations/sources/google-analytics-data-api.md
+++ b/docs/integrations/sources/google-analytics-data-api.md
@@ -110,15 +110,16 @@ This connector outputs the following incremental streams:
 
 ## Changelog
 
-| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------|
-| 0.2.2 | 2023-05-12 | [25987](https://github.com/airbytehq/airbyte/pull/25987) | Categorized Config Errors Accurately |
-| 0.2.1 | 2023-05-11 | [26008](https://github.com/airbytehq/airbyte/pull/26008) | Added handling for `429 - potentiallyThresholdedRequestsPerHour` error |
-| 0.2.0 | 2023-04-13 | [25179](https://github.com/airbytehq/airbyte/pull/25179) | Implement support for custom Cohort and Pivot reports |
-| 0.1.3 | 2023-03-10 | [23872](https://github.com/airbytehq/airbyte/pull/23872) | Fix parse + cursor for custom reports |
-| 0.1.2 | 2023-03-07 | [23822](https://github.com/airbytehq/airbyte/pull/23822) | Improve `rate limits` customer faced error messages and retry logic for `429` |
-| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added |
-| 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT |
-| 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring |
-| 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl |
-| 0.0.1 | 2022-05-09 | [12701](https://github.com/airbytehq/airbyte/pull/12701) | Introduce Google Analytics Data API source |
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------|
+| 0.2.3 | 2023-05-16 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Fix pagination |
+| 0.2.2 | 2023-05-12 | [25987](https://github.com/airbytehq/airbyte/pull/25987) | Categorized Config Errors Accurately |
+| 0.2.1 | 2023-05-11 | [26008](https://github.com/airbytehq/airbyte/pull/26008) | Added handling for `429 - potentiallyThresholdedRequestsPerHour` error |
+| 0.2.0 | 2023-04-13 | [25179](https://github.com/airbytehq/airbyte/pull/25179) | Implement support for custom Cohort and Pivot reports |
+| 0.1.3 | 2023-03-10 | [23872](https://github.com/airbytehq/airbyte/pull/23872) | Fix parse + cursor for custom reports |
+| 0.1.2 | 2023-03-07 | [23822](https://github.com/airbytehq/airbyte/pull/23822) | Improve `rate limits` customer faced error messages and retry logic for `429` |
+| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added |
+| 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT |
+| 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring |
+| 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl |
+| 0.0.1 | 2022-05-09 | [12701](https://github.com/airbytehq/airbyte/pull/12701) | Introduce Google Analytics Data API source |