
Commit

Merge pull request #3 from mpetrykin/source-google-analytics-data-api_pagination

Source Google Analytics Data API: fix pagination
mpetrykin authored May 16, 2023
2 parents 36b3358 + 5e7c567 commit ab04903
Showing 7 changed files with 104 additions and 39 deletions.
@@ -28,5 +28,5 @@ COPY source_google_analytics_data_api ./source_google_analytics_data_api
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

-LABEL io.airbyte.version=0.2.2
+LABEL io.airbyte.version=0.2.3
LABEL io.airbyte.name=airbyte/source-google-analytics-data-api
@@ -7,7 +7,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 3cc2eafd-84aa-4dca-93af-322d9dfeec1a
-  dockerImageTag: 0.2.2
+  dockerImageTag: 0.2.3
dockerRepository: airbyte/source-google-analytics-data-api
githubIssueLabel: source-google-analytics-data-api
icon: google-analytics.svg
@@ -21,6 +21,17 @@
"type": "string"
}
},
"offset": {
"type": ["integer", "string"],
"minimum": 0,
"pattern": "^[0-9]+$"
},
"limit": {
"type": ["integer", "string"],
"minimum": 0,
"maximum": 250000,
"pattern": "^([0-9]|[1-9][0-9]{1,4}|1[0-9]{5}|2[0-4][0-9]{4}|250000)$"
},
"cohortSpec": {
"type": ["null", "object"],
"properties": {
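For reference, the new `limit` pattern accepts whole numbers from 0 to 250000 (the Data API's per-request row cap), whether supplied as an integer or a numeric string, while `offset` accepts any non-negative integer. Below is a minimal standalone sketch of how these constraints behave, assuming draft-7 semantics via the `jsonschema` package; the schema fragment mirrors the diff above and the sample values are illustrative, not taken from the connector.

```python
# Minimal sketch: validate sample "limit" values against the spec fragment
# above. Assumes the jsonschema package; sample values are illustrative.
import jsonschema

limit_schema = {
    "type": ["integer", "string"],
    "minimum": 0,
    "maximum": 250000,
    "pattern": "^([0-9]|[1-9][0-9]{1,4}|1[0-9]{5}|2[0-4][0-9]{4}|250000)$",
}

for value in [250000, "250000", 1.2, "1.2", "250001"]:
    try:
        jsonschema.validate(value, limit_schema)
        print(f"{value!r}: valid")
    except jsonschema.ValidationError as e:
        print(f"{value!r}: invalid ({e.message})")

# 250000 and "250000" pass; 1.2 fails the integer/string type check;
# "1.2" and "250001" fail the pattern.
```

Note that `pattern` applies only to string instances and `minimum`/`maximum` only to numeric ones, which is why the same range is constrained twice.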
@@ -57,7 +57,7 @@ class GoogleAnalyticsDataApiAbstractStream(HttpStream, ABC):
http_method = "POST"
raise_on_http_errors = True

-    def __init__(self, *, config: Mapping[str, Any], **kwargs):
+    def __init__(self, *, config: MutableMapping[str, Any], **kwargs):
super().__init__(**kwargs)
self._config = config

@@ -154,13 +154,21 @@ def get_json_schema(self) -> Mapping[str, Any]:
     def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
         r = response.json()

-        if all(key in r for key in ["limit", "offset", "rowCount"]):
-            limit, offset, total_rows = r["limit"], r["offset"], r["rowCount"]
+        if "rowCount" in r:
+            limit = int(self.config["limit"])
+            total_rows = r["rowCount"]
+            if "next_page_offset" in self.config and self.config["next_page_offset"] is not None:
+                offset = int(self.config["next_page_offset"])
+            else:
+                offset = int(self.config["offset"])

-            if total_rows <= offset:
+            if total_rows <= offset + limit:
+                self.config["next_page_offset"] = None
                 return None

-            return {"limit": limit, "offset": offset + limit}
+            self.config["next_page_offset"] = str(offset + limit)
+
+            return {"limit": str(limit), "next_page_offset": str(offset + limit)}

def path(
self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
@@ -201,11 +209,17 @@ def request_body_json(
stream_slice: Mapping[str, Any] = None,
next_page_token: Mapping[str, Any] = None,
) -> Optional[Mapping]:

self.config["offset"] = str(self.config.get("offset", "0"))
self.config["limit"] = str(self.config.get("limit", "10000"))

         payload = {
             "metrics": [{"name": m} for m in self.config["metrics"]],
             "dimensions": [{"name": d} for d in self.config["dimensions"]],
             "dateRanges": [stream_slice],
             "returnPropertyQuota": True,
+            "offset": self.config['next_page_offset'] if next_page_token else self.config["offset"],
+            "limit": self.config["limit"]
         }
return payload

@@ -398,7 +412,13 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
def instantiate_report_class(report: dict, config: Mapping[str, Any]) -> GoogleAnalyticsDataApiBaseStream:
cohort_spec = report.get("cohortSpec")
pivots = report.get("pivots")
stream_config = {"metrics": report["metrics"], "dimensions": report["dimensions"], **config}
stream_config = {
"metrics": report["metrics"],
"dimensions": report["dimensions"],
"offset": report.get("offset", "0"),
"limit": report.get("limit", "10000"),
**config
}
report_class_tuple = (GoogleAnalyticsDataApiBaseStream,)
if pivots:
stream_config["pivots"] = pivots
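Taken together, the source changes move pagination state off the API response and onto the stream's config: `request_body_json` seeds string `offset`/`limit` defaults, `next_page_token` computes the next offset from that stored state plus the response's `rowCount`, and the saved `next_page_offset` feeds the next request body; this is also why `__init__` now takes a `MutableMapping`. The following is a condensed, hypothetical re-creation of that loop, with plain dicts standing in for the stream class and HTTP layer; it is not the connector's actual code.

```python
# Hypothetical, condensed re-creation of the new pagination loop; plain dicts
# stand in for the stream config and the Data API responses (illustrative only).
from typing import Optional


def next_page_token(config: dict, response: dict) -> Optional[dict]:
    if "rowCount" not in response:
        return None
    limit = int(config["limit"])
    offset = int(config.get("next_page_offset") or config["offset"])
    if response["rowCount"] <= offset + limit:
        config["next_page_offset"] = None  # last page reached; reset state
        return None
    config["next_page_offset"] = str(offset + limit)
    return {"limit": str(limit), "next_page_offset": str(offset + limit)}


config = {"offset": "0", "limit": "10000"}  # defaults seeded by request_body_json
requests_sent = 0
while True:
    requests_sent += 1
    # Each request would send config["next_page_offset"] (or "offset" on the
    # first call) in the report body; the API replies with the total rowCount.
    response = {"rowCount": 30000}
    if next_page_token(config, response) is None:
        break

print(requests_sent)  # 3 requests: offsets 0, 10000, 20000
```

With a `rowCount` of 30000 and a limit of 10000 this yields three requests, matching the `test_next_page_token_equal_chunk` case in the unit tests below.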
@@ -93,6 +93,24 @@ def inner(**kwargs):
"\"2020-09-15\" }], \"dimensions\": [\"browser\", \"country\", \"language\"], \"metrics\": [\"sessions\"], "
"\"pivots\": {}}]"},
Status.FAILED, '"custom_reports.0.pivots: {} is not of type \'null\', \'array\'"'),
({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], "
"\"limit\": \"1\", \"offset\": \"1\"}]"},
Status.SUCCEEDED, None),
({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], "
"\"limit\": 1, \"offset\": 1}]"},
Status.SUCCEEDED, None),
({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], "
"\"limit\": 1.2, \"offset\": 1}]"},
Status.FAILED, '"custom_reports.0.limit: 1.2 is not of type \'integer\', \'string\'"'),
({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], "
"\"limit\": 1, \"offset\": 1.2}]"},
Status.FAILED, '"custom_reports.0.offset: 1.2 is not of type \'integer\', \'string\'"'),
({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], "
"\"limit\": \"1.2\", \"offset\": \"1\"}]"},
Status.FAILED, '"custom_reports.0.limit: \'1.2\' does not match \'^([0-9]|[1-9][0-9]{1,4}|1[0-9]{5}|2[0-4][0-9]{4}|250000)$\'"'),
({"custom_reports": "[{\"name\": \"name\", \"dimensions\": [\"country\"], \"metrics\": [\"sessions\"], "
"\"limit\": \"1\", \"offset\": \"1.2\"}]"},
Status.FAILED, '"custom_reports.0.offset: \'1.2\' does not match \'^[0-9]+$\'"')
],
)
def test_check(requests_mock, config_gen, config_values, status, message):
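The new cases above exercise both integer and string forms of `limit`/`offset` plus the failure modes: floats fail the `integer`/`string` type check, and float-like strings fail the regex patterns. As a rough sketch of the parsing path they exercise, a custom report entry is decoded from the `custom_reports` JSON string and given the same `"0"`/`"10000"` defaults seen in `instantiate_report_class` above; field names mirror the tests, the rest is illustrative.

```python
# Illustrative only: parse a custom_reports value like the ones in the test
# cases above and apply the "0"/"10000" defaults used by the connector.
import json

custom_reports = '[{"name": "name", "dimensions": ["country"], "metrics": ["sessions"], "limit": "1", "offset": "1"}]'

for report in json.loads(custom_reports):
    stream_config = {
        "metrics": report["metrics"],
        "dimensions": report["dimensions"],
        "offset": str(report.get("offset", "0")),   # str() normalization is
        "limit": str(report.get("limit", "10000")),  # illustrative here
    }
    print(stream_config)
# {'metrics': ['sessions'], 'dimensions': ['country'], 'offset': '1', 'limit': '1'}
```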
@@ -88,30 +88,36 @@ def test_request_body_json(patch_base_class):
         ],
         "dateRanges": [request_body_params["stream_slice"]],
         "returnPropertyQuota": True,
+        "offset": "0",
+        "limit": "10000"
     }

request_body_json = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"]).request_body_json(**request_body_params)
assert request_body_json == expected_body_json


 def test_next_page_token_equal_chunk(patch_base_class):
-    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
+    additional_config = {
+        "offset": "0",
+        "limit": "10000"
+    }
+    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"] | additional_config)
     response = MagicMock()
     response.json.side_effect = [
-        {"limit": 100000, "offset": 0, "rowCount": 200000},
-        {"limit": 100000, "offset": 100000, "rowCount": 200000},
-        {"limit": 100000, "offset": 200000, "rowCount": 200000},
+        {"rowCount": 30000},
+        {"rowCount": 30000},
+        {"rowCount": 30000},
     ]
inputs = {"response": response}

     expected_tokens = [
         {
-            "limit": 100000,
-            "offset": 100000,
+            "limit": "10000",
+            "next_page_offset": "10000",
         },
         {
-            "limit": 100000,
-            "offset": 200000,
+            "limit": "10000",
+            "next_page_offset": "20000",
         },
         None,
     ]
@@ -121,28 +127,37 @@ def test_next_page_token(patch_base_class):


 def test_next_page_token(patch_base_class):
-    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"])
+    additional_config = {
+        "offset": "0",
+        "limit": "100000"
+    }
+    stream = GoogleAnalyticsDataApiBaseStream(authenticator=MagicMock(), config=patch_base_class["config"] | additional_config)
     response = MagicMock()
     response.json.side_effect = [
-        {"limit": 100000, "offset": 0, "rowCount": 250000},
-        {"limit": 100000, "offset": 100000, "rowCount": 250000},
-        {"limit": 100000, "offset": 200000, "rowCount": 250000},
-        {"limit": 100000, "offset": 300000, "rowCount": 250000},
+        {"rowCount": 450000},
+        {"rowCount": 450000},
+        {"rowCount": 450000},
+        {"rowCount": 450000},
+        {"rowCount": 450000},
     ]
inputs = {"response": response}

     expected_tokens = [
         {
-            "limit": 100000,
-            "offset": 100000,
+            "limit": "100000",
+            "next_page_offset": "100000",
         },
+        {
+            "limit": "100000",
+            "next_page_offset": "200000",
+        },
         {
-            "limit": 100000,
-            "offset": 200000,
+            "limit": "100000",
+            "next_page_offset": "300000",
         },
         {
-            "limit": 100000,
-            "offset": 300000,
+            "limit": "100000",
+            "next_page_offset": "400000",
         },
         None,
     ]
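The expected tokens follow directly from the arithmetic: with a limit of 100000 and a `rowCount` of 450000, pages begin at offsets 0, 100000, 200000, 300000, and 400000, so four tokens are emitted before `offset + limit` reaches the row count and pagination returns `None`. A quick illustrative check of that sequence (not part of the test suite):

```python
# Quick check of the token arithmetic behind the test above (illustrative).
row_count, limit = 450000, 100000
offset, tokens = 0, []
while row_count > offset + limit:
    offset += limit
    tokens.append({"limit": str(limit), "next_page_offset": str(offset)})

print(tokens)       # next_page_offset: "100000" through "400000"
print(len(tokens))  # 4 tokens, then pagination stops (None)
```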
25 changes: 13 additions & 12 deletions docs/integrations/sources/google-analytics-data-api.md
@@ -110,15 +110,16 @@ This connector outputs the following incremental streams:

## Changelog

-| Version | Date | Pull Request | Subject |
-|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------|
-| 0.2.2 | 2023-05-12 | [25987](https://github.com/airbytehq/airbyte/pull/25987) | Categorized Config Errors Accurately |
-| 0.2.1 | 2023-05-11 | [26008](https://github.com/airbytehq/airbyte/pull/26008) | Added handling for `429 - potentiallyThresholdedRequestsPerHour` error |
-| 0.2.0 | 2023-04-13 | [25179](https://github.com/airbytehq/airbyte/pull/25179) | Implement support for custom Cohort and Pivot reports |
-| 0.1.3 | 2023-03-10 | [23872](https://github.com/airbytehq/airbyte/pull/23872) | Fix parse + cursor for custom reports |
-| 0.1.2 | 2023-03-07 | [23822](https://github.com/airbytehq/airbyte/pull/23822) | Improve `rate limits` customer faced error messages and retry logic for `429` |
-| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added |
-| 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT |
-| 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring |
-| 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl |
-| 0.0.1 | 2022-05-09 | [12701](https://github.com/airbytehq/airbyte/pull/12701) | Introduce Google Analytics Data API source |
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------|
+| 0.2.3 | 2023-05-16 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Fix pagination |
+| 0.2.2 | 2023-05-12 | [25987](https://github.com/airbytehq/airbyte/pull/25987) | Categorized Config Errors Accurately |
+| 0.2.1 | 2023-05-11 | [26008](https://github.com/airbytehq/airbyte/pull/26008) | Added handling for `429 - potentiallyThresholdedRequestsPerHour` error |
+| 0.2.0 | 2023-04-13 | [25179](https://github.com/airbytehq/airbyte/pull/25179) | Implement support for custom Cohort and Pivot reports |
+| 0.1.3 | 2023-03-10 | [23872](https://github.com/airbytehq/airbyte/pull/23872) | Fix parse + cursor for custom reports |
+| 0.1.2 | 2023-03-07 | [23822](https://github.com/airbytehq/airbyte/pull/23822) | Improve `rate limits` customer faced error messages and retry logic for `429` |
+| 0.1.1 | 2023-01-10 | [21169](https://github.com/airbytehq/airbyte/pull/21169) | Slicer updated, unit tests added |
+| 0.1.0 | 2023-01-08 | [20889](https://github.com/airbytehq/airbyte/pull/20889) | Improved config validation, SAT |
+| 0.0.3 | 2022-08-15 | [15229](https://github.com/airbytehq/airbyte/pull/15229) | Source Google Analytics Data Api: code refactoring |
+| 0.0.2 | 2022-07-27 | [15087](https://github.com/airbytehq/airbyte/pull/15087) | fix documentationUrl |
+| 0.0.1 | 2022-05-09 | [12701](https://github.com/airbytehq/airbyte/pull/12701) | Introduce Google Analytics Data API source |
