-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Source Smartsheets: incremental read and tests (#12077)
* #5520 fix scrambled columns bug * #5520 source smartsheets: add changelog item * #5520 move pytest to optional setup requirements * #12003 source smartsheets: implement incremental read + tests * #12003 source smartsheet: add changelog * #12003 source smartsheets: fix merge conflict on unit tests * #12003 source smartsheets: fix startdate in spec * #12003 source smartsheets: add default start dt to spec * #12003 source smartsheets: add default start dt to spec * auto-bump connector version Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
- Loading branch information
1 parent
d612b8a
commit 2eb9356
Showing
17 changed files
with
741 additions
and
153 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
airbyte-integrations/connectors/source-smartsheets/integration_tests/abnormal_state.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"aws_s3_sample": { | ||
"modifiedAt": "2222-03-07T11:30:00+00:00" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
airbyte-integrations/connectors/source-smartsheets/integration_tests/expected_records.txt
Large diffs are not rendered by default.
Oops, something went wrong.
92 changes: 92 additions & 0 deletions
92
airbyte-integrations/connectors/source-smartsheets/source_smartsheets/sheet.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# | ||
# Copyright (c) 2021 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import logging | ||
from functools import cached_property | ||
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple | ||
|
||
import smartsheet | ||
|
||
|
||
class SmartSheetAPIWrapper:
    """Thin wrapper around the Smartsheet SDK for a single configured sheet.

    The sheet is fetched lazily on first access of :attr:`data` and fetched
    again on every :meth:`read_records` call.
    """

    # Smartsheet column type -> JSON-schema property. Types that are not
    # listed here fall back to a plain string property.
    _COLUMN_TYPE_TO_PROPERTY = {
        "TEXT_NUMBER": {"type": "string"},
        "DATE": {"type": "string", "format": "date"},
        "DATETIME": {"type": "string", "format": "date-time"},
    }

    def __init__(self, config: Mapping[str, Any]):
        """Build the API client from ``config``.

        Args:
            config: connector configuration; must contain ``spreadsheet_id``
                and ``access_token``.

        Raises:
            KeyError: if one of the required keys is missing.
        """
        self._spreadsheet_id = config["spreadsheet_id"]
        self._access_token = config["access_token"]
        api_client = smartsheet.Smartsheet(self._access_token)
        api_client.errors_as_exceptions(True)
        # each call to `Sheets` makes a new instance, so we save it here to make no more new objects
        self._get_sheet = api_client.Sheets.get_sheet
        self._data = None

    def _fetch_sheet(self, from_dt: Optional[str] = None) -> None:
        """Fetch the sheet and store the response in ``self._data``.

        Args:
            from_dt: forwarded as ``rows_modified_since``. When it is empty
                the request is made with ``page_size=1`` (presumably because
                only sheet metadata is needed in that case -- confirm).
        """
        kwargs = {"rows_modified_since": from_dt}
        if not from_dt:
            kwargs["page_size"] = 1
        self._data = self._get_sheet(self._spreadsheet_id, **kwargs)

    @staticmethod
    def _column_to_property(column_type: str) -> Dict[str, Any]:
        """Return the JSON-schema property for a Smartsheet column type.

        Unknown types map to ``{"type": "string"}``. A fresh dict is returned
        on every call so callers can mutate the result safely.
        """
        mapping = SmartSheetAPIWrapper._COLUMN_TYPE_TO_PROPERTY
        return dict(mapping.get(column_type, {"type": "string"}))

    def _construct_record(self, row: "smartsheet.models.Row") -> Dict[str, str]:
        """Convert a sheet row into a record keyed by column title.

        Every cell value is stringified. Only a missing value (``None``)
        becomes ``""``; falsy but meaningful values such as ``0`` or ``False``
        are kept as ``"0"`` / ``"False"``. A column without a matching cell in
        the row also yields ``""``. The row's modification time is added under
        ``modifiedAt`` in ISO-8601 form.
        """
        values_by_column_id = {
            cell.column_id: "" if cell.value is None else str(cell.value)
            for cell in row.cells
        }
        record = {
            column.title: values_by_column_id.get(column.id, "")
            for column in self.data.columns
        }
        record["modifiedAt"] = row.modified_at.isoformat()
        return record

    @property
    def data(self) -> "smartsheet.models.Sheet":
        """The most recently fetched sheet, fetched on first access."""
        if self._data is None:
            self._fetch_sheet()
        return self._data

    @property
    def name(self) -> str:
        """Name of the sheet."""
        return self.data.name

    @property
    def row_count(self) -> int:
        """Number of rows in the most recently fetched sheet."""
        return len(self.data.rows)

    @cached_property
    def primary_key(self) -> Optional[str]:
        """Title of the first column flagged as primary, or ``None`` if there is none."""
        for column in self.data.columns:
            if column.primary:
                return column.title
        return None

    @cached_property
    def json_schema(self) -> Dict[str, Any]:
        """JSON schema with one property per column plus the ``modifiedAt`` cursor."""
        column_info = {
            column.title: self._column_to_property(column.type.value)
            for column in self.data.columns
        }
        column_info["modifiedAt"] = {"type": "string", "format": "date-time"}  # add cursor field explicitly
        return {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": column_info,
        }

    def read_records(self, from_dt: str) -> Iterable[Dict[str, str]]:
        """Re-fetch the sheet filtered by ``from_dt`` and yield one record per row.

        Args:
            from_dt: passed to the API as ``rows_modified_since``.
        """
        self._fetch_sheet(from_dt)
        for row in self.data.rows:
            yield self._construct_record(row)

    def check_connection(self, logger: logging.Logger) -> Tuple[bool, Optional[str]]:
        """Try to fetch the sheet and report whether it worked.

        Returns:
            ``(True, None)`` on success, otherwise ``(False, reason)``; the
            reason is also logged as an error.
        """
        try:
            _ = self.data
        except smartsheet.exceptions.ApiError as e:
            err = e.error.result
            # API error code 1006 is reported to the user as 404
            # (presumably the API's "not found" code -- confirm).
            code = 404 if err.code == 1006 else err.code
            reason = f"{err.name}: {code} - {err.message} | Check your spreadsheet ID."
            logger.error(reason)
            return False, reason
        except Exception as e:
            reason = str(e)
            logger.error(reason)
            return False, reason
        return True, None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
airbyte-integrations/connectors/source-smartsheets/source_smartsheets/streams.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# | ||
# Copyright (c) 2021 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
import datetime | ||
from typing import Any, Dict, Iterable, List, Mapping | ||
|
||
from airbyte_cdk.models import SyncMode | ||
from airbyte_cdk.sources.streams import Stream | ||
from source_smartsheets.sheet import SmartSheetAPIWrapper | ||
|
||
|
||
class SmartsheetStream(Stream):
    """Stream over the rows of a single sheet, with ``modifiedAt`` as the cursor."""

    cursor_field = "modifiedAt"

    def __init__(self, smartsheet: SmartSheetAPIWrapper, config: Mapping[str, Any]):
        """Store the API wrapper and resolve the starting cursor value.

        Args:
            smartsheet: wrapper used to read the sheet's metadata and rows.
            config: connector configuration; an optional ``start_datetime``
                overrides the default start of ``2020-01-01T00:00:00+00:00``.
        """
        self.smartsheet = smartsheet
        self._state = {}
        self._config = config
        self._start_datetime = self._config.get("start_datetime") or "2020-01-01T00:00:00+00:00"

    @property
    def primary_key(self) -> str:
        """Primary key reported by the sheet wrapper."""
        return self.smartsheet.primary_key

    def get_json_schema(self) -> Dict[str, Any]:
        """JSON schema reported by the sheet wrapper."""
        return self.smartsheet.json_schema

    @property
    def name(self) -> str:
        """Stream name, taken from the sheet name."""
        return self.smartsheet.name

    @property
    def state(self) -> Mapping[str, Any]:
        """Current cursor state; initialised from the start datetime while empty."""
        if not self._state:
            self._state = {self.cursor_field: self._start_datetime}
        return self._state

    @state.setter
    def state(self, value: Mapping[str, Any]):
        self._state = value

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        """Yield rows modified since the current cursor, advancing the cursor as it goes.

        The arguments are accepted for interface compatibility only; the
        starting point is always taken from ``self.state``. The state is
        updated before each record is yielded and never moves backwards.
        """

        def iso_dt(src: str) -> datetime.datetime:
            # `fromisoformat` rejects a trailing "Z" before Python 3.11, so
            # spell the UTC designator as an explicit offset first.
            if src.endswith(("Z", "z")):
                src = src[:-1] + "+00:00"
            parsed = datetime.datetime.fromisoformat(src)
            if parsed.tzinfo is None:
                # A naive value cannot be compared with timezone-aware row
                # timestamps (TypeError).
                # NOTE(review): assumes naive inputs are meant as UTC -- confirm.
                parsed = parsed.replace(tzinfo=datetime.timezone.utc)
            return parsed

        for record in self.smartsheet.read_records(self.state[self.cursor_field]):
            current_cursor_value = iso_dt(self.state[self.cursor_field])
            latest_cursor_value = iso_dt(record[self.cursor_field])
            new_cursor_value = max(latest_cursor_value, current_cursor_value)
            self.state = {self.cursor_field: new_cursor_value.isoformat("T", "seconds")}
            yield record
Oops, something went wrong.