diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json index d1f7b8b61650..913968015e74 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/36c891d9-4bd9-43ac-bad2-10e12756272c.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "36c891d9-4bd9-43ac-bad2-10e12756272c", "name": "HubSpot", "dockerRepository": "airbyte/source-hubspot", - "dockerImageTag": "0.1.28", + "dockerImageTag": "0.1.29", "documentationUrl": "https://docs.airbyte.io/integrations/sources/hubspot", "icon": "hubspot.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 9a0edbdce4a9..14a03adbd581 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -287,7 +287,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.28 + dockerImageTag: 0.1.29 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 02c566c2dc67..6b06fdca9ee3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2793,7 +2793,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-hubspot:0.1.28" +- dockerImage: "airbyte/source-hubspot:0.1.29" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 6de72e67312e..7c35101c19b4 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.28 +LABEL io.airbyte.version=0.1.29 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml index dac3581abffb..02faf9dc55f9 100644 --- a/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-hubspot/acceptance-test-config.yml @@ -26,14 +26,14 @@ tests: # and therefore the start date is set at 2021-10-10 for `config_oauth.json`, # but the campaign was created on 2021-01-11 empty_streams: ["campaigns", "workflows"] - # incremental: fixme (eugene): '<=' not supported between instances of 'int' and 'str' - # See https://github.com/airbytehq/airbyte/issues/6509 - # - config_path: "secrets/config.json" - # configured_catalog_path: "sample_files/configured_catalog.json" - # future_state_path: "integration_tests/abnormal_state.json" - # cursor_paths: - # subscription_changes: ["timestamp"] - # email_events: ["timestamp"] + incremental: + - config_path: "secrets/config.json" + configured_catalog_path: "sample_files/configured_catalog.json" + future_state_path: "integration_tests/abnormal_state.json" + cursor_paths: + subscription_changes: ["timestamp"] + email_events: ["timestamp"] + contact_lists: ["timestamp"] full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "sample_files/full_refresh_catalog.json" diff --git a/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json index 1f6f5a46a9e6..5944b5d50c7a 100644 --- a/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-hubspot/integration_tests/abnormal_state.json @@ -1,8 +1,32 @@ { + "companies": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" + }, + "contact_lists": { + "timestamp": "2221-10-12T13:37:56.412000+00:00" + }, + "contacts": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" + }, + "deals": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" + }, "email_events": { - "timestamp": "2121-03-19T17:00:45.743000+00:00" + "timestamp": "2221-10-12T13:37:56.412000+00:00" + }, + "line_items": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" + }, + "products": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" + }, + "quotes": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" }, "subscription_changes": { - "timestamp": "2121-03-19T16:58:54.301000+00:00" + "timestamp": "2221-10-12T13:37:56.412000+00:00" + }, + "tickets": { + "updatedAt": "2221-10-12T13:37:56.412000+00:00" } } diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json index 3eb0dc3836ca..5f9e42c80d30 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json @@ -13,28 +13,37 @@ "stream": { "name": "companies", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { "name": "contact_lists", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { "name": "contacts", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { @@ -49,10 +58,13 @@ "stream": { "name": "deals", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { @@ -88,10 +100,13 @@ "stream": { "name": "line_items", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { @@ -118,19 +133,25 @@ "stream": { "name": "products", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { "name": "quotes", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { @@ -148,10 +169,13 @@ "stream": { "name": "tickets", "json_schema": {}, - "supported_sync_modes": ["full_refresh"] + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["updatedAt"] }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" + "sync_mode": "incremental", + "cursor_field": ["updatedAt"], + "destination_sync_mode": "append" }, { "stream": { diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json b/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json index b56de1fc71ec..84dc55345bbf 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/sample_state.json @@ -1,8 +1,32 @@ { - "subscription_changes": { + "companies": { + "updatedAt": "2021-02-23T00:00:00Z" + }, + "contact_lists": { "timestamp": "2021-02-23T00:00:00Z" }, + "contacts": { + "updatedAt": "2021-02-23T00:00:00Z" + }, + "deals": { + "updatedAt": "2021-02-23T00:00:00Z" + }, "email_events": { "timestamp": "2021-02-23T00:00:00Z" + }, + "line_items": { + "updatedAt": "2021-02-23T00:00:00Z" + }, + "products": { + "updatedAt": "2021-02-23T00:00:00Z" + }, + "quotes": { + "updatedAt": "2021-02-23T00:00:00Z" + }, + "subscription_changes": { + "timestamp": "2021-02-23T00:00:00Z" + }, + "tickets": { + "updatedAt": "2021-02-23T00:00:00Z" } -} +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py index e1ea36cbf4aa..01d353f4920b 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/api.py @@ -436,6 +436,9 @@ class IncrementalStream(Stream, ABC): state_pk = "timestamp" limit = 1000 + # Flag which enable/disable chunked read in read_chunked method + # False -> chunk size is max (only one slice), True -> chunk_size is 30 days + need_chunk = True @property @abstractmethod @@ -446,12 +449,15 @@ def updated_at_field(self): def state(self) -> Optional[Mapping[str, Any]]: """Current state, if wasn't set return None""" if self._state: - return {self.state_pk: str(self._state)} + return ( + {self.state_pk: int(self._state.timestamp() * 1000)} if self.state_pk == "timestamp" else {self.state_pk: str(self._state)} + ) return None @state.setter def state(self, value): - self._state = pendulum.parse(value[self.state_pk]) + state = value[self.state_pk] + self._state = pendulum.parse(str(pendulum.from_timestamp(state / 1000))) if isinstance(state, int) else pendulum.parse(state) self._start_date = max(self._state, self._start_date) def __init__(self, *args, **kwargs): @@ -477,12 +483,13 @@ def read(self, getter: Callable, params: Mapping[str, Any] = None) -> Iterator: self._start_date = self._state def read_chunked( - self, getter: Callable, params: Mapping[str, Any] = None, chunk_size: pendulum.duration = pendulum.duration(days=1) + self, getter: Callable, params: Mapping[str, Any] = None, chunk_size: pendulum.duration = pendulum.duration(days=30) ) -> Iterator: params = {**params} if params else {} now_ts = int(pendulum.now().timestamp() * 1000) start_ts = int(self._start_date.timestamp() * 1000) - chunk_size = int(chunk_size.total_seconds() * 1000) + max_delta = now_ts - start_ts + chunk_size = int(chunk_size.total_seconds() * 1000) if self.need_chunk else max_delta for ts in range(start_ts, now_ts, chunk_size): end_ts = ts + chunk_size @@ -553,6 +560,12 @@ def _flat_associations(self, records: Iterable[MutableMapping]) -> Iterable[Muta yield record +class CRMObjectIncrementalStream(CRMObjectStream, IncrementalStream): + state_pk = "updatedAt" + limit = 100 + need_chunk = False + + class CampaignStream(Stream): """Email campaigns, API v1 There is some confusion between emails and campaigns in docs, this endpoint returns actual emails @@ -571,7 +584,7 @@ def list(self, fields) -> Iterable: yield {**row, **record} -class ContactListStream(Stream): +class ContactListStream(IncrementalStream): """Contact lists, API v1 Docs: https://legacydocs.hubspot.com/docs/methods/lists/get_lists """ @@ -582,6 +595,7 @@ class ContactListStream(Stream): updated_at_field = "updatedAt" created_at_field = "createdAt" limit_field = "count" + need_chunk = False class DealStageHistoryStream(Stream): @@ -608,7 +622,7 @@ def list(self, fields) -> Iterable: yield from self.read(partial(self._api.get, url=self.url), params) -class DealStream(CRMObjectStream): +class DealStream(CRMObjectIncrementalStream): """Deals, API v3""" def __init__(self, **kwargs): diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py index fd48c0816a25..b47bdaeaa4d1 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/client.py @@ -12,7 +12,7 @@ API, CampaignStream, ContactListStream, - CRMObjectStream, + CRMObjectIncrementalStream, DealPipelineStream, DealStream, EmailEventStream, @@ -35,26 +35,26 @@ def __init__(self, start_date, credentials, **kwargs): common_params = dict(api=self._api, start_date=self._start_date) self._apis = { "campaigns": CampaignStream(**common_params), - "companies": CRMObjectStream(entity="company", associations=["contacts"], **common_params), + "companies": CRMObjectIncrementalStream(entity="company", associations=["contacts"], **common_params), "contact_lists": ContactListStream(**common_params), - "contacts": CRMObjectStream(entity="contact", **common_params), + "contacts": CRMObjectIncrementalStream(entity="contact", **common_params), "deal_pipelines": DealPipelineStream(**common_params), "deals": DealStream(associations=["contacts"], **common_params), "email_events": EmailEventStream(**common_params), "engagements": EngagementStream(**common_params), "forms": FormStream(**common_params), - "line_items": CRMObjectStream(entity="line_item", **common_params), + "line_items": CRMObjectIncrementalStream(entity="line_item", **common_params), "marketing_emails": MarketingEmailStream(**common_params), "owners": OwnerStream(**common_params), - "products": CRMObjectStream(entity="product", **common_params), + "products": CRMObjectIncrementalStream(entity="product", **common_params), "subscription_changes": SubscriptionChangeStream(**common_params), - "tickets": CRMObjectStream(entity="ticket", **common_params), + "tickets": CRMObjectIncrementalStream(entity="ticket", **common_params), "workflows": WorkflowStream(**common_params), } credentials_title = credentials.get("credentials_title") if credentials_title == "API Key Credentials": - self._apis["quotes"] = CRMObjectStream(entity="quote", **common_params) + self._apis["quotes"] = CRMObjectIncrementalStream(entity="quote", **common_params) super().__init__(**kwargs) diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/deals.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/deals.json index f6564e9bb731..c8545f9459b3 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/deals.json +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/schemas/deals.json @@ -310,10 +310,12 @@ } }, "createdAt": { - "type": ["null", "string"] + "type": ["null", "string"], + "format": "date-time" }, "updatedAt": { - "type": ["null", "string"] + "type": ["null", "string"], + "format": "date-time" }, "archived": { "type": ["null", "boolean"] diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 1ae36591ea99..d1c6ccb7b5c2 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -18,21 +18,21 @@ Check out common troubleshooting issues for the HubSpot connector on our Discour This source is capable of syncing the following tables and their data: * [Campaigns](https://developers.hubspot.com/docs/methods/email/get_campaign_data) -* [Companies](https://developers.hubspot.com/docs/api/crm/companies) -* [Contact Lists](http://developers.hubspot.com/docs/methods/lists/get_lists) -* [Contacts](https://developers.hubspot.com/docs/methods/contacts/get_contacts) +* [Companies](https://developers.hubspot.com/docs/api/crm/companies) \(Incremental\) +* [Contact Lists](http://developers.hubspot.com/docs/methods/lists/get_lists) \(Incremental\) +* [Contacts](https://developers.hubspot.com/docs/methods/contacts/get_contacts) \(Incremental\) * [Deal Pipelines](https://developers.hubspot.com/docs/methods/pipelines/get_pipelines_for_object_type) -* [Deals](https://developers.hubspot.com/docs/api/crm/deals) \(including Contact associations\) +* [Deals](https://developers.hubspot.com/docs/api/crm/deals) \(including Contact associations\) \(Incremental\) * [Email Events](https://developers.hubspot.com/docs/methods/email/get_events) \(Incremental\) * [Engagements](https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements) * [Forms](https://developers.hubspot.com/docs/api/marketing/forms) -* [Line Items](https://developers.hubspot.com/docs/api/crm/line-items) +* [Line Items](https://developers.hubspot.com/docs/api/crm/line-items) \(Incremental\) * [Marketing Emails](https://legacydocs.hubspot.com/docs/methods/cms_email/get-all-marketing-email-statistics) * [Owners](https://developers.hubspot.com/docs/methods/owners/get_owners) -* [Products](https://developers.hubspot.com/docs/api/crm/products) -* [Quotes](https://developers.hubspot.com/docs/api/crm/quotes) +* [Products](https://developers.hubspot.com/docs/api/crm/products) \(Incremental\) +* [Quotes](https://developers.hubspot.com/docs/api/crm/quotes) \(Incremental\) * [Subscription Changes](https://developers.hubspot.com/docs/methods/email/get_subscriptions_timeline) \(Incremental\) -* [Tickets](https://developers.hubspot.com/docs/api/crm/tickets) +* [Tickets](https://developers.hubspot.com/docs/api/crm/tickets) \(Incremental\) * [Workflows](https://legacydocs.hubspot.com/docs/methods/workflows/v3/get_workflows) **Note**: HubSpot API currently only supports `quotes` endpoint using API Key, using Oauth it is impossible to access this stream (as reported by [community.hubspot.com](https://community.hubspot.com/t5/APIs-Integrations/Help-with-using-Feedback-CRM-API-and-Quotes-CRM-API/m-p/449104/highlight/true#M44411)). @@ -96,11 +96,12 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | updated fields and descriptions | -| 0.1.27 | 2021-12-09 | [8658](https://github.com/airbytehq/airbyte/pull/8658) | fixed config backward compatibility issue by allowing additional properties in the spec | -| 0.1.26 | 2021-11-30 | [8329](https://github.com/airbytehq/airbyte/pull/8329) | removed 'skip_dynamic_fields' config param | -| 0.1.25 | 2021-11-23 | [8216](https://github.com/airbytehq/airbyte/pull/8216) | skip dynamic fields for testing only | -| 0.1.24 | 2021-11-09 | [7683](https://github.com/airbytehq/airbyte/pull/7683) | bugfix 'Hubspot' -> 'HubSpot' | +| 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | +| 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | +| 0.1.27 | 2021-12-09 | [8658](https://github.com/airbytehq/airbyte/pull/8658) | Fixed config backward compatibility issue by allowing additional properties in the spec | +| 0.1.26 | 2021-11-30 | [8329](https://github.com/airbytehq/airbyte/pull/8329) | Removed 'skip_dynamic_fields' config param | +| 0.1.25 | 2021-11-23 | [8216](https://github.com/airbytehq/airbyte/pull/8216) | Add skip dynamic fields for testing only | +| 0.1.24 | 2021-11-09 | [7683](https://github.com/airbytehq/airbyte/pull/7683) | Fix name issue 'Hubspot' -> 'HubSpot' | | 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix oAuth flow schema| | 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | | 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` |