diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index aa6c1e9280bb..2faea7820a9c 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -399,7 +399,7 @@ - name: Harvest sourceDefinitionId: fe2b4084-3386-4d3b-9ad6-308f61a6f1e6 dockerRepository: airbyte/source-harvest - dockerImageTag: 0.1.8 + dockerImageTag: 0.1.9 documentationUrl: https://docs.airbyte.io/integrations/sources/harvest icon: harvest.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index eed36e2fc1f2..15870f02884d 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3547,7 +3547,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-harvest:0.1.8" +- dockerImage: "airbyte/source-harvest:0.1.9" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/harvest" connectionSpecification: @@ -3587,14 +3587,11 @@ - "client_id" - "client_secret" - "refresh_token" - additionalProperties: false + additionalProperties: true properties: auth_type: type: "string" const: "Client" - enum: - - "Client" - default: "Client" order: 0 client_id: title: "Client ID" @@ -3614,14 +3611,11 @@ title: "Authenticate with Personal Access Token" required: - "api_token" - additionalProperties: false + additionalProperties: true properties: auth_type: type: "string" const: "Token" - enum: - - "Token" - default: "Token" order: 0 api_token: title: "Personal Access Token" @@ -3654,7 +3648,7 @@ oauth_config_specification: complete_oauth_output_specification: type: "object" - additionalProperties: false + additionalProperties: true properties: refresh_token: 
type: "string" @@ -3663,7 +3657,7 @@ - "refresh_token" complete_oauth_server_input_specification: type: "object" - additionalProperties: false + additionalProperties: true properties: client_id: type: "string" @@ -3671,7 +3665,7 @@ type: "string" complete_oauth_server_output_specification: type: "object" - additionalProperties: false + additionalProperties: true properties: client_id: type: "string" diff --git a/airbyte-integrations/connectors/source-harvest/Dockerfile b/airbyte-integrations/connectors/source-harvest/Dockerfile index aab94b394e66..357777ca5bd7 100644 --- a/airbyte-integrations/connectors/source-harvest/Dockerfile +++ b/airbyte-integrations/connectors/source-harvest/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.8 +LABEL io.airbyte.version=0.1.9 LABEL io.airbyte.name=airbyte/source-harvest diff --git a/airbyte-integrations/connectors/source-harvest/README.md b/airbyte-integrations/connectors/source-harvest/README.md index ea44019174c2..0d0b500167f4 100644 --- a/airbyte-integrations/connectors/source-harvest/README.md +++ b/airbyte-integrations/connectors/source-harvest/README.md @@ -79,7 +79,7 @@ docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integrat Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. 
First install test dependencies into your virtual environment: ``` -pip install .[tests] +pip install .'[tests]' ``` ### Unit Tests To run unit tests locally, from the connector directory run: diff --git a/airbyte-integrations/connectors/source-harvest/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-harvest/integration_tests/acceptance.py index 1302b2f57e10..950b53b59d41 100644 --- a/airbyte-integrations/connectors/source-harvest/integration_tests/acceptance.py +++ b/airbyte-integrations/connectors/source-harvest/integration_tests/acceptance.py @@ -11,6 +11,4 @@ @pytest.fixture(scope="session", autouse=True) def connector_setup(): """This fixture is a placeholder for external resources that acceptance test might require.""" - # TODO: setup test dependencies if needed. otherwise remove the TODO comments yield - # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-harvest/requirements.txt b/airbyte-integrations/connectors/source-harvest/requirements.txt new file mode 100644 index 000000000000..7be17a56d745 --- /dev/null +++ b/airbyte-integrations/connectors/source-harvest/requirements.txt @@ -0,0 +1,3 @@ +# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. +-e ../../bases/source-acceptance-test +-e . 
diff --git a/airbyte-integrations/connectors/source-harvest/setup.py b/airbyte-integrations/connectors/source-harvest/setup.py index f7bc9e62a88c..25cfab38fd2f 100644 --- a/airbyte-integrations/connectors/source-harvest/setup.py +++ b/airbyte-integrations/connectors/source-harvest/setup.py @@ -11,6 +11,8 @@ TEST_REQUIREMENTS = [ "pytest~=6.1", + "requests-mock", + "source-acceptance-test", ] setup( diff --git a/airbyte-integrations/connectors/source-harvest/source_harvest/schemas/time_entries.json b/airbyte-integrations/connectors/source-harvest/source_harvest/schemas/time_entries.json index e4a8abe42858..03a8993eb149 100644 --- a/airbyte-integrations/connectors/source-harvest/source_harvest/schemas/time_entries.json +++ b/airbyte-integrations/connectors/source-harvest/source_harvest/schemas/time_entries.json @@ -151,12 +151,10 @@ "type": ["null", "string"] }, "started_time": { - "type": ["null", "string"], - "format": "date-time" + "type": ["null", "string"] }, "ended_time": { - "type": ["null", "string"], - "format": "date-time" + "type": ["null", "string"] }, "is_running": { "type": ["null", "boolean"] diff --git a/airbyte-integrations/connectors/source-harvest/source_harvest/spec.json b/airbyte-integrations/connectors/source-harvest/source_harvest/spec.json index 3c8325220e7d..75b9cdad3d7a 100644 --- a/airbyte-integrations/connectors/source-harvest/source_harvest/spec.json +++ b/airbyte-integrations/connectors/source-harvest/source_harvest/spec.json @@ -32,13 +32,11 @@ "type": "object", "title": "Authenticate via Harvest (OAuth)", "required": ["client_id", "client_secret", "refresh_token"], - "additionalProperties": false, + "additionalProperties": true, "properties": { "auth_type": { "type": "string", "const": "Client", - "enum": ["Client"], - "default": "Client", "order": 0 }, "client_id": { @@ -64,13 +62,11 @@ "type": "object", "title": "Authenticate with Personal Access Token", "required": ["api_token"], - "additionalProperties": false, + 
"additionalProperties": true, "properties": { "auth_type": { "type": "string", "const": "Token", - "enum": ["Token"], - "default": "Token", "order": 0 }, "api_token": { @@ -102,7 +98,7 @@ "oauth_config_specification": { "complete_oauth_output_specification": { "type": "object", - "additionalProperties": false, + "additionalProperties": true, "properties": { "refresh_token": { "type": "string", @@ -112,7 +108,7 @@ }, "complete_oauth_server_input_specification": { "type": "object", - "additionalProperties": false, + "additionalProperties": true, "properties": { "client_id": { "type": "string" @@ -124,7 +120,7 @@ }, "complete_oauth_server_output_specification": { "type": "object", - "additionalProperties": false, + "additionalProperties": true, "properties": { "client_id": { "type": "string", diff --git a/airbyte-integrations/connectors/source-harvest/source_harvest/streams.py b/airbyte-integrations/connectors/source-harvest/source_harvest/streams.py index e1b7f79d5ab8..dfc828a3abf9 100644 --- a/airbyte-integrations/connectors/source-harvest/source_harvest/streams.py +++ b/airbyte-integrations/connectors/source-harvest/source_harvest/streams.py @@ -24,6 +24,13 @@ def data_field(self) -> str: """ return self.name + def backoff_time(self, response: requests.Response): + if "Retry-After" in response.headers: + return int(response.headers["Retry-After"]) + else: + self.logger.info("Retry-after header not found. 
Using default backoff value") + return super().backoff_time(response) + def path(self, **kwargs) -> str: return self.name @@ -294,14 +301,6 @@ def __init__(self, from_date: pendulum.date = None, **kwargs): else: self._to_date = current_date - def request_params(self, stream_state, **kwargs) -> MutableMapping[str, Any]: - params = super().request_params(stream_state, **kwargs) - current_date = pendulum.now() - # `from` and `to` params are required for reports calls - # min `from` value is current_date - 1 year - params.update({"from": self._from_date.strftime("%Y%m%d"), "to": current_date.strftime("%Y%m%d")}) - return params - def path(self, **kwargs) -> str: return f"reports/{self.report_path}" @@ -323,26 +322,9 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp ) yield record - def request_params(self, stream_state: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: - stream_state = stream_state or {} + def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: params = super().request_params(stream_state, **kwargs) - - # subtract `from` date by 1 year to avoid Harvest exception - # `from` date may not be less than `to` - 1 year - if stream_state.get(self.cursor_field): - cursor_date = pendulum.parse(stream_state[self.cursor_field]).date() - dates_diff = cursor_date - self._from_date - if dates_diff.years > 0 or dates_diff.years == 0 and dates_diff.remaining_days > 0: - self._from_date = cursor_date.subtract(years=1) - - # `from` and `to` params are required for reports calls - # min `from` value is current_date - 1 year - params.update( - { - "from": self._from_date.strftime(self.date_param_template), - "to": stream_state.get(self.cursor_field, self._to_date.strftime(self.date_param_template)), - } - ) + params = {**params, **stream_slice} if stream_slice else params return params def get_updated_state(self, current_stream_state: 
MutableMapping[str, Any], latest_record: Mapping[str, Any]): @@ -355,6 +337,27 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late return {self.cursor_field: max(latest_benchmark, current_stream_state[self.cursor_field])} return {self.cursor_field: latest_benchmark} + def stream_slices(self, sync_mode, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[MutableMapping[str, Any]]]: + """ + Yield consecutive `from`/`to` request-param slices covering the start date up to today, each at most one year long (the maximum report range, see the docs link below). + """ + start_date = self._from_date + end_date = pendulum.now().date() + + # resume from the saved cursor; with no state, or state lacking the cursor key, keep the configured start date + if stream_state and stream_state.get(self.cursor_field): + start_date = pendulum.parse(stream_state.get(self.cursor_field)).date() + + while start_date < end_date: + # Max size of date chunks is 1 year + # Docs: https://help.getharvest.com/api-v2/reports-api/reports/time-reports/ + end_date_slice = end_date if start_date >= end_date.subtract(years=1) else start_date.add(years=1) + date_slice = {"from": start_date.strftime(self.date_param_template), "to": end_date_slice.strftime(self.date_param_template)} + + start_date = end_date_slice + + yield date_slice + class ExpensesClients(IncrementalReportsBase): """ @@ -388,11 +391,9 @@ class ExpensesTeam(IncrementalReportsBase): report_path = "expenses/team" -class Uninvoiced(ReportsBase): +class Uninvoiced(IncrementalReportsBase): """ Docs: https://help.getharvest.com/api-v2/reports-api/reports/uninvoiced-report/ - - TODO: `from`/`to` pagination does not work for `uninvoiced` stream. Look like a bug on Harvest side. Check out later. 
""" report_path = "uninvoiced" @@ -424,7 +425,7 @@ class TimeTasks(IncrementalReportsBase): class TimeTeam(IncrementalReportsBase): """ - Docs: https://help.getharvest.com/api-v2/reports-api/reports/time-reports/ (Team Report) + Docs: https://help.getharvest.com/api-v2/reports-api/reports/time-reports/ """ report_path = "time/team" @@ -432,7 +433,7 @@ class TimeTeam(IncrementalReportsBase): class ProjectBudget(ReportsBase): """ - Docs: https://help.getharvest.com/api-v2/reports-api/reports/time-reports/#team-report + Docs: https://help.getharvest.com/api-v2/reports-api/reports/project-budget-report/#project-budget-report """ report_path = "project_budget" diff --git a/airbyte-integrations/connectors/source-harvest/unit_tests/conftest.py b/airbyte-integrations/connectors/source-harvest/unit_tests/conftest.py new file mode 100644 index 000000000000..fd66898034ab --- /dev/null +++ b/airbyte-integrations/connectors/source-harvest/unit_tests/conftest.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+# + +from pendulum import parse +from pytest import fixture + + +@fixture(name="config") +def config_fixture(requests_mock): + url = "https://id.getharvest.com/api/v2/oauth2/token" + requests_mock.get(url, json={}) + + config = {"account_id": "ID", "replication_start_date": "2021-01-01T21:20:07Z", "credentials": {"api_token": "TOKEN"}} + + return config + + +@fixture(name="replication_start_date") +def replication_start_date_fixture(config): + return parse(config["replication_start_date"]) + + +@fixture(name="from_date") +def from_date_fixture(replication_start_date): + return replication_start_date.date() + + +@fixture(name="mock_stream") +def mock_stream_fixture(requests_mock): + def _mock_stream(path, response={}): + url = f"https://api.harvestapp.com/v2/{path}" + requests_mock.get(url, json=response) + + return _mock_stream diff --git a/airbyte-integrations/connectors/source-harvest/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-harvest/unit_tests/unit_test.py index dddaea0060fa..582b3fba1785 100644 --- a/airbyte-integrations/connectors/source-harvest/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-harvest/unit_tests/unit_test.py @@ -2,6 +2,81 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
# +import requests +from airbyte_cdk.logger import AirbyteLogger +from airbyte_cdk.sources.streams.http.auth import NoAuth +from source_harvest.source import SourceHarvest +from source_harvest.streams import ExpensesClients, HarvestStream, InvoicePayments -def test_example_method(): - assert True +logger = AirbyteLogger() + + +def test_check_connection_ok(config, mock_stream): + mock_stream("users", response={"users": [{"id": 1}], "next_page": 2}) + ok, error_msg = SourceHarvest().check_connection(logger, config=config) + + assert ok + assert not error_msg + + +def test_check_connection_empty_config(config): + config = {} + + ok, error_msg = SourceHarvest().check_connection(logger, config=config) + + assert not ok + assert error_msg + + +def test_check_connection_invalid_config(config): + config.pop("replication_start_date") + ok, error_msg = SourceHarvest().check_connection(logger, config=config) + + assert not ok + assert error_msg + + +def test_check_connection_exception(config): + ok, error_msg = SourceHarvest().check_connection(logger, config=config) + + assert not ok + assert error_msg + + +def test_streams(config): + streams = SourceHarvest().streams(config) + + assert len(streams) == 32 + + +def test_next_page_token(config, mocker): + next_page = 2 + expected = {"page": next_page} + + instance = HarvestStream(authenticator=NoAuth()) + + response = mocker.Mock(spec=requests.Response, request=mocker.Mock(spec=requests.Request)) + response.json.return_value = {"next_page": next_page} + + assert instance.next_page_token(response) == expected + + +def test_child_stream_slices(config, replication_start_date, mock_stream): + object_id = 1 + mock_stream("invoices", response={"invoices": [{"id": object_id}]}) + mock_stream(f"invoices/{object_id}/payments", {"invoice_payments": [{"id": object_id}]}) + + invoice_payments_instance = InvoicePayments(authenticator=NoAuth(), replication_start_date=replication_start_date) + stream_slice = 
next(invoice_payments_instance.stream_slices(sync_mode=None)) + invoice_payments = invoice_payments_instance.read_records(sync_mode=None, stream_slice=stream_slice) + + assert next(invoice_payments) + + +def test_report_base_stream(config, from_date, mock_stream): + mock_stream("reports/expenses/clients", response={"results": [{"client_id": 1}]}) + + invoice_payments_instance = ExpensesClients(authenticator=NoAuth(), from_date=from_date) + invoice_payments = invoice_payments_instance.read_records(sync_mode=None) + + assert next(invoice_payments) diff --git a/docs/integrations/sources/harvest.md b/docs/integrations/sources/harvest.md index 6349fcaf35fd..95a78a3f6d27 100644 --- a/docs/integrations/sources/harvest.md +++ b/docs/integrations/sources/harvest.md @@ -1,12 +1,54 @@ # Harvest -## Overview +This page contains the setup guide and reference information for the Harvest source connector. -The Harvest connector can be used to sync your Harvest data. It supports full refresh sync for all streams and incremental sync for all streams except of Expense Reports streams which are: Clients Report, Projects Report, Categories Report, Team Report. Incremental sync is also now available for Company stream, but it always has only one record. +## Prerequisites + +A Harvest account (you will need its account ID), plus either a Personal Access Token (Airbyte Open Source) or the ability to authorize Airbyte via OAuth (Airbyte Cloud). See the [Harvest authentication docs](https://help.getharvest.com/api-v2/authentication-api/authentication/authentication/) for more details. + +## Setup guide +### Step 1: Set up Harvest + +On Airbyte Open Source this connector authenticates with a Harvest Personal Access Token (Airbyte Cloud uses OAuth instead; see Step 2). To create a token, follow the instructions below: + +1. Go to the Account Settings page. +2. Under the Integrations section, click the Authorized OAuth2 API Clients button. +3. On the page that opens, click the Create New Personal Access Token button and follow the instructions. + +## Step 2: Set up the Harvest connector in Airbyte + +### For Airbyte Cloud: + +1. [Log into your Airbyte Cloud](https://cloud.airbyte.io/workspaces) account. +2. In the left navigation bar, click **Sources**. 
In the top-right corner, click **+new source**. +3. On the Set up the source page, enter the name for the Harvest connector and select **Harvest** from the Source type dropdown. +4. For Airbyte Cloud, click **Authenticate your Harvest account** to sign in with Harvest and authorize your account. +5. Enter your `account_id` +6. Enter the `replication_start_date` you want your sync to start from +7. Click **Set up source** + +### For Airbyte OSS: +1. Navigate to the Airbyte Open Source dashboard +2. Set the name for your source +3. Enter your `api_token` +4. Enter your `account_id` +5. Enter the `replication_start_date` you want your sync to start from +6. Click **Set up source** + +## Supported sync modes + +The Harvest source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts#connection-sync-modes): + +| Feature | Supported? | +| :--- | :--- | +| Full Refresh Sync | Yes | +| Incremental Sync | Yes | +| Replicate Incremental Deletes | No | +| SSL connection | Yes | +| Namespaces | No | -### Output schema -Several output streams are available from this source: +## Supported Streams * [Client Contacts](https://help.getharvest.com/api-v2/clients-api/clients/contacts/) \(Incremental\) * [Clients](https://help.getharvest.com/api-v2/clients-api/clients/clients/) \(Incremental\) @@ -34,41 +76,15 @@ Several output streams are available from this source: * [Time Reports](https://help.getharvest.com/api-v2/reports-api/reports/time-reports/) * [Project Budget Report](https://help.getharvest.com/api-v2/reports-api/reports/project-budget-report/) -### Features - -| Feature | Supported? | -| :--- | :--- | -| Full Refresh Sync | Yes | -| Incremental Sync | Yes | -| Replicate Incremental Deletes | No | -| SSL connection | Yes | -| Namespaces | No | - -### Performance considerations +## Performance considerations The Harvest connector will gracefully handle rate limits. 
For more information, see [the Harvest docs for rate limitations](https://help.getharvest.com/api-v2/introduction/overview/general/#rate-limiting). -## Getting started - -### Requirements - -* Harvest Account -* Harvest Authorized OAuth2 API Client to create Access Token and get account ID - -### Setup guide - -This connector supports only authentication with API Key. To obtain API key follow the instructions below: - -1. Go to Account Settings page; -2. Under Integrations section press Authorized OAuth2 API Clients button; -3. New page will be opened on which you need to click on Create New Personal Access Token button and follow instructions. - -See [docs](https://help.getharvest.com/api-v2/authentication-api/authentication/authentication/) for more details. - ## Changelog | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.9 | 2022-08-04 | [15312](https://github.com/airbytehq/airbyte/pull/15312) | Fix `started_time` and `ended_time` format schema error and updated report slicing | | 0.1.8 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update titles and descriptions | | 0.1.6 | 2021-11-14 | [7952](https://github.com/airbytehq/airbyte/pull/7952) | Implement OAuth 2.0 support | | 0.1.5 | 2021-09-28 | [5747](https://github.com/airbytehq/airbyte/pull/5747) | Update schema date-time fields | @@ -77,4 +93,3 @@ See [docs](https://help.getharvest.com/api-v2/authentication-api/authentication/ | 0.1.2 | 2021-06-07 | [4222](https://github.com/airbytehq/airbyte/pull/4222) | Correct specification parameter name | | 0.1.1 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | | 0.1.0 | 2021-06-07 | [3709](https://github.com/airbytehq/airbyte/pull/3709) | Release Harvest connector! 
| - diff --git a/docs/integrations/sources/paypal-transaction.md b/docs/integrations/sources/paypal-transaction.md index fc333d29bc32..d52605b29f95 100644 --- a/docs/integrations/sources/paypal-transaction.md +++ b/docs/integrations/sources/paypal-transaction.md @@ -25,11 +25,11 @@ In order to get an `Client ID` and `Secret` please go to [this](https://develope ### For Airbyte OSS: 1. Navigate to the Airbyte Open Source dashboard 2. Set the name for your source -4. Enter your client id +3. Enter your client id 4. Enter your secret -4. Choose if your account is sandbox -5. Enter the date you want your sync to start from -6. Click **Set up source** +5. Choose if your account is sandbox +6. Enter the date you want your sync to start from +7. Click **Set up source** ## Supported sync modes