diff --git a/airbyte-integrations/connectors/source-google-sheets/Dockerfile b/airbyte-integrations/connectors/source-google-sheets/Dockerfile index 145fd52d46e4..a90647b24c18 100644 --- a/airbyte-integrations/connectors/source-google-sheets/Dockerfile +++ b/airbyte-integrations/connectors/source-google-sheets/Dockerfile @@ -34,5 +34,5 @@ COPY google_sheets_source ./google_sheets_source ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.9 +LABEL io.airbyte.version=0.2.10 LABEL io.airbyte.name=airbyte/source-google-sheets diff --git a/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml b/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml index 8ac9e73c56f3..aace1432f990 100644 --- a/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-google-sheets/acceptance-test-config.yml @@ -7,6 +7,8 @@ tests: connection: - config_path: "secrets/config.json" status: "succeed" + - config_path: "secrets/config_with_url.json" + status: "succeed" - config_path: "secrets/service_config.json" status: "succeed" # was commented because when old config format used google_sheets_source.get_authenticated_google_credentials raises error diff --git a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py index 817aa3cae0d1..efc29628f5a6 100644 --- a/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py +++ b/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py @@ -42,7 +42,7 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus: except Exception as e: return AirbyteConnectionStatus(status=Status.FAILED, 
@staticmethod
def get_spreadsheet_id(id_or_url: str) -> str:
    """Return the spreadsheet ID for a value that is either an ID or a URL.

    Args:
        id_or_url: A bare spreadsheet ID, or an ``http://`` / ``https://``
            URL that contains the ID as a path segment.

    Returns:
        The extracted ID when the input is a URL from which an ID can be
        recognised; otherwise the input unchanged. A bare ID is always
        returned as-is.
    """
    # Anything that is not an http(s) URL is treated as an ID already.
    if not id_or_url.startswith(("http://", "https://")):
        return id_or_url

    # Original heuristic: the first path segment of 40+ ID characters.
    # Tried first so every URL that used to resolve keeps the same result.
    long_segment = re.search(r"(/)([-\w]{40,})([/]?)", id_or_url)
    if long_segment:
        return long_segment.group(2)

    # Fallback for IDs shorter than 40 characters in the usual
    # ".../spreadsheets/d/<id>/..." URL form.
    by_path = re.search(r"/spreadsheets/d/([-\w]+)", id_or_url)
    if by_path:
        return by_path.group(1)

    # No ID recognised: hand the value back instead of failing on a
    # missing match object, so the caller's API request reports the problem.
    return id_or_url
def test_get_spreadsheet_id(self):
    """Check ID extraction for several URL shapes and for a bare ID."""
    # (input value, expected spreadsheet ID), checked in order.
    cases = (
        (
            "https://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1B_cPP9re66xI8uJK25dtY9Q/edit#gid=1820065035",
            "18vWlVH8BfjGegwY_GdV1B_cPP9re66xI8uJK25dtY9Q",
        ),
        (
            "https://docs.google.com/spreadsheets/d/18vWlVH8BfjGa-gwYGdV1BjcPP9re66xI8uJK25dtY9Q/edit",
            "18vWlVH8BfjGa-gwYGdV1BjcPP9re66xI8uJK25dtY9Q",
        ),
        (
            "http://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q/",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
        ),
        (
            "http://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q/#",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
        ),
        (
            "http://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
        ),
        (
            "18vWlVH8BfjGegwY_GdV1BjcPP9re66xI8uJK25dtY9Q",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re66xI8uJK25dtY9Q",
        ),
    )
    for id_or_url, expected_id in cases:
        self.assertEqual(expected_id, Helpers.get_spreadsheet_id(id_or_url))
b/docs/.gitbook/assets/google_spreadsheet_url.png index 747fcfaebac3..b1360834caba 100644 Binary files a/docs/.gitbook/assets/google_spreadsheet_url.png and b/docs/.gitbook/assets/google_spreadsheet_url.png differ diff --git a/docs/integrations/sources/google-sheets.md b/docs/integrations/sources/google-sheets.md index 74af843f2e4a..948b90d5f3f4 100644 --- a/docs/integrations/sources/google-sheets.md +++ b/docs/integrations/sources/google-sheets.md @@ -36,13 +36,14 @@ At the time of writing, the [Google API rate limit](https://developers.google.co To configure the connector you'll need to: * [Authorize your Google account via OAuth](#oauth) -* [The ID of the spreadsheet you'd like to sync](#sheetid) +* [The link to the spreadsheet you'd like to sync](#sheetlink) ### Authorize your Google account via OAuth Click on the "Sign in with Google" button and authorize via your Google account. -### Spreadsheet ID -you'll need the ID of the Spreadsheet you'd like to sync. To get it, navigate to the spreadsheet in your browser, then copy the portion of the URL which comes after "/d" and before "/edit" or "/view". This is the highlighted portion of the screenshot below: +### Spreadsheet Link +You will need the link to the spreadsheet you'd like to sync. To get it, click the Share button in the top right corner of the Google Sheets interface, and then click Copy Link in the dialog that pops up. 
+These two steps are highlighted in the screenshot below: ![](../../.gitbook/assets/google_spreadsheet_url.png) @@ -56,9 +57,9 @@ To configure the Google Sheets Source for syncs, you'll need the following: * [Create a service account with permissions to access the Google Sheets and Drive APIs](#createserviceaccount) * [Create a Service Account Key for the Service Account](#createserviceaccount) * [Share the spreadsheets you'd like to sync with the Service Account created above](#sharesheet) -* [The ID of the spreadsheet you'd like to sync](#findsheetid) +* [The link to the spreadsheet you'd like to sync](#findsheetlink) -### Setup guide +### Setup guide #### Enable the Google Sheets and Google Drive APIs @@ -84,9 +85,10 @@ Once you've created the Service Account, you need to explicitly give it access t {% embed url="https://youtu.be/GyomEw5a2NQ" caption="" %} -#### Spreadsheet ID +#### Spreadsheet Link -Finally, you'll need the ID of the Spreadsheet you'd like to sync. To get it, navigate to the spreadsheet in your browser, then copy the portion of the URL which comes after "/d" and before "/edit" or "/view". This is the highlighted portion of the screenshot below: +Finally, you'll need the link to the spreadsheet you'd like to sync. To get it, click the Share button in the top right corner of the Google Sheets interface, and then click Copy Link in the dialog that pops up. +These two steps are highlighted in the screenshot below: ![](../../.gitbook/assets/google_spreadsheet_url.png) @@ -94,23 +96,24 @@ Finally, you'll need the ID of the Spreadsheet you'd like to sync. To get it, na The Airbyte UI will ask for two things: -1. Spreadsheet ID +1. Spreadsheet Link 2. The content of the credentials JSON you created in the ["Create a Service Account and Service Account Key"](#createserviceaccount) step above. This should be as simple as opening the file and copy-pasting all its contents into this field in the Airbyte UI. 
## Changelog -| Version | Date | Pull Request | Subject | -|:--------| :-------- | :----- |:------------------------------------------------------------------------------| -| 0.2.9 | 2022-01-25 | [9208](https://github.com/airbytehq/airbyte/pull/9208) | Update title and descriptions | -| 0.2.7 | 2021-09-27 | [8470](https://github.com/airbytehq/airbyte/pull/8470) | Migrate to the CDK | -| 0.2.6 | 2021-09-27 | [6354](https://github.com/airbytehq/airbyte/pull/6354) | Support connecting via Oauth webflow | -| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full_refresh test by adding supported_sync_modes to Stream initialization | -| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only | -| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | -| 0.2.2 | 2021-04-20 | [2994](https://github.com/airbytehq/airbyte/pull/2994) | Formatting spec | -| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.7 | 2021-01-21 | [1762](https://github.com/airbytehq/airbyte/pull/1762) | Fix issue large spreadsheet | -| 0.1.6 | 2021-01-27 | [1668](https://github.com/airbytehq/airbyte/pull/1668) | Adopt connector best practices | -| 0.1.5 | 2020-12-30 | [1438](https://github.com/airbytehq/airbyte/pull/1438) | Implement backoff | -| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------| +| 0.2.10 | 2022-03-25 | 
[11404](https://github.com/airbytehq/airbyte/pull/11404) | Allow using Spreadsheet Link/URL instead of Spreadsheet ID | +| 0.2.9 | 2022-01-25 | [9208](https://github.com/airbytehq/airbyte/pull/9208) | Update title and descriptions | +| 0.2.7 | 2021-09-27 | [8470](https://github.com/airbytehq/airbyte/pull/8470) | Migrate to the CDK | +| 0.2.6 | 2021-09-27 | [6354](https://github.com/airbytehq/airbyte/pull/6354) | Support connecting via Oauth webflow | +| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full_refresh test by adding supported_sync_modes to Stream initialization | +| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only | +| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | +| 0.2.2 | 2021-04-20 | [2994](https://github.com/airbytehq/airbyte/pull/2994) | Formatting spec | +| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | +| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | +| 0.1.7 | 2021-01-21 | [1762](https://github.com/airbytehq/airbyte/pull/1762) | Fix issue large spreadsheet | +| 0.1.6 | 2021-01-27 | [1668](https://github.com/airbytehq/airbyte/pull/1668) | Adopt connector best practices | +| 0.1.5 | 2020-12-30 | [1438](https://github.com/airbytehq/airbyte/pull/1438) | Implement backoff | +| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |