Skip to content

Commit

Permalink
Source google sheets: allow using spreadsheet URL instead of ID and u…
Browse files Browse the repository at this point in the history
…pdate docs (#11404)

Update spec file
 * Add Authentication label to credentials section
 * Add doc link to spreadsheet ID
 * Add a link to setup guide
  * update description text for spreadsheet_id in spec
  * change spreadsheet_id label in spec to Spreadsheet Link per github issue 11406
 
Allow using sheet URL instead of sheet ID
  * add a parser helper that extracts Sheet ID from a URL
  * add tests for parsing sheet ID out of a URL
  * add a test for backward compatibility (parser should recognize an ID)
  * Bump connector version in Dockerfile

Update documentation
    * Replace screenshot of sheet id with screenshot of sheet URL
    * Update connector version in changelog
    * Update links to docs in spec file
  • Loading branch information
grishick authored Mar 28, 2022
1 parent 848bb34 commit 1d12c7c
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ COPY google_sheets_source ./google_sheets_source
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.9
LABEL io.airbyte.version=0.2.10
LABEL io.airbyte.name=airbyte/source-google-sheets
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ tests:
connection:
- config_path: "secrets/config.json"
status: "succeed"
- config_path: "secrets/config_with_url.json"
status: "succeed"
- config_path: "secrets/service_config.json"
status: "succeed"
# was commented out because, when the old config format is used, google_sheets_source.get_authenticated_google_credentials raises an error
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
except Exception as e:
return AirbyteConnectionStatus(status=Status.FAILED, message=f"Please use valid credentials json file. Error: {e}")

spreadsheet_id = config["spreadsheet_id"]
spreadsheet_id = Helpers.get_spreadsheet_id(config["spreadsheet_id"])

try:
# Attempt to get first row of sheet
Expand Down Expand Up @@ -94,7 +94,7 @@ def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:

def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
client = GoogleSheetsClient(self.get_credentials(config))
spreadsheet_id = config["spreadsheet_id"]
spreadsheet_id = Helpers.get_spreadsheet_id(config["spreadsheet_id"])
try:
logger.info(f"Running discovery on sheet {spreadsheet_id}")
spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
Expand Down Expand Up @@ -124,7 +124,7 @@ def read(
client = GoogleSheetsClient(self.get_credentials(config))

sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
spreadsheet_id = config["spreadsheet_id"]
spreadsheet_id = Helpers.get_spreadsheet_id(config["spreadsheet_id"])

logger.info(f"Starting syncing spreadsheet {spreadsheet_id}")
# For each sheet in the spreadsheet, get a batch of rows, and as long as there hasn't been
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#

import json
import re
from collections import defaultdict
from datetime import datetime
from typing import Dict, FrozenSet, Iterable, List
Expand Down Expand Up @@ -192,3 +193,13 @@ def row_contains_relevant_data(cell_values: List[str], relevant_indices: Iterabl
if len(cell_values) > idx and cell_values[idx].strip() != "":
return True
return False

@staticmethod
def get_spreadsheet_id(id_or_url: str) -> str:
    """Return the spreadsheet ID for a value that is either a bare ID or a spreadsheet URL.

    A value starting with ``http://`` or ``https://`` is treated as a URL, and
    the ID is the first run of at least 40 word characters or hyphens that
    directly follows a ``/``. Any other value is assumed to already be an ID
    and is returned unchanged.

    :param id_or_url: spreadsheet ID, or a link to the spreadsheet.
    :return: the spreadsheet ID.
    :raises ValueError: if the value is a URL that contains no ID-like run.
    """
    if not id_or_url.startswith(("http://", "https://")):
        # Backward compatibility: a bare ID is passed through untouched.
        return id_or_url

    match = re.search(r"/([-\w]{40,})", id_or_url)
    if match is None:
        # Without this guard a URL lacking an ID fails with an opaque
        # AttributeError on ``None.group``; report the offending value instead.
        raise ValueError(f"Unable to extract a spreadsheet ID from URL: {id_or_url}")
    return match.group(1)
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Stripe Source Spec",
"type": "object",
"required": ["spreadsheet_id"],
"required": ["spreadsheet_id","credentials"],
"additionalProperties": true,
"properties": {
"spreadsheet_id": {
"type": "string",
"title": "Spreadsheet ID",
"description": "The ID of the spreadsheet to be replicated. The ID is found in the URL of your spreadsheet, typically between \"/d/\" and \"/edit\"",
"examples": ["1hLd9Qqti5XyLXZB2aFfUWDT7BG-arw2xy4HR3D-dwUb"]
"title": "Spreadsheet Link",
"description": "The link to your spreadsheet. See <a href='https://docs.airbyte.com/integrations/sources/google-sheets#sheetlink'>this guide</a> for more details.",
"examples": ["https://docs.google.com/spreadsheets/d/1hLd9Qqti3UyLXZB2aFfUWDT7BG-arw2xy4HR3D-dwUb/edit"]
},
"credentials": {
"type": "object",
"title": "Credentials",
"title": "Authentication",
"description": "Google API Credentials for connecting to Google Sheets and Google Drive APIs",
"oneOf": [
{
Expand Down Expand Up @@ -64,7 +64,7 @@
"service_account_info": {
"type": "string",
"title": "Service Account Information.",
"description": "The JSON key of the service account to use for authorization.",
"description": "The JSON key of the service account to use for authorization. See <a href='https://github.com/airbytehq/airbyte/blob/master/docs/integrations/sources/google-sheets.md#setupguide'>Setup Guide</a> for more details",
"airbyte_secret": true,
"examples": [
"{ \"type\": \"service_account\", \"project_id\": YOUR_PROJECT_ID, \"private_key_id\": YOUR_PRIVATE_KEY, ... }"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,31 @@ def mock_client_call(spreadsheetId, includeGridData, ranges=None):

self.assertEqual(expected, actual)

def test_get_spreadsheet_id(self):
    """Check Helpers.get_spreadsheet_id against sheet URLs and a bare spreadsheet ID."""
    # (input value, expected spreadsheet ID) pairs, checked in order.
    cases = (
        # https link with an /edit suffix and a gid fragment
        (
            "https://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1B_cPP9re66xI8uJK25dtY9Q/edit#gid=1820065035",
            "18vWlVH8BfjGegwY_GdV1B_cPP9re66xI8uJK25dtY9Q",
        ),
        # https link with an /edit suffix and a hyphen inside the ID
        (
            "https://docs.google.com/spreadsheets/d/18vWlVH8BfjGa-gwYGdV1BjcPP9re66xI8uJK25dtY9Q/edit",
            "18vWlVH8BfjGa-gwYGdV1BjcPP9re66xI8uJK25dtY9Q",
        ),
        # http link ending with a trailing slash
        (
            "http://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q/",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
        ),
        # http link ending with a trailing slash and an empty fragment
        (
            "http://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q/#",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
        ),
        # http link ending right after the ID
        (
            "http://docs.google.com/spreadsheets/d/18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re_6xI8uJ-25dtY9Q",
        ),
        # backward compatibility: a bare ID is returned as-is
        (
            "18vWlVH8BfjGegwY_GdV1BjcPP9re66xI8uJK25dtY9Q",
            "18vWlVH8BfjGegwY_GdV1BjcPP9re66xI8uJK25dtY9Q",
        ),
    )
    for id_or_url, expected_id in cases:
        self.assertEqual(expected_id, Helpers.get_spreadsheet_id(id_or_url))


if __name__ == "__main__":
unittest.main()
Binary file modified docs/.gitbook/assets/google_spreadsheet_url.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 26 additions & 23 deletions docs/integrations/sources/google-sheets.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,14 @@ At the time of writing, the [Google API rate limit](https://developers.google.co
To configure the connector you'll need to:

* [Authorize your Google account via OAuth](#oauth)
* [The ID of the spreadsheet you'd like to sync](#sheetid)
* [The link to the spreadsheet you'd like to sync](#sheetlink)

### <a name="oauth"></a> Authorize your Google account via OAuth
Click on the "Sign in with Google" button and authorize via your Google account.

### <a name="sheetid"></a>Spreadsheet ID
you'll need the ID of the Spreadsheet you'd like to sync. To get it, navigate to the spreadsheet in your browser, then copy the portion of the URL which comes after "/d" and before "/edit" or "/view". This is the highlighted portion of the screenshot below:
### <a name="sheetlink"></a>Spreadsheet Link
You will need the link to the spreadsheet you'd like to sync. To get it, click the Share button in the top-right corner of the Google Sheets interface, and then click Copy Link in the dialog that pops up.
These two steps are highlighted in the screenshot below:

![](../../.gitbook/assets/google_spreadsheet_url.png)

Expand All @@ -56,9 +57,9 @@ To configure the Google Sheets Source for syncs, you'll need the following:
* [Create a service account with permissions to access the Google Sheets and Drive APIs](#createserviceaccount)
* [Create a Service Account Key for the Service Account](#createserviceaccount)
* [Share the spreadsheets you'd like to sync with the Service Account created above](#sharesheet)
* [The ID of the spreadsheet you'd like to sync](#findsheetid)
* [The Link to the spreadsheet you'd like to sync](#findsheetlink)

### Setup guide
### <a name="setupguide"></a>Setup guide

#### <a name="enableapi"></a>Enable the Google Sheets and Google Drive APIs

Expand All @@ -84,33 +85,35 @@ Once you've created the Service Account, you need to explicitly give it access t

{% embed url="https://youtu.be/GyomEw5a2NQ" caption="" %}

#### <a name="findsheetid"></a>Spreadsheet ID
#### <a name="findsheetlink"></a>Spreadsheet Link

Finally, you'll need the ID of the Spreadsheet you'd like to sync. To get it, navigate to the spreadsheet in your browser, then copy the portion of the URL which comes after "/d" and before "/edit" or "/view". This is the highlighted portion of the screenshot below:
Finally, you'll need the link to the spreadsheet you'd like to sync. To get it, click the Share button in the top-right corner of the Google Sheets interface, and then click Copy Link in the dialog that pops up.
These two steps are highlighted in the screenshot below:

![](../../.gitbook/assets/google_spreadsheet_url.png)

### Setting up in the Airbyte UI

The Airbyte UI will ask for two things:

1. Spreadsheet ID
1. Spreadsheet Link
2. The content of the credentials JSON you created in the ["Create a Service Account and Service Account Key"](#createserviceaccount) step above. This should be as simple as opening the file and copy-pasting all its contents into this field in the Airbyte UI.

## Changelog

| Version | Date | Pull Request | Subject |
|:--------| :-------- | :----- |:------------------------------------------------------------------------------|
| 0.2.9 | 2022-01-25 | [9208](https://github.com/airbytehq/airbyte/pull/9208) | Update title and descriptions |
| 0.2.7 | 2021-09-27 | [8470](https://github.com/airbytehq/airbyte/pull/8470) | Migrate to the CDK |
| 0.2.6 | 2021-09-27 | [6354](https://github.com/airbytehq/airbyte/pull/6354) | Support connecting via Oauth webflow |
| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full_refresh test by adding supported_sync_modes to Stream initialization |
| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only |
| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support |
| 0.2.2 | 2021-04-20 | [2994](https://github.com/airbytehq/airbyte/pull/2994) | Formatting spec |
| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning |
| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties |
| 0.1.7 | 2021-01-21 | [1762](https://github.com/airbytehq/airbyte/pull/1762) | Fix issue large spreadsheet |
| 0.1.6 | 2021-01-27 | [1668](https://github.com/airbytehq/airbyte/pull/1668) | Adopt connector best practices |
| 0.1.5 | 2020-12-30 | [1438](https://github.com/airbytehq/airbyte/pull/1438) | Implement backoff |
| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |
| Version | Date | Pull Request | Subject |
|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------|
| 0.2.10 | 2022-03-25 | [11404](https://github.com/airbytehq/airbyte/pull/11404) | Allow using Spreadsheet Link/URL instead of Spreadsheet ID |
| 0.2.9 | 2022-01-25 | [9208](https://github.com/airbytehq/airbyte/pull/9208) | Update title and descriptions |
| 0.2.7 | 2021-09-27 | [8470](https://github.com/airbytehq/airbyte/pull/8470) | Migrate to the CDK |
| 0.2.6 | 2021-09-27 | [6354](https://github.com/airbytehq/airbyte/pull/6354) | Support connecting via Oauth webflow |
| 0.2.5 | 2021-09-12 | [5972](https://github.com/airbytehq/airbyte/pull/5972) | Fix full_refresh test by adding supported_sync_modes to Stream initialization |
| 0.2.4 | 2021-08-05 | [5233](https://github.com/airbytehq/airbyte/pull/5233) | Fix error during listing sheets with diagram only |
| 0.2.3 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support |
| 0.2.2 | 2021-04-20 | [2994](https://github.com/airbytehq/airbyte/pull/2994) | Formatting spec |
| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning |
| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties |
| 0.1.7 | 2021-01-21 | [1762](https://github.com/airbytehq/airbyte/pull/1762) | Fix issue large spreadsheet |
| 0.1.6 | 2021-01-27 | [1668](https://github.com/airbytehq/airbyte/pull/1668) | Adopt connector best practices |
| 0.1.5 | 2020-12-30 | [1438](https://github.com/airbytehq/airbyte/pull/1438) | Implement backoff |
| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |

0 comments on commit 1d12c7c

Please sign in to comment.