From 53ee684c7a230865d40a464d2836f653fabaa386 Mon Sep 17 00:00:00 2001 From: Lan <53841803+lanlooker@users.noreply.github.com> Date: Tue, 7 Dec 2021 03:39:24 +0000 Subject: [PATCH] feat: Python SDK example: write query result to BigQuery using Cloud Functions (#908) This script provides an API/SDK workaround for the most requested feature suggestion for the System Activity team: "Edit LookML in the System Activity model" (https://feedback.us.pendo.io/app/#/case/17625) "Export system activity explores directly to BQ" (https://portal.feedback.us.pendo.io/app/#/case/61595) For context: b/204835776 --- examples/python/README.md | 3 +- .../README.md | 13 ++++ .../cloud-function-write-to-bigquery/main.py | 68 +++++++++++++++++++ .../requirements.txt | 7 ++ 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 examples/python/cloud-function-write-to-bigquery/README.md create mode 100644 examples/python/cloud-function-write-to-bigquery/main.py create mode 100644 examples/python/cloud-function-write-to-bigquery/requirements.txt diff --git a/examples/python/README.md b/examples/python/README.md index e460e54d2..56c8f8e6b 100644 --- a/examples/python/README.md +++ b/examples/python/README.md @@ -7,7 +7,8 @@ The full details of all Looker API endpoints are listed in Looker Docs: [Version ## Full Applications - [Flask full app demo](lookersdk-flask) -- [Google Cloud Function: User Creation](cloud-function-user-provision) +- [Google Cloud Function & Google Sheet : Create new users from reading email addresses in a Google Sheet](cloud-function-user-provision) +- [Google Cloud Function & BigQuery: Run a query in Looker, and write the result to a BigQuery table](cloud-function-write-to-bigquery) ## Connection : Manage Database Connections diff --git a/examples/python/cloud-function-write-to-bigquery/README.md b/examples/python/cloud-function-write-to-bigquery/README.md new file mode 100644 index 000000000..3d9fd3b46 --- /dev/null +++ 
b/examples/python/cloud-function-write-to-bigquery/README.md @@ -0,0 +1,13 @@ +# Run a Looker query and write the result to a BigQuery table using a Cloud Function + +This repository contains a [Google Cloud Function](https://cloud.google.com/functions) that leverages the Looker Python SDK and the Python client for BigQuery to get the result of a query in Looker and load it into a BigQuery table. + +A potential use case is to get data from Looker's System Activity and write it to BigQuery (currently, Looker's System Activity stores a maximum of 100k rows, or 90 days of historical query and event data). These BigQuery tables can then be registered as a connection in Looker for additional LookML data modeling. For more flexibility on System Activity, consider using [Elite System Activity](https://docs.looker.com/admin-options/system-activity/elite-system-activity). + +A Cloud Function is easy to set up and well suited to lightweight, on-the-fly tasks. For heavy ETL/ELT workloads, consider using Looker's native actions (sending to Google Cloud Storage) or ETL/ELT tools (such as GCP's Dataflow). + +## Demo + +

+ Setting environment variables in the Cloud Functions UI

# Patch metadata that preceded this module in the original diff, kept as
# comments so that no content is lost:
#   \ No newline at end of file
#   diff --git a/examples/python/cloud-function-write-to-bigquery/main.py b/examples/python/cloud-function-write-to-bigquery/main.py
#   new file mode 100644
#   index 000000000..f52a48f24
#   --- /dev/null
#   +++ b/examples/python/cloud-function-write-to-bigquery/main.py
#   @@ -0,0 +1,68 @@
"""This Cloud Function accomplishes the following tasks:
1. Get data from a Looker query in CSV format
2. Transform columns' names by replacing a white space with an underscore
("User Name" to "User_Name") since BigQuery does not accept a white space inside columns' names
3. Write the modified column name and data to a CSV file stored in Cloud Functions' temporary disk
4. Load the CSV file to a BigQuery table

Last modified: November 2021
"""

from google.cloud import bigquery
import looker_sdk

# Module-level clients shared by every function below.
client = bigquery.Client()
sdk = looker_sdk.init40()

# Scratch file handed from write_to_file() to load_to_bq().
# Files can only be modified/written inside /tmp.
_CSV_PATH = "/tmp/table.csv"


def main(request):
    """Cloud Function entry point: copy one Looker query result into BigQuery.

    The query is run exactly once and its result is passed on to
    ``write_to_file`` (previously the query was executed twice: once here,
    with the result discarded, and once more inside ``write_to_file``).

    Args:
        request: The request object passed in by the caller. It is not read.

    Returns:
        A fixed success message.
    """
    data = get_data_from_looker()
    write_to_file(data)
    load_to_bq()
    return "Successfully loaded data from Looker to BigQuery"


def get_data_from_looker(query_id=1, limit=5000):
    """Run a saved Looker query and return its result in CSV format.

    Args:
        query_id: ID of the Looker query to run.
        limit: Row limit passed to ``sdk.run_query``.

    Returns:
        Whatever ``sdk.run_query`` returns for ``result_format="csv"``
        (the rest of this module treats it as CSV text).
    """
    query = sdk.run_query(
        query_id=query_id,
        result_format="csv",
        limit=limit,
    )
    print("Successfully retrieved data from Looker")
    return query


def write_to_file(data=None):
    """Write a Looker CSV result to the temporary disk with a cleaned header.

    Only the first line (the column names) is changed: every white space in
    it becomes an underscore (i.e: "User ID" becomes "User_ID") because
    BigQuery does not accept a white space inside columns' names. All
    remaining rows are written unchanged.

    Args:
        data: CSV text to write. When omitted, the data is fetched with
            ``get_data_from_looker()`` so that existing zero-argument calls
            keep working.
    """
    if data is None:
        data = get_data_from_looker()
    # Split at the first newline only; ``separator`` is "" when ``data`` has
    # no newline, in which case the whole text is treated as the header.
    header, separator, rows = data.partition("\n")
    header_to_write = header.replace(" ", "_")
    # Explicit UTF-8 and no newline translation so the bytes on disk do not
    # depend on the platform's locale or line-ending conventions.
    with open(_CSV_PATH, "w", encoding="utf-8", newline="") as csv_file:
        csv_file.write(header_to_write + separator + rows)
    print("Successfully wrote data to a CSV file stored in temporary disk")


def load_to_bq(table_id="myproject.myschema.mytable"):
    """Load the CSV file written by ``write_to_file`` into a BigQuery table.

    Set up the table inside BQ in advance: The names and types of columns in
    BQ must match the names and types of the query result from Looker (for
    example: User_ID, type: Integer). Optionally, write additional logic to
    make an empty table with matching columns' names.
    Example: https://github.com/googleapis/python-bigquery/blob/main/samples/create_table.py

    Args:
        table_id: Destination table, written as "project.dataset.table".
    """
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,  # the first row holds the column names
        autodetect=True,
    )
    with open(_CSV_PATH, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_id, job_config=job_config)
    job.result()  # Wait for the job to complete.
    table = client.get_table(table_id)  # Make an API request.
    print(
        f"Loaded {table.num_rows} rows and {len(table.schema)} columns to {table_id}"
    )


# Patch metadata that followed this module in the original diff, kept as
# comments so that no content is lost:
#   \ No newline at end of file
#   diff --git a/examples/python/cloud-function-write-to-bigquery/requirements.txt b/examples/python/cloud-function-write-to-bigquery/requirements.txt
#   new file mode 100644
#   index 000000000..9b5b7e087
#   --- /dev/null
#   +++ b/examples/python/cloud-function-write-to-bigquery/requirements.txt
#   @@ -0,0 +1,7 @@
#   +# Function dependencies, for example:
#   +# package>=version
#   +looker_sdk
#   +google-api-python-client==1.7.9
#   +google-auth-httplib2==0.0.3
#   +google-auth-oauthlib==0.4.0
#   +google-cloud-bigquery
#   \ No newline at end of file