From 580b0947312ac173ccc17d7382f9227df95e6ff7 Mon Sep 17 00:00:00 2001 From: thedae Date: Wed, 13 Mar 2024 12:44:47 +0100 Subject: [PATCH 1/4] Add labels to BQ uploaded tables --- raster_loader/io/bigquery.py | 10 ++++++++++ raster_loader/tests/bigquery/test_io.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/raster_loader/io/bigquery.py b/raster_loader/io/bigquery.py index a938cd5..7b3a0d0 100644 --- a/raster_loader/io/bigquery.py +++ b/raster_loader/io/bigquery.py @@ -2,7 +2,9 @@ import json import pandas as pd import rasterio +import re +from raster_loader import __version__ from raster_loader.errors import import_error_bigquery, IncompatibleRasterException from raster_loader.utils import ask_yes_no_question, batched from raster_loader.io.common import ( @@ -161,6 +163,9 @@ def done_callback(job): print("Writing metadata to BigQuery...") self.write_metadata(metadata, append_records, fqn) + print("Updating labels...") + self.update_labels(fqn, {"raster_loader": re.sub(r'[^a-z0-9_-]', '_', __version__.lower())}) + except IncompatibleRasterException as e: raise IOError("Error uploading to BigQuery: {}".format(e.message)) @@ -224,6 +229,11 @@ def get_metadata(self, fqn): return json.loads(rows[0]["metadata"]) + def update_labels(self, fqn, labels): + table = self.client.get_table(fqn) + table.labels = labels + table = self.client.update_table(table, ["labels"]) + def write_metadata( self, metadata, diff --git a/raster_loader/tests/bigquery/test_io.py b/raster_loader/tests/bigquery/test_io.py index 351e322..de29789 100644 --- a/raster_loader/tests/bigquery/test_io.py +++ b/raster_loader/tests/bigquery/test_io.py @@ -364,6 +364,7 @@ def test_rasterio_to_table_wrong_band_name_block(*args, **kwargs): "raster_loader.io.bigquery.BigQueryConnection.check_if_table_exists", return_value=False, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) @patch("raster_loader.io.bigquery.ask_yes_no_question", return_value=False) def test_rasterio_to_table(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" @@ -389,6 +390,7 @@ def test_rasterio_to_table(*args, **kwargs): @patch("raster_loader.io.common.rasterio_metadata", return_value={}) @patch("raster_loader.io.common.get_number_of_blocks", return_value=1) @patch("raster_loader.io.bigquery.BigQueryConnection.write_metadata", return_value=None) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_overwrite(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -424,6 +426,7 @@ def test_rasterio_to_table_overwrite(*args, **kwargs): "num_pixels": 1, }, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_is_not_empty_append(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -498,6 +501,7 @@ def test_rasterio_to_table_keyboard_interrupt(*args, **kwargs): "raster_loader.io.bigquery.BigQueryConnection.check_if_table_exists", return_value=False, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_with_chunk_size(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -515,6 +519,7 @@ def test_rasterio_to_table_with_chunk_size(*args, **kwargs): "raster_loader.io.bigquery.BigQueryConnection.check_if_table_exists", return_value=False, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_table_with_one_chunk_size(*args, **kwargs): table_name = "test_mosaic_custom_band_column_1" connector = mocks.MockBigQueryConnection() @@ -567,6 +572,7 @@ def test_rasterio_to_table_invalid_raster(*args, **kwargs): "num_pixels": 1, }, ) +@patch("raster_loader.io.bigquery.BigQueryConnection.update_labels", return_value=None) def test_rasterio_to_bigquery_valid_raster(*args, **kwargs): table_name = "test_mosaic_valid_raster".upper() connector = mocks.MockBigQueryConnection() From bd408e7b6111237a28f98f9e2d97d92f5011ce12 Mon Sep 17 00:00:00 2001 From: thedae Date: Wed, 13 Mar 2024 13:04:08 +0100 Subject: [PATCH 2/4] Add some integration test case --- raster_loader/tests/bigquery/test_io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/raster_loader/tests/bigquery/test_io.py b/raster_loader/tests/bigquery/test_io.py index de29789..333fe92 100644 --- a/raster_loader/tests/bigquery/test_io.py +++ b/raster_loader/tests/bigquery/test_io.py @@ -107,6 +107,9 @@ def test_rasterio_to_bigquery_with_raster_default_band_name(): list(expected_dataframe.band_1), key=lambda x: x if x is not None else b"" ) + table = connector.client.get_table(fqn) + assert table.labels.get("raster_loader") is not None + @pytest.mark.integration_test def test_rasterio_to_bigquery_appending_rows(): From 42b7a9cbdd496620c630d821a8504f42b596b31c Mon Sep 17 00:00:00 2001 From: thedae Date: Wed, 13 Mar 2024 14:51:38 +0100 Subject: [PATCH 3/4] Add test cases for label generation --- raster_loader/io/bigquery.py | 7 ++++++- raster_loader/tests/bigquery/test_io.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/raster_loader/io/bigquery.py b/raster_loader/io/bigquery.py index 7b3a0d0..d12394e 100644 --- a/raster_loader/io/bigquery.py +++ b/raster_loader/io/bigquery.py @@ -164,7 +164,7 @@ def done_callback(job): self.write_metadata(metadata, append_records, fqn) print("Updating labels...") - self.update_labels(fqn, {"raster_loader": re.sub(r'[^a-z0-9_-]', '_', __version__.lower())}) + self.update_labels(fqn, self.get_labels(__version__)) except IncompatibleRasterException as e: raise IOError("Error uploading to BigQuery: {}".format(e.message)) @@ -229,6 +229,11 @@ def get_metadata(self, fqn): return json.loads(rows[0]["metadata"]) + def get_labels(self, version: str): + return { + "raster_loader": re.sub(r'[^a-z0-9_-]', '_', version.lower()), + } + def update_labels(self, fqn, labels): table = self.client.get_table(fqn) table.labels = labels diff --git a/raster_loader/tests/bigquery/test_io.py b/raster_loader/tests/bigquery/test_io.py index 333fe92..43dcf67 100644 --- a/raster_loader/tests/bigquery/test_io.py +++ b/raster_loader/tests/bigquery/test_io.py @@ -608,3 +608,15 @@ def test_append_with_different_resolution(*args, **kwargs): os.path.join(fixtures_dir, "mosaic_cog.tif"), f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{table_name}", ) + +def test_get_labels(*args, **kwargs): + connector = mocks.MockBigQueryConnection() + + cases = { + "": {"raster_loader": ""}, + "0.1.0": {"raster_loader": "0_1_0"}, + "0.1.0 something": {"raster_loader": "0_1_0_something"}, + "0.1.0+17$g1d1f3a3H": {"raster_loader": "0_1_0_17_g1d1f3a3h"}, + } + for version, expected_labels in cases.items(): + assert connector.get_labels(version) == expected_labels \ No newline at end of file From 3e78cb33b05c7b50f81ff9c772cdae3c35dca407 Mon Sep 17 00:00:00 2001 From: thedae Date: Wed, 13 Mar 2024 14:57:44 +0100 Subject: [PATCH 4/4] Linter --- raster_loader/io/bigquery.py | 2 +- raster_loader/tests/bigquery/test_io.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/raster_loader/io/bigquery.py b/raster_loader/io/bigquery.py index d12394e..37bf8f8 100644 --- a/raster_loader/io/bigquery.py +++ b/raster_loader/io/bigquery.py @@ -231,7 +231,7 @@ def get_metadata(self, fqn): def get_labels(self, version: str): return { - "raster_loader": re.sub(r'[^a-z0-9_-]', '_', version.lower()), + "raster_loader": re.sub(r"[^a-z0-9_-]", "_", version.lower()), } def update_labels(self, fqn, labels): diff --git a/raster_loader/tests/bigquery/test_io.py b/raster_loader/tests/bigquery/test_io.py index 43dcf67..daaf89c 100644 --- a/raster_loader/tests/bigquery/test_io.py +++ b/raster_loader/tests/bigquery/test_io.py @@ -609,6 +609,7 @@ def test_append_with_different_resolution(*args, **kwargs): f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{table_name}", ) + def test_get_labels(*args, **kwargs): connector = mocks.MockBigQueryConnection() @@ -619,4 +620,4 @@ def test_get_labels(*args, **kwargs): "0.1.0+17$g1d1f3a3H": {"raster_loader": "0_1_0_17_g1d1f3a3h"}, } for version, expected_labels in cases.items(): - assert connector.get_labels(version) == expected_labels \ No newline at end of file + assert connector.get_labels(version) == expected_labels