Skip to content

Commit

Permalink
feat: Add '--use-geodataframe' argument to return a GeoDataFrame (#91)
Browse files Browse the repository at this point in the history
* feat: Add '--use-geodataframe' argument to return a geopandas.GeoDataFrame.
chore: Re-activate python v3.10 unit tests

* Update bigquery_magics/bigquery.py

* test against minimum geopandas

---------

Co-authored-by: Tim Sweña (Swast) <tswast@gmail.com>
  • Loading branch information
bijanvakili and tswast authored Feb 12, 2025
1 parent 620c76f commit fc04f34
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 11 deletions.
46 changes: 36 additions & 10 deletions bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@
amount of time for the query to complete will not be cleared after the
query is finished. By default, this information will be displayed but
will be cleared after the query is finished.
* ``--use_geodataframe <column_name>`` (Optional[line argument]):
Return the query result as a geopandas.GeoDataFrame.
If present, the argument that follows the ``--use_geodataframe`` flag
must be a string naming the column to use as the active geometry.
See geopandas.GeoDataFrame for details.
The Coordinate Reference System will be set to "EPSG:4326".
* ``--params <params>`` (Optional[line argument]):
If present, the argument following the ``--params`` flag must be
either:
Expand All @@ -75,7 +84,8 @@
Returns:
A :class:`pandas.DataFrame` or :class:`bigframes.pandas.DataFrame`
with the query results, depending on the ``engine`` chosen.
with the query results, depending on the ``engine`` chosen or if
``--use_geodataframe`` was provided (in which case a
:class:`geopandas.GeoDataFrame` is returned).
.. note::
All queries run using this magic will run using the context
Expand Down Expand Up @@ -343,6 +353,16 @@ def _create_dataset_if_necessary(client, dataset_id):
"name (ex. $my_dict_var)."
),
)
@magic_arguments.argument(
"--use_geodataframe",
type=str,
default=None,
help=(
"Return the query result as a geopandas.GeoDataFrame. If present, the "
"--use_geodataframe flag should be followed by a string name of the "
"column."
),
)
@magic_arguments.argument(
"--progress_bar_type",
type=str,
Expand Down Expand Up @@ -574,6 +594,7 @@ def _make_bq_query(
bqstorage_client: Any,
):
max_results = int(args.max_results) if args.max_results else None
geography_column = args.use_geodataframe

# Any query that does not contain whitespace (aside from leading and trailing whitespace)
# is assumed to be a table id
Expand Down Expand Up @@ -631,19 +652,24 @@ def _make_bq_query(
return query_job

progress_bar = context.progress_bar_type or args.progress_bar_type
dataframe_kwargs = {
"bqstorage_client": bqstorage_client,
"create_bqstorage_client": False,
"progress_bar_type": progress_bar,
}
if max_results:
dataframe_kwargs["bqstorage_client"] = None

result = query_job
if max_results:
result = query_job.result(max_results=max_results).to_dataframe(
bqstorage_client=None,
create_bqstorage_client=False,
progress_bar_type=progress_bar,
result = result.result(max_results=max_results)

if geography_column:
result = result.to_geodataframe(
geography_column=geography_column, **dataframe_kwargs
)
else:
result = query_job.to_dataframe(
bqstorage_client=bqstorage_client,
create_bqstorage_client=False,
progress_bar_type=progress_bar,
)
result = result.to_dataframe(**dataframe_kwargs)

return _handle_result(result, args)

Expand Down
12 changes: 11 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,15 @@

DEFAULT_PYTHON_VERSION = "3.8"

UNIT_TEST_PYTHON_VERSIONS: List[str] = ["3.7", "3.8", "3.9", "3.11", "3.12", "3.13"]
UNIT_TEST_PYTHON_VERSIONS: List[str] = [
"3.7",
"3.8",
"3.9",
"3.10",
"3.11",
"3.12",
"3.13",
]
UNIT_TEST_STANDARD_DEPENDENCIES = [
"mock",
"asyncmock",
Expand Down Expand Up @@ -62,12 +70,14 @@
"3.10": [
"bqstorage",
"bigframes",
"geopandas",
],
"3.11": [],
"3.12": [],
"3.13": [
"bqstorage",
"bigframes",
"geopandas",
],
}

Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'",
],
"bigframes": ["bigframes >= 1.17.0"],
"geopandas": ["geopandas >= 1.0.1"],
}

all_extras = []
Expand Down
1 change: 1 addition & 0 deletions testing/constraints-3.10.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# IMPORTANT: When Python 3.9 support is dropped, update these to
# match the minimums in setup.py.
geopandas==1.0.1
50 changes: 50 additions & 0 deletions tests/unit/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
except ImportError:
bpd = None

try:
import geopandas as gpd
except ImportError:
gpd = None


def make_connection(*args):
# TODO(tswast): Remove this in favor of a mock google.cloud.bigquery.Client
Expand Down Expand Up @@ -737,6 +742,51 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
)


@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(gpd is None, reason="Requires `geopandas`")
def test_bigquery_magic_with_use_geodataframe():
    """Verify that ``--use_geodataframe`` converts results via ``to_geodataframe``.

    The cell magic is run with ``--use_geodataframe my_geom`` against a mocked
    ``Client.query``. The test then checks that the query job's
    ``to_dataframe`` is never called, that ``to_geodataframe`` is called
    exactly once with ``geography_column="my_geom"`` plus the standard
    dataframe keyword arguments, and that the magic returns a
    ``geopandas.GeoDataFrame``.
    """
    shell = IPython.get_ipython()
    shell.extension_manager.load_extension("bigquery_magics")
    bigquery_magics.context._project = None

    # The SQL text is only handed to the patched ``Client.query`` below.
    sql = """
SELECT
17 AS num,
ST_GEOGFROMTEXT('POINT(-122.083855 37.386051)') AS my_geom
"""

    # Canned frame that the mocked job hands back from ``to_geodataframe``.
    expected_frame = gpd.GeoDataFrame(
        [[17, "POINT(-122.083855 37.386051)"]],
        columns=["num", "my_geom"],
    )
    fake_job = mock.create_autospec(
        google.cloud.bigquery.job.QueryJob,
        instance=True,
    )
    fake_job.to_geodataframe.return_value = expected_frame

    # Replace credential discovery and query submission with mocks.
    fake_credentials = mock.create_autospec(
        google.auth.credentials.Credentials,
        instance=True,
    )
    query_patch = mock.patch(
        "google.cloud.bigquery.client.Client.query",
        autospec=True,
    )
    auth_default_patch = mock.patch(
        "google.auth.default",
        return_value=(fake_credentials, "general-project"),
    )

    with query_patch as patched_query, auth_default_patch:
        patched_query.return_value = fake_job
        magic_output = shell.run_cell_magic(
            "bigquery", "--use_geodataframe my_geom", sql
        )

    # Only the GeoDataFrame conversion path may be used.
    fake_job.to_dataframe.assert_not_called()
    fake_job.to_geodataframe.assert_called_once_with(
        geography_column="my_geom",
        bqstorage_client=mock.ANY,
        create_bqstorage_client=False,
        progress_bar_type="tqdm_notebook",
    )
    assert isinstance(magic_output, gpd.GeoDataFrame)


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_w_max_results_query_job_results_fails():
ip = IPython.get_ipython()
Expand Down

0 comments on commit fc04f34

Please sign in to comment.