From 3cd4418d622b81eca1238ecf36611d7cd8b071cb Mon Sep 17 00:00:00 2001 From: Shubha Rajan Date: Thu, 5 Sep 2019 17:56:17 -0700 Subject: [PATCH] Add `--max_results` option to Jupyter magics. (#9169) * added max_results magic option and fixed broken tests * added tests for --max_results magic option * added max_results magic option and fixed broken tests * added tests for --max_results magic option * Removed duplicate `--max_results` magic argument * removed max_results param from run_query, updated tests --- bigquery/google/cloud/bigquery/magics.py | 24 +++++++++- bigquery/tests/unit/test_magics.py | 57 +++++++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/bigquery/google/cloud/bigquery/magics.py b/bigquery/google/cloud/bigquery/magics.py index b2dae2511ec87..4c93d1307a427 100644 --- a/bigquery/google/cloud/bigquery/magics.py +++ b/bigquery/google/cloud/bigquery/magics.py @@ -320,6 +320,14 @@ def _run_query(client, query, job_config=None): default=None, help=("Project to use for executing this query. Defaults to the context project."), ) +@magic_arguments.argument( + "--max_results", + default=None, + help=( + "Maximum number of rows in dataframe returned from executing the query." + "Defaults to returning all rows." + ), +) @magic_arguments.argument( "--maximum_bytes_billed", default=None, @@ -420,6 +428,12 @@ def _cell_magic(line, query): bqstorage_client = _make_bqstorage_client( args.use_bqstorage_api or context.use_bqstorage_api, context.credentials ) + + if args.max_results: + max_results = int(args.max_results) + else: + max_results = None + job_config = bigquery.job.QueryJobConfig() job_config.query_parameters = params job_config.use_legacy_sql = args.use_legacy_sql @@ -433,7 +447,7 @@ def _cell_magic(line, query): error = None try: - query_job = _run_query(client, query, job_config) + query_job = _run_query(client, query, job_config=job_config) except Exception as ex: error = str(ex) @@ -460,7 +474,13 @@ def _cell_magic(line, query): ) return query_job - result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + if max_results: + result = query_job.result(max_results=max_results).to_dataframe( + bqstorage_client=bqstorage_client + ) + else: + result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) else: diff --git a/bigquery/tests/unit/test_magics.py b/bigquery/tests/unit/test_magics.py index fbea9bdd90503..ed748d2dd5e3c 100644 --- a/bigquery/tests/unit/test_magics.py +++ b/bigquery/tests/unit/test_magics.py @@ -414,7 +414,7 @@ def test_bigquery_magic_with_legacy_sql(): with run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "--use_legacy_sql", "SELECT 17 AS num") - job_config_used = run_query_mock.call_args_list[0][0][-1] + job_config_used = run_query_mock.call_args_list[0][1]["job_config"] assert job_config_used.use_legacy_sql is True @@ -645,6 +645,57 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): assert isinstance(return_value, pandas.DataFrame) +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_max_results_invalid(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + client_query_patch = mock.patch( + "google.cloud.bigquery.client.Client.query", autospec=True + ) + + sql = "SELECT 17 AS num" + + with pytest.raises(ValueError), default_patch, client_query_patch: + ip.run_cell_magic("bigquery", "--max_results=abc", sql) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + default_patch = mock.patch( + "google.auth.default", return_value=(credentials_mock, "general-project") + ) + client_query_patch = mock.patch( + "google.cloud.bigquery.client.Client.query", autospec=True + ) + + sql = "SELECT 17 AS num" + + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + + with client_query_patch as client_query_mock, default_patch: + client_query_mock.return_value = query_job_mock + ip.run_cell_magic("bigquery", "--max_results=5", sql) + + query_job_mock.result.assert_called_with(max_results=5) + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_dryrun_option_sets_job_config(): ip = IPython.get_ipython() @@ -662,7 +713,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(): with run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "--dry_run", sql) - job_config_used = run_query_mock.call_args_list[0][0][-1] + job_config_used = run_query_mock.call_args_list[0][1]["job_config"] assert job_config_used.dry_run is True @@ -924,6 +975,7 @@ def test_bigquery_magic_with_string_params(): run_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", 'params_string_df --params {"num":17}', sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) assert "params_string_df" in ip.user_ns # verify that the variable exists @@ -959,6 +1011,7 @@ def test_bigquery_magic_with_dict_params(): # Insert dictionary into user namespace so that it can be expanded ip.user_ns["params"] = params ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) + run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) assert "params_dict_df" in ip.user_ns # verify that the variable exists