Skip to content

Commit

Permalink
Add --max_results option to Jupyter magics. (googleapis#9169)
Browse files Browse the repository at this point in the history
* added max_results magic option and fixed broken tests

* added tests for --max_results magic option

* added max_results magic option and fixed broken tests

* added tests for --max_results magic option

* Removed duplicate `--max_results` magic argument

* removed max_results param from run_query, updated tests
  • Loading branch information
shubha-rajan authored and emar-kar committed Sep 18, 2019
1 parent 2ad7ec2 commit 3cd4418
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 4 deletions.
24 changes: 22 additions & 2 deletions bigquery/google/cloud/bigquery/magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,14 @@ def _run_query(client, query, job_config=None):
default=None,
help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
"--max_results",
default=None,
help=(
"Maximum number of rows in dataframe returned from executing the query."
"Defaults to returning all rows."
),
)
@magic_arguments.argument(
"--maximum_bytes_billed",
default=None,
Expand Down Expand Up @@ -420,6 +428,12 @@ def _cell_magic(line, query):
bqstorage_client = _make_bqstorage_client(
args.use_bqstorage_api or context.use_bqstorage_api, context.credentials
)

if args.max_results:
max_results = int(args.max_results)
else:
max_results = None

job_config = bigquery.job.QueryJobConfig()
job_config.query_parameters = params
job_config.use_legacy_sql = args.use_legacy_sql
Expand All @@ -433,7 +447,7 @@ def _cell_magic(line, query):

error = None
try:
query_job = _run_query(client, query, job_config)
query_job = _run_query(client, query, job_config=job_config)
except Exception as ex:
error = str(ex)

Expand All @@ -460,7 +474,13 @@ def _cell_magic(line, query):
)
return query_job

result = query_job.to_dataframe(bqstorage_client=bqstorage_client)
if max_results:
result = query_job.result(max_results=max_results).to_dataframe(
bqstorage_client=bqstorage_client
)
else:
result = query_job.to_dataframe(bqstorage_client=bqstorage_client)

if args.destination_var:
IPython.get_ipython().push({args.destination_var: result})
else:
Expand Down
57 changes: 55 additions & 2 deletions bigquery/tests/unit/test_magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ def test_bigquery_magic_with_legacy_sql():
with run_query_patch as run_query_mock:
ip.run_cell_magic("bigquery", "--use_legacy_sql", "SELECT 17 AS num")

job_config_used = run_query_mock.call_args_list[0][0][-1]
job_config_used = run_query_mock.call_args_list[0][1]["job_config"]
assert job_config_used.use_legacy_sql is True


Expand Down Expand Up @@ -645,6 +645,57 @@ def test_bigquery_magic_without_bqstorage(monkeypatch):
assert isinstance(return_value, pandas.DataFrame)


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_w_max_results_invalid():
    """Passing a non-integer ``--max_results`` value raises ``ValueError``."""
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    # Clear any cached project so the patched google.auth.default is consulted.
    magics.context._project = None

    mock_credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )
    # Patch credential resolution and the query API so no network call is made.
    patch_default = mock.patch(
        "google.auth.default", return_value=(mock_credentials, "general-project")
    )
    patch_client_query = mock.patch(
        "google.cloud.bigquery.client.Client.query", autospec=True
    )

    with pytest.raises(ValueError), patch_default, patch_client_query:
        ip.run_cell_magic("bigquery", "--max_results=abc", "SELECT 17 AS num")


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
    """A valid ``--max_results`` value is forwarded to ``QueryJob.result``."""
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")
    # Clear any cached project so the patched google.auth.default is consulted.
    magics.context._project = None

    mock_credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True
    )
    # Patch credential resolution and the query API so no network call is made.
    patch_default = mock.patch(
        "google.auth.default", return_value=(mock_credentials, "general-project")
    )
    patch_client_query = mock.patch(
        "google.cloud.bigquery.client.Client.query", autospec=True
    )

    mock_query_job = mock.create_autospec(
        google.cloud.bigquery.job.QueryJob, instance=True
    )

    with patch_client_query as mock_client_query, patch_default:
        mock_client_query.return_value = mock_query_job
        ip.run_cell_magic("bigquery", "--max_results=5", "SELECT 17 AS num")

    # The string "5" is parsed to int and passed through to QueryJob.result.
    mock_query_job.result.assert_called_with(max_results=5)


@pytest.mark.usefixtures("ipython_interactive")
def test_bigquery_magic_dryrun_option_sets_job_config():
ip = IPython.get_ipython()
Expand All @@ -662,7 +713,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config():
with run_query_patch as run_query_mock:
ip.run_cell_magic("bigquery", "--dry_run", sql)

job_config_used = run_query_mock.call_args_list[0][0][-1]
job_config_used = run_query_mock.call_args_list[0][1]["job_config"]
assert job_config_used.dry_run is True


Expand Down Expand Up @@ -924,6 +975,7 @@ def test_bigquery_magic_with_string_params():
run_query_mock.return_value = query_job_mock

ip.run_cell_magic("bigquery", 'params_string_df --params {"num":17}', sql)

run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY)

assert "params_string_df" in ip.user_ns # verify that the variable exists
Expand Down Expand Up @@ -959,6 +1011,7 @@ def test_bigquery_magic_with_dict_params():
# Insert dictionary into user namespace so that it can be expanded
ip.user_ns["params"] = params
ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql)

run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY)

assert "params_dict_df" in ip.user_ns # verify that the variable exists
Expand Down

0 comments on commit 3cd4418

Please sign in to comment.