-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BigQuery: add --max_results
option to magic
#9169
Changes from 4 commits
81b0ca2
b04cc37
7176592
37b4d39
147e43d
f88ffe8
a01f1fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -266,7 +266,7 @@ def default_query_job_config(self, value): | |
context = Context() | ||
|
||
|
||
def _run_query(client, query, job_config=None): | ||
def _run_query(client, query, job_config=None, max_results=None): | ||
"""Runs a query while printing status updates | ||
|
||
Args: | ||
|
@@ -300,7 +300,7 @@ def _run_query(client, query, job_config=None): | |
while True: | ||
print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="") | ||
try: | ||
query_job.result(timeout=0.5) | ||
query_job.result(timeout=0.5, max_results=max_results) | ||
break | ||
except futures.TimeoutError: | ||
continue | ||
|
@@ -320,6 +320,22 @@ def _run_query(client, query, job_config=None): | |
default=None, | ||
help=("Project to use for executing this query. Defaults to the context project."), | ||
) | ||
@magic_arguments.argument( | ||
"--max_results", | ||
default=None, | ||
help=( | ||
"Maximum number of rows in dataframe returned from executing the query." | ||
"Defaults to returning all rows." | ||
), | ||
) | ||
@magic_arguments.argument( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like these arguments got doubled up. (Maybe the two commits thing, we noticed when rebasing?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's probably what happened (moral of the story, always run tests before pushing). Removed the duplicate in 147e43d |
||
"--max_results", | ||
default=None, | ||
help=( | ||
"Maximum number of rows in dataframe returned from executing the query." | ||
"Defaults to returning all rows." | ||
), | ||
) | ||
@magic_arguments.argument( | ||
"--maximum_bytes_billed", | ||
default=None, | ||
|
@@ -420,6 +436,12 @@ def _cell_magic(line, query): | |
bqstorage_client = _make_bqstorage_client( | ||
args.use_bqstorage_api or context.use_bqstorage_api, context.credentials | ||
) | ||
|
||
if args.max_results: | ||
max_results = int(args.max_results) | ||
else: | ||
max_results = None | ||
|
||
job_config = bigquery.job.QueryJobConfig() | ||
job_config.query_parameters = params | ||
job_config.use_legacy_sql = args.use_legacy_sql | ||
|
@@ -433,7 +455,9 @@ def _cell_magic(line, query): | |
|
||
error = None | ||
try: | ||
query_job = _run_query(client, query, job_config) | ||
query_job = _run_query( | ||
client, query, job_config=job_config, max_results=max_results | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Note: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I updated to call |
||
) | ||
except Exception as ex: | ||
error = str(ex) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -414,7 +414,7 @@ def test_bigquery_magic_with_legacy_sql(): | |
with run_query_patch as run_query_mock: | ||
ip.run_cell_magic("bigquery", "--use_legacy_sql", "SELECT 17 AS num") | ||
|
||
job_config_used = run_query_mock.call_args_list[0][0][-1] | ||
job_config_used = run_query_mock.call_args_list[0][1]["job_config"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this change intentional? Maybe a bad rebase? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was intentional. It was to fix some test failures I was getting because the argument that was being retrieved was the wrong type (a result of changing the method signature for |
||
assert job_config_used.use_legacy_sql is True | ||
|
||
|
||
|
@@ -645,6 +645,57 @@ def test_bigquery_magic_without_bqstorage(monkeypatch): | |
assert isinstance(return_value, pandas.DataFrame) | ||
|
||
|
||
@pytest.mark.usefixtures("ipython_interactive") | ||
def test_bigquery_magic_w_max_results_invalid(): | ||
ip = IPython.get_ipython() | ||
ip.extension_manager.load_extension("google.cloud.bigquery") | ||
magics.context._project = None | ||
|
||
credentials_mock = mock.create_autospec( | ||
google.auth.credentials.Credentials, instance=True | ||
) | ||
default_patch = mock.patch( | ||
"google.auth.default", return_value=(credentials_mock, "general-project") | ||
) | ||
client_query_patch = mock.patch( | ||
"google.cloud.bigquery.client.Client.query", autospec=True | ||
) | ||
|
||
sql = "SELECT 17 AS num" | ||
|
||
with pytest.raises(ValueError), default_patch, client_query_patch: | ||
ip.run_cell_magic("bigquery", "--max_results=abc", sql) | ||
|
||
|
||
@pytest.mark.usefixtures("ipython_interactive") | ||
def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): | ||
ip = IPython.get_ipython() | ||
ip.extension_manager.load_extension("google.cloud.bigquery") | ||
magics.context._project = None | ||
|
||
credentials_mock = mock.create_autospec( | ||
google.auth.credentials.Credentials, instance=True | ||
) | ||
default_patch = mock.patch( | ||
"google.auth.default", return_value=(credentials_mock, "general-project") | ||
) | ||
client_query_patch = mock.patch( | ||
"google.cloud.bigquery.client.Client.query", autospec=True | ||
) | ||
|
||
sql = "SELECT 17 AS num" | ||
|
||
query_job_mock = mock.create_autospec( | ||
google.cloud.bigquery.job.QueryJob, instance=True | ||
) | ||
|
||
with client_query_patch as client_query_mock, default_patch: | ||
client_query_mock.return_value = query_job_mock | ||
ip.run_cell_magic("bigquery", "--max_results=5", sql) | ||
|
||
query_job_mock.result.assert_called_once_with(timeout=0.5, max_results=5) | ||
|
||
|
||
@pytest.mark.usefixtures("ipython_interactive") | ||
def test_bigquery_magic_dryrun_option_sets_job_config(): | ||
ip = IPython.get_ipython() | ||
|
@@ -662,7 +713,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(): | |
with run_query_patch as run_query_mock: | ||
ip.run_cell_magic("bigquery", "--dry_run", sql) | ||
|
||
job_config_used = run_query_mock.call_args_list[0][0][-1] | ||
job_config_used = run_query_mock.call_args_list[0][1]["job_config"] | ||
assert job_config_used.dry_run is True | ||
|
||
|
||
|
@@ -924,7 +975,10 @@ def test_bigquery_magic_with_string_params(): | |
run_query_mock.return_value = query_job_mock | ||
|
||
ip.run_cell_magic("bigquery", 'params_string_df --params {"num":17}', sql) | ||
run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) | ||
|
||
run_query_mock.assert_called_once_with( | ||
mock.ANY, sql.format(num=17), mock.ANY, max_results=None | ||
) | ||
|
||
assert "params_string_df" in ip.user_ns # verify that the variable exists | ||
df = ip.user_ns["params_string_df"] | ||
|
@@ -959,7 +1013,10 @@ def test_bigquery_magic_with_dict_params(): | |
# Insert dictionary into user namespace so that it can be expanded | ||
ip.user_ns["params"] = params | ||
ip.run_cell_magic("bigquery", "params_dict_df --params $params", sql) | ||
run_query_mock.assert_called_once_with(mock.ANY, sql.format(num=17), mock.ANY) | ||
|
||
run_query_mock.assert_called_once_with( | ||
mock.ANY, sql.format(num=17), mock.ANY, max_results=None | ||
) | ||
|
||
assert "params_dict_df" in ip.user_ns # verify that the variable exists | ||
df = ip.user_ns["params_dict_df"] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we aren't actually returning the results in this line (just waiting for the query to finish), we don't need to pass
max_results
here. I believe that means we can remove themax_results
parameter from_run_query
as well.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed in f88ffe8