Skip to content

Commit

Permalink
Add --params option to %%bigquery magic (#6277)
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillermo Carrasco authored and tseaver committed Oct 30, 2018
1 parent d20978c commit f32d7ad
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 3 deletions.
56 changes: 54 additions & 2 deletions bigquery/google/cloud/bigquery/magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
.. code-block:: python
%%bigquery [<destination_var>] [--project <project>] [--use_legacy_sql]
[--verbose]
[--verbose] [--params <params>]
<query>
Parameters:
Expand All @@ -39,6 +39,20 @@
amount of time for the query to complete will not be cleared after the
query is finished. By default, this information will be displayed but
will be cleared after the query is finished.
* ``--params <params>`` (optional, line argument):
If present, the argument following the ``--params`` flag must be
either:
* :class:`str` - A JSON string representation of a dictionary in the
format ``{"param_name": "param_value"}`` (ex. ``{"num": 17}``). The
string is parsed with :func:`ast.literal_eval`, so JSON-only literals
such as ``true``, ``false`` and ``null`` are not accepted. Reference
the parameter in the query as ``@param_name``. See ``In [5]`` in the
Examples section below.
* :class:`dict` reference - A reference to a ``dict`` in the format
``{"param_name": "param_value"}``, where the value types must be JSON
serializable. The variable reference is indicated by a ``$`` before
the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]``
in the Examples section below.
* ``<query>`` (required, cell argument):
SQL query to run.
Expand All @@ -54,7 +68,7 @@
the bigquery IPython extension (see ``In[1]``) and setting up
Application Default Credentials.
.. code-block:: python
.. code-block:: none
In [1]: %load_ext google.cloud.bigquery
Expand Down Expand Up @@ -96,10 +110,26 @@
...: 1 Patricia 1568495
...: 2 Elizabeth 1519946
In [5]: %%bigquery df --params {"num": 17}
...: SELECT @num AS num
Out[5]: num
...: -------
...: 0 17
In [6]: params = {"num": 17}
In [7]: %%bigquery df --params $params
...: SELECT @num AS num
Out[7]: num
...: -------
...: 0 17
"""

from __future__ import print_function

import ast
import time
from concurrent import futures

Expand All @@ -112,6 +142,7 @@

import google.auth
from google.cloud import bigquery
from google.cloud.bigquery.dbapi import _helpers


class Context(object):
Expand Down Expand Up @@ -249,6 +280,16 @@ def _run_query(client, query, job_config=None):
'amount of time for the query to finish. By default, this '
'information will be displayed as the query runs, but will be '
'cleared after the query is finished.'))
@magic_arguments.argument(
'--params',
nargs='+',
default=None,
help=('Parameters to format the query string. If present, the --params '
'flag should be followed by a string representation of a dictionary '
'in the format {\'param_name\': \'param_value\'} (ex. {"num": 17}), '
'or a reference to a dictionary in the same format. The dictionary '
'reference can be made by including a \'$\' before the variable '
'name (ex. $my_dict_var).'))
def _cell_magic(line, query):
"""Underlying function for bigquery cell magic
Expand All @@ -265,9 +306,20 @@ def _cell_magic(line, query):
"""
args = magic_arguments.parse_argstring(_cell_magic, line)

params = []
if args.params is not None:
try:
params = _helpers.to_query_parameters(
ast.literal_eval(''.join(args.params)))
except Exception:
raise SyntaxError(
'--params is not a correctly formatted JSON string or a JSON '
'serializable dictionary')

project = args.project or context.project
client = bigquery.Client(project=project, credentials=context.credentials)
job_config = bigquery.job.QueryJobConfig()
job_config.query_parameters = params
job_config.use_legacy_sql = args.use_legacy_sql
query_job = _run_query(client, query, job_config)

Expand Down
81 changes: 80 additions & 1 deletion bigquery/tests/unit/test_magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_bigquery_magic_with_result_saved_to_variable():

sql = 'SELECT 17 AS num'
result = pandas.DataFrame([17], columns=['num'])
assert 'myvariable' not in ip.user_ns
assert 'df' not in ip.user_ns

run_query_patch = mock.patch(
'google.cloud.bigquery.magics._run_query', autospec=True)
Expand Down Expand Up @@ -259,3 +259,82 @@ def test_bigquery_magic_with_project():
assert client_used.project == 'specific-project'
# context project should not change
assert magics.context.project == 'general-project'


@pytest.mark.usefixtures('ipython_interactive')
@pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
def test_bigquery_magic_with_string_params():
    """Run ``%%bigquery`` with ``--params`` given as an inline dict string.

    ``_run_query`` is patched, so no query is actually executed. The test
    checks that the SQL text is forwarded unchanged, that the ``num``
    parameter is attached to the job config handed to ``_run_query``, and
    that the resulting DataFrame is stored in the user namespace under the
    requested destination variable.
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension('google.cloud.bigquery')
    magics.context.credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True)

    sql = 'SELECT @num AS num'
    result = pandas.DataFrame([17], columns=['num'])
    assert 'params_string_df' not in ip.user_ns

    run_query_patch = mock.patch(
        'google.cloud.bigquery.magics._run_query', autospec=True)
    query_job_mock = mock.create_autospec(
        google.cloud.bigquery.job.QueryJob, instance=True)
    query_job_mock.to_dataframe.return_value = result
    with run_query_patch as run_query_mock:
        run_query_mock.return_value = query_job_mock

        ip.run_cell_magic(
            'bigquery', 'params_string_df --params {"num":17}', sql)

        # The SQL has no ``{}`` placeholders, so it is compared as-is;
        # ``@num`` is resolved through the job config, not by formatting
        # the query string.
        run_query_mock.assert_called_once_with(mock.ANY, sql, mock.ANY)

        # ``_run_query(client, query, job_config)``: the job config is the
        # third positional argument. Verify the parsed parameter got there.
        job_config_used = run_query_mock.call_args[0][2]
        query_params = job_config_used.query_parameters
        assert [(p.name, p.value) for p in query_params] == [('num', 17)]

    assert 'params_string_df' in ip.user_ns  # verify that the variable exists
    df = ip.user_ns['params_string_df']
    assert len(df) == len(result)  # verify row count
    assert list(df) == list(result)  # verify column names


@pytest.mark.usefixtures('ipython_interactive')
@pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
def test_bigquery_magic_with_dict_params():
    """Run ``%%bigquery`` with ``--params`` given as a ``$variable`` reference.

    A dict is placed in the IPython user namespace and referenced on the
    magic line as ``$params``. ``_run_query`` is patched, so no query is
    actually executed. The test checks that the SQL text is forwarded
    unchanged, that the ``num`` parameter is attached to the job config
    handed to ``_run_query``, and that the resulting DataFrame is stored in
    the user namespace under the requested destination variable.
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension('google.cloud.bigquery')
    magics.context.credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True)

    sql = 'SELECT @num AS num'
    result = pandas.DataFrame([17], columns=['num'])
    assert 'params_dict_df' not in ip.user_ns

    run_query_patch = mock.patch(
        'google.cloud.bigquery.magics._run_query', autospec=True)
    query_job_mock = mock.create_autospec(
        google.cloud.bigquery.job.QueryJob, instance=True)
    query_job_mock.to_dataframe.return_value = result
    with run_query_patch as run_query_mock:
        run_query_mock.return_value = query_job_mock

        params = {"num": 17}
        # Insert dictionary into user namespace so that it can be expanded
        ip.user_ns['params'] = params
        ip.run_cell_magic('bigquery', 'params_dict_df --params $params', sql)

        # The SQL has no ``{}`` placeholders, so it is compared as-is;
        # ``@num`` is resolved through the job config, not by formatting
        # the query string.
        run_query_mock.assert_called_once_with(mock.ANY, sql, mock.ANY)

        # ``_run_query(client, query, job_config)``: the job config is the
        # third positional argument. Verify the parsed parameter got there.
        job_config_used = run_query_mock.call_args[0][2]
        query_params = job_config_used.query_parameters
        assert [(p.name, p.value) for p in query_params] == [('num', 17)]

    assert 'params_dict_df' in ip.user_ns  # verify that the variable exists
    df = ip.user_ns['params_dict_df']
    assert len(df) == len(result)  # verify row count
    assert list(df) == list(result)  # verify column names


@pytest.mark.usefixtures('ipython_interactive')
@pytest.mark.skipif(pandas is None, reason='Requires `pandas`')
def test_bigquery_magic_with_improperly_formatted_params():
    """Expect ``SyntaxError`` when ``--params`` is given the value ``{17}``."""
    shell = IPython.get_ipython()
    shell.extension_manager.load_extension('google.cloud.bigquery')
    fake_credentials = mock.create_autospec(
        google.auth.credentials.Credentials, instance=True)
    magics.context.credentials = fake_credentials

    query_text = 'SELECT @num AS num'
    malformed_line = '--params {17}'

    # The magic is expected to reject the argument before producing a result.
    with pytest.raises(SyntaxError):
        shell.run_cell_magic('bigquery', malformed_line, query_text)

0 comments on commit f32d7ad

Please sign in to comment.