Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: invalid conversion of timezone-aware datetime values to JSON #480

Merged
merged 4 commits into from
Jan 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,21 @@ def _timestamp_to_json_parameter(value):
def _timestamp_to_json_row(value):
"""Coerce 'value' to an JSON-compatible representation."""
if isinstance(value, datetime.datetime):
# For naive datetime objects UTC timezone is assumed, thus we format
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this always our assumption? Based on the unit tests, I suppose so?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't say if "always", but the _datetime_to_json() helper has been unchanged since 2017 and was supposedly doing its job fine with naive datetime objects in this time - at least I don't remember any reported issues related to timezones. The strftime() conversion just uses the datetime fields at their face value and ignores any tzinfo set.

Considering that naive datetimes are used most of the time (as opposed to timezone-aware datetime objects) and that users did not complain, the UTC timezone assumption was apparently correct behavior. The logic just failed to cover the case when somebody actually used a timezone-aware instance.

And indeed, unit tests expect the same, a naive datetime instance is supposed to be converted to a string representation with a zero offset from UTC (example).

# those to string directly without conversion.
if value.tzinfo is not None:
value = value.astimezone(UTC)
value = value.strftime(_RFC3339_MICROS)
return value


def _datetime_to_json(value):
"""Coerce 'value' to an JSON-compatible representation."""
if isinstance(value, datetime.datetime):
# For naive datetime objects UTC timezone is assumed, thus we format
# those to string directly without conversion.
if value.tzinfo is not None:
value = value.astimezone(UTC)
value = value.strftime(_RFC3339_MICROS_NO_ZULU)
return value

Expand Down
77 changes: 43 additions & 34 deletions tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,27 +420,27 @@ def _call_fut(self, row, schema):
def test_w_single_scalar_column(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "col", "INTEGER")
row = {u"f": [{u"v": u"1"}]}
row = {"f": [{"v": "1"}]}
self.assertEqual(self._call_fut(row, schema=[col]), (1,))

def test_w_single_scalar_geography_column(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "geo", "GEOGRAPHY")
row = {u"f": [{u"v": u"POINT(1, 2)"}]}
row = {"f": [{"v": "POINT(1, 2)"}]}
self.assertEqual(self._call_fut(row, schema=[col]), ("POINT(1, 2)",))

def test_w_single_struct_column(self):
# SELECT (1, 2) AS col
sub_1 = _Field("REQUIRED", "sub_1", "INTEGER")
sub_2 = _Field("REQUIRED", "sub_2", "INTEGER")
col = _Field("REQUIRED", "col", "RECORD", fields=[sub_1, sub_2])
row = {u"f": [{u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}]}}]}
row = {"f": [{"v": {"f": [{"v": "1"}, {"v": "2"}]}}]}
self.assertEqual(self._call_fut(row, schema=[col]), ({"sub_1": 1, "sub_2": 2},))

def test_w_single_array_column(self):
# SELECT [1, 2, 3] as col
col = _Field("REPEATED", "col", "INTEGER")
row = {u"f": [{u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}]}
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],))

def test_w_struct_w_nested_array_column(self):
Expand All @@ -450,21 +450,21 @@ def test_w_struct_w_nested_array_column(self):
third = _Field("REPEATED", "third", "INTEGER")
col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third])
row = {
u"f": [
"f": [
{
u"v": {
u"f": [
{u"v": [{u"v": u"1"}, {u"v": u"2"}]},
{u"v": u"3"},
{u"v": [{u"v": u"4"}, {u"v": u"5"}]},
"v": {
"f": [
{"v": [{"v": "1"}, {"v": "2"}]},
{"v": "3"},
{"v": [{"v": "4"}, {"v": "5"}]},
]
}
}
]
}
self.assertEqual(
self._call_fut(row, schema=[col]),
({u"first": [1, 2], u"second": 3, u"third": [4, 5]},),
({"first": [1, 2], "second": 3, "third": [4, 5]},),
)

def test_w_array_of_struct(self):
Expand All @@ -474,11 +474,11 @@ def test_w_array_of_struct(self):
third = _Field("REQUIRED", "third", "INTEGER")
col = _Field("REPEATED", "col", "RECORD", fields=[first, second, third])
row = {
u"f": [
"f": [
{
u"v": [
{u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}},
{u"v": {u"f": [{u"v": u"4"}, {u"v": u"5"}, {u"v": u"6"}]}},
"v": [
{"v": {"f": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}},
{"v": {"f": [{"v": "4"}, {"v": "5"}, {"v": "6"}]}},
]
}
]
Expand All @@ -487,8 +487,8 @@ def test_w_array_of_struct(self):
self._call_fut(row, schema=[col]),
(
[
{u"first": 1, u"second": 2, u"third": 3},
{u"first": 4, u"second": 5, u"third": 6},
{"first": 1, "second": 2, "third": 3},
{"first": 4, "second": 5, "third": 6},
],
),
)
Expand All @@ -499,32 +499,25 @@ def test_w_array_of_struct_w_array(self):
second = _Field("REQUIRED", "second", "INTEGER")
col = _Field("REPEATED", "col", "RECORD", fields=[first, second])
row = {
u"f": [
"f": [
{
u"v": [
{
u"v": {
u"f": [
{u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]},
{u"v": u"4"},
]
}
},
"v": [
{
u"v": {
u"f": [
{u"v": [{u"v": u"5"}, {u"v": u"6"}]},
{u"v": u"7"},
"v": {
"f": [
{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]},
{"v": "4"},
]
}
},
{"v": {"f": [{"v": [{"v": "5"}, {"v": "6"}]}, {"v": "7"}]}},
]
}
]
}
self.assertEqual(
self._call_fut(row, schema=[col]),
([{u"first": [1, 2, 3], u"second": 4}, {u"first": [5, 6], u"second": 7}],),
([{"first": [1, 2, 3], "second": 4}, {"first": [5, 6], "second": 7}],),
)


Expand Down Expand Up @@ -673,7 +666,7 @@ def test_w_non_bytes(self):

def test_w_bytes(self):
source = b"source"
expected = u"c291cmNl"
expected = "c291cmNl"
converted = self._call_fut(source)
self.assertEqual(converted, expected)

Expand Down Expand Up @@ -726,7 +719,7 @@ def test_w_string(self):
ZULU = "2016-12-20 15:58:27.339328+00:00"
self.assertEqual(self._call_fut(ZULU), ZULU)

def test_w_datetime(self):
def test_w_datetime_no_zone(self):
when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328)
self.assertEqual(self._call_fut(when), "2016-12-20T15:58:27.339328Z")

Expand All @@ -736,6 +729,14 @@ def test_w_datetime_w_utc_zone(self):
when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=UTC)
self.assertEqual(self._call_fut(when), "2020-11-17T01:06:52.353795Z")

def test_w_datetime_w_non_utc_zone(self):
class EstZone(datetime.tzinfo):
def utcoffset(self, _):
return datetime.timedelta(minutes=-300)

when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=EstZone())
self.assertEqual(self._call_fut(when), "2020-11-17T06:06:52.353795Z")


class Test_datetime_to_json(unittest.TestCase):
def _call_fut(self, value):
Expand All @@ -753,6 +754,14 @@ def test_w_datetime(self):
when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC)
self.assertEqual(self._call_fut(when), "2016-12-03T14:11:27.123456")

def test_w_datetime_w_non_utc_zone(self):
class EstZone(datetime.tzinfo):
def utcoffset(self, _):
return datetime.timedelta(minutes=-300)

when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=EstZone())
self.assertEqual(self._call_fut(when), "2016-12-03T19:11:27.123456")


class Test_date_to_json(unittest.TestCase):
def _call_fut(self, value):
Expand Down