Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hourly, monthly and yearly partitions in BigQuery #2903

Merged
merged 13 commits into from
Nov 30, 2020
Merged
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Store resolved node names in manifest ([#2647](https://github.com/fishtown-analytics/dbt/issues/2647), [#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
- Save selectors dictionary to manifest, allow descriptions ([#2693](https://github.com/fishtown-analytics/dbt/issues/2693), [#2866](https://github.com/fishtown-analytics/dbt/pull/2866))
- Normalize cli-style-strings in manifest selectors dictionary ([#2879](https://github.com/fishtown-analytics/dbt/issues/2879), [#2895](https://github.com/fishtown-analytics/dbt/pull/2895))
- Hourly, monthly and yearly partitions available in BigQuery ([#2476](https://github.com/fishtown-analytics/dbt/issues/2476), [#2903](https://github.com/fishtown-analytics/dbt/pull/2903))

### Fixes
- Respect --project-dir in dbt clean command ([#2840](https://github.com/fishtown-analytics/dbt/issues/2840), [#2841](https://github.com/fishtown-analytics/dbt/pull/2841))
Expand All @@ -25,6 +26,7 @@ Contributors:
- [@elexisvenator](https://github.com/elexisvenator) ([#2850](https://github.com/fishtown-analytics/dbt/pull/2850))
- [@franloza](https://github.com/franloza) ([#2837](https://github.com/fishtown-analytics/dbt/pull/2837))
- [@rsella](https://github.com/rsella) ([#2892](https://github.com/fishtown-analytics/dbt/issues/2892))
- [@db-magnus](https://github.com/db-magnus) ([#2903](https://github.com/fishtown-analytics/dbt/pull/2903))

## dbt 0.19.0b1 (October 21, 2020)

Expand Down
12 changes: 8 additions & 4 deletions plugins/bigquery/dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,19 @@ def sql_escape(string):
class PartitionConfig(JsonSchemaMixin):
field: str
data_type: str = 'date'
granularity: str = 'day'
range: Optional[Dict[str, Any]] = None

def render(self, alias: Optional[str] = None):
    """Return the SQL expression to partition on for this config.

    :param alias: optional table alias; when given, the column is
        rendered as ``<alias>.<field>``.
    :return: the bare (optionally aliased) column when ``data_type`` is
        ``date`` and ``granularity`` is ``day`` (compared
        case-insensitively); otherwise
        ``<data_type>_trunc(<column>, <granularity>)``.
    """
    column: str = self.field
    if alias:
        column = f'{alias}.{self.field}'

    # A date column partitioned by day needs no truncation.
    if self.data_type.lower() == 'date' and \
            self.granularity.lower() == 'day':
        return column

    # Every other combination must honour the configured granularity.
    # The previous unconditional `date(column)` branch for
    # timestamp/datetime ignored it, so hour/month/year settings were
    # silently rendered as daily.
    return f'{self.data_type}_trunc({column}, {self.granularity})'

@classmethod
def parse(cls, raw_partition_by) -> Optional['PartitionConfig']:
Expand Down Expand Up @@ -547,7 +549,9 @@ def _partitions_match(
return True
elif conf_partition and table.time_partitioning is not None:
table_field = table.time_partitioning.field
return table_field == conf_partition.field
table_granularity = table.partitioning_type
return table_field == conf_partition.field \
and table_granularity == conf_partition.granularity
elif conf_partition and table.range_partitioning is not None:
dest_part = table.range_partitioning
conf_part = conf_partition.range or {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,35 @@ def test_bigquery_add_partition(self):
after = {"partition_by": {'field': 'cur_time', 'data_type': 'timestamp'}, "cluster_by": None}
self.run_changes(before, after)
self.test_partitions({"expected": 1})


@use_profile('bigquery')
def test_bigquery_add_partition_year(self):
    # Start from a config with neither partitioning nor clustering...
    unpartitioned = {"partition_by": None, "cluster_by": None}
    # ...then switch to a timestamp column partitioned at year granularity.
    yearly = dict(unpartitioned)
    yearly["partition_by"] = {
        'field': 'cur_time',
        'data_type': 'timestamp',
        'granularity': 'year',
    }
    self.run_changes(unpartitioned, yearly)
    # Run the partition check with an expected value of 1.
    self.test_partitions({"expected": 1})

@use_profile('bigquery')
def test_bigquery_add_partition_month(self):
    # Partition spec applied in the "after" state: monthly on a timestamp.
    monthly_spec = {
        'field': 'cur_time',
        'data_type': 'timestamp',
        'granularity': 'month',
    }
    # Go from no partitioning to the monthly spec; clustering stays unset.
    self.run_changes(
        {"partition_by": None, "cluster_by": None},
        {"partition_by": monthly_spec, "cluster_by": None},
    )
    # Run the partition check with an expected value of 1.
    self.test_partitions({"expected": 1})

@use_profile('bigquery')
def test_bigquery_add_partition_hour(self):
    # Build both states from (partition_by, cluster_by) pairs: first
    # unpartitioned, then hourly partitioning on a timestamp column.
    config_keys = ("partition_by", "cluster_by")
    hourly_spec = {'field': 'cur_time', 'data_type': 'timestamp', 'granularity': 'hour'}
    initial_state = dict(zip(config_keys, (None, None)))
    final_state = dict(zip(config_keys, (hourly_spec, None)))
    self.run_changes(initial_state, final_state)
    # Run the partition check with an expected value of 1.
    self.test_partitions({"expected": 1})

@use_profile('bigquery')
def test_bigquery_change_partition_granularity(self):
    # This test previously shared the name test_bigquery_add_partition_hour
    # with the test that adds an hourly partition to an unpartitioned
    # table. The later definition rebinds the class attribute, so the
    # earlier test was silently never run. This case exercises a
    # granularity change (day -> hour) on an already partitioned table,
    # so it gets its own name.
    before = {"partition_by": {'field': 'cur_time', 'data_type': 'timestamp', 'granularity': 'day'}, "cluster_by": None}
    after = {"partition_by": {'field': 'cur_time', 'data_type': 'timestamp', 'granularity': 'hour'}, "cluster_by": None}
    self.run_changes(before, after)
    # Run the partition check with an expected value of 1.
    self.test_partitions({"expected": 1})

db-magnus marked this conversation as resolved.
Show resolved Hide resolved
@use_profile('bigquery')
def test_bigquery_remove_partition(self):
before = {"partition_by": {'field': 'cur_time', 'data_type': 'timestamp'}, "cluster_by": None}
Expand Down
111 changes: 109 additions & 2 deletions test/unit/test_bigquery_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,17 +652,123 @@ def test_parse_partition_by(self):
"field": "ts",
}).to_dict(), {
"field": "ts",
"data_type": "date"
"data_type": "date",
"granularity": "DAY"
db-magnus marked this conversation as resolved.
Show resolved Hide resolved
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "date",
}).to_dict(), {
"field": "ts",
"data_type": "date",
"granularity": "DAY"
db-magnus marked this conversation as resolved.
Show resolved Hide resolved
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "date",
"granularity": "MONTH"

}).to_dict(), {
"field": "ts",
"data_type": "date",
"granularity": "MONTH"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "date",
"granularity": "YEAR"

}).to_dict(), {
"field": "ts",
"data_type": "date",
"granularity": "YEAR"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "timestamp",
"granularity": "HOUR"

}).to_dict(), {
"field": "ts",
"data_type": "timestamp",
"granularity": "HOUR"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "timestamp",
"granularity": "MONTH"

}).to_dict(), {
"field": "ts",
"data_type": "timestamp",
"granularity": "MONTH"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "timestamp",
"granularity": "YEAR"

}).to_dict(), {
"field": "ts",
"data_type": "timestamp",
"granularity": "YEAR"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "datetime",
"granularity": "HOUR"

}).to_dict(), {
"field": "ts",
"data_type": "datetime",
"granularity": "HOUR"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "datetime",
"granularity": "MONTH"

}).to_dict(), {
"field": "ts",
"data_type": "datetime",
"granularity": "MONTH"
}
)

self.assertEqual(
adapter.parse_partition_by({
"field": "ts",
"data_type": "datetime",
"granularity": "YEAR"

}).to_dict(), {
"field": "ts",
"data_type": "date"
"data_type": "datetime",
"granularity": "YEAR"
}
)

Expand All @@ -683,6 +789,7 @@ def test_parse_partition_by(self):
}).to_dict(), {
"field": "id",
"data_type": "int64",
"granularity": "DAY",
db-magnus marked this conversation as resolved.
Show resolved Hide resolved
"range": {
"start": 1,
"end": 100,
Expand Down