From c10137c4fb0a1cb0f810db2750eda412747148a0 Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Thu, 19 Sep 2024 14:51:18 -0400 Subject: [PATCH 1/6] initial poc of some partitioning attributes on dataset meta --- src/fideslang/models.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 8d943d3..16157a9 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -523,6 +523,29 @@ def validate_fides_collection_key(value: str) -> str: FidesCollectionKey = Annotated[str, AfterValidator(validate_fides_collection_key)] +class PartitionType(Enum): + RANGE = "range" + TIME = "time" + + +class TimePartitionInterval(Enum): + HOUR = "hour" + DAY = "day" + MONTH = "month" + YEAR = "year" + + +class PartitionSpecification(BaseModel): + """Defines partition spec for a collection""" + + field: str # the field that's partitioned + partition_type: PartitionType + start_value: Union[int, datetime] # should also support some sort of NOW() + end_value: Union[int, datetime] # should also support some sort of NOW() + interval: Union[int, TimePartitionInterval] + partitions_per_query: int = 1 + + class CollectionMeta(BaseModel): """Collection-level specific annotations used for query traversal""" @@ -530,6 +553,7 @@ class CollectionMeta(BaseModel): erase_after: Optional[List[FidesCollectionKey]] = None skip_processing: Optional[bool] = False masking_strategy_override: Optional[MaskingStrategyOverride] = None + partitioning: Optional[PartitionSpecification] = None class DatasetCollection(FidesopsMetaBackwardsCompat): From f58c77d451c551c2cef48dc60b6095b15b6a003d Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Sun, 22 Sep 2024 19:55:33 -0400 Subject: [PATCH 2/6] add where_clauses optional field --- src/fideslang/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 16157a9..1fb8d0f 100644 --- a/src/fideslang/models.py +++ 
b/src/fideslang/models.py @@ -544,6 +544,7 @@ class PartitionSpecification(BaseModel): end_value: Union[int, datetime] # should also support some sort of NOW() interval: Union[int, TimePartitionInterval] partitions_per_query: int = 1 + where_clauses: Optional[List[str]] = None class CollectionMeta(BaseModel): From 2e1575c4e9bc030d3ab7625fc44a351b1fce5c88 Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Tue, 24 Sep 2024 14:14:18 -0400 Subject: [PATCH 3/6] remove more advanced partitioning support fields --- src/fideslang/models.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 1fb8d0f..26f6503 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -538,12 +538,6 @@ class TimePartitionInterval(Enum): class PartitionSpecification(BaseModel): """Defines partition spec for a collection""" - field: str # the field that's partitioned - partition_type: PartitionType - start_value: Union[int, datetime] # should also support some sort of NOW() - end_value: Union[int, datetime] # should also support some sort of NOW() - interval: Union[int, TimePartitionInterval] - partitions_per_query: int = 1 where_clauses: Optional[List[str]] = None From 8ac746045b8c252bbc59c9690a787268a073f15d Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Tue, 24 Sep 2024 15:23:16 -0400 Subject: [PATCH 4/6] more bounded window based partitioning --- src/fideslang/models.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 26f6503..beef8fc 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -523,22 +523,20 @@ def validate_fides_collection_key(value: str) -> str: FidesCollectionKey = Annotated[str, AfterValidator(validate_fides_collection_key)] -class PartitionType(Enum): - RANGE = "range" - TIME = "time" +class UserDefinedPartitionWindow(BaseModel): + """Defines a user-defined partition window""" - -class 
TimePartitionInterval(Enum): - HOUR = "hour" - DAY = "day" - MONTH = "month" - YEAR = "year" + start: str + end: str + start_inclusive: bool = True + end_inclusive: bool = True class PartitionSpecification(BaseModel): """Defines partition spec for a collection""" - where_clauses: Optional[List[str]] = None + field: str + windows: Optional[List[UserDefinedPartitionWindow]] = None class CollectionMeta(BaseModel): From 3d919f613072947f6fd6a20fcfe055ac16bd48fb Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Tue, 1 Oct 2024 09:05:08 -0400 Subject: [PATCH 5/6] support open-ended partitioning dict --- src/fideslang/models.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/src/fideslang/models.py b/src/fideslang/models.py index beef8fc..bb15f25 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -523,22 +523,6 @@ def validate_fides_collection_key(value: str) -> str: FidesCollectionKey = Annotated[str, AfterValidator(validate_fides_collection_key)] -class UserDefinedPartitionWindow(BaseModel): - """Defines a user-defined partition window""" - - start: str - end: str - start_inclusive: bool = True - end_inclusive: bool = True - - -class PartitionSpecification(BaseModel): - """Defines partition spec for a collection""" - - field: str - windows: Optional[List[UserDefinedPartitionWindow]] = None - - class CollectionMeta(BaseModel): """Collection-level specific annotations used for query traversal""" @@ -546,7 +530,10 @@ class CollectionMeta(BaseModel): erase_after: Optional[List[FidesCollectionKey]] = None skip_processing: Optional[bool] = False masking_strategy_override: Optional[MaskingStrategyOverride] = None - partitioning: Optional[PartitionSpecification] = None + + # partitioning metadata is kept open-ended as it is an experimental feature - + # more strictly defined metadata structures will be supported in the future + partitioning: Optional[Dict] = None class DatasetCollection(FidesopsMetaBackwardsCompat): From 
6de873381e34747087ef4d7aa6038ef82d4df91a Mon Sep 17 00:00:00 2001 From: Adam Sachs Date: Wed, 2 Oct 2024 16:18:34 -0400 Subject: [PATCH 6/6] update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8e602d..6c5311f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,15 @@ The types of changes are: - `Fixed` for any bug fixes. - `Security` in case of vulnerabilities. -## [Unreleased](https://github.com/ethyca/fideslang/compare/3.0.6...main) +## [Unreleased](https://github.com/ethyca/fideslang/compare/3.0.7...main) +## [3.0.7](https://github.com/ethyca/fideslang/compare/3.0.6...3.0.7) + +### Added + +- Add a loosely-typed `partitioning` field to the `DatasetCollection.fides_meta` structure to support flexible database table partitioning specifications [#21](https://github.com/ethyca/fideslang/pull/21) + ## [3.0.6](https://github.com/ethyca/fideslang/compare/3.0.5...3.0.6)