Skip to content

Commit

Permalink
Merge pull request #3065 from fishtown-analytics/feature/write-artifa…
Browse files Browse the repository at this point in the history
…ct-schema

Collect and write json schema for dbt artifacts
  • Loading branch information
Kyle Wigley authored Feb 22, 2021
2 parents be47a0c + 11f1ece commit 344a144
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 19 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
## dbt 0.20.0 (Release TBD)

### Fixes

- Fix exit code from dbt debug not returning a failure when one of the tests fails ([#3017](https://github.com/fishtown-analytics/dbt/issues/3017))
- Auto-generated CTEs in tests and ephemeral models have lowercase names to comply with dbt coding conventions ([#3027](https://github.com/fishtown-analytics/dbt/issues/3027), [#3028](https://github.com/fishtown-analytics/dbt/issues/3028))
- Fix incorrect error message when a selector does not match any node ([#3036](https://github.com/fishtown-analytics/dbt/issues/3036))
Expand All @@ -14,6 +13,7 @@

### Under the hood
- Add dependabot configuration for alerting maintainers about keeping dependencies up to date and secure. ([#3061](https://github.com/fishtown-analytics/dbt/issues/3061), [#3062](https://github.com/fishtown-analytics/dbt/pull/3062))
- Update script to collect and write json schema for dbt artifacts ([#2870](https://github.com/fishtown-analytics/dbt/issues/2870), [#3065](https://github.com/fishtown-analytics/dbt/pull/3065))

Contributors:
- [@yu-iskw](https://github.com/yu-iskw) ([#2928](https://github.com/fishtown-analytics/dbt/pull/2928))
Expand Down
13 changes: 9 additions & 4 deletions core/dbt/contracts/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,20 +126,25 @@ def read(cls, path: str):
return cls.from_dict(data) # type: ignore


# Base URL that a schema's relative path is appended to.
BASE_SCHEMAS_URL = 'https://schemas.getdbt.com/'
# Template for a schema's location relative to BASE_SCHEMAS_URL.
SCHEMA_PATH = 'dbt/{name}/v{version}.json'


@dataclasses.dataclass
class SchemaVersion:
    """Name and version number identifying one artifact schema.

    ``str(schema_version)`` yields the absolute schema URL, while ``path``
    yields only the portion that follows ``BASE_SCHEMAS_URL``.
    """
    name: str
    version: int

    @property
    def path(self) -> str:
        """Return the schema location relative to ``BASE_SCHEMAS_URL``."""
        return SCHEMA_PATH.format(
            name=self.name,
            version=self.version,
        )

    def __str__(self) -> str:
        """Return the absolute schema URL (base URL followed by ``path``)."""
        return BASE_SCHEMAS_URL + self.path


SCHEMA_VERSION_KEY = 'dbt_schema_version'

Expand Down
86 changes: 72 additions & 14 deletions scripts/collect-artifact-schema.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,31 +1,89 @@
#!/usr/bin/env python
from dataclasses import dataclass
from typing import Dict, Any
from argparse import ArgumentParser
from pathlib import Path
import json
from typing import Type, Dict, Any

from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.graph.manifest import WritableManifest
from dbt.contracts.results import (
CatalogArtifact, RunResultsArtifact, FreshnessExecutionResultArtifact
)
from dbt.contracts.util import VersionedSchema
from dbt.clients.system import write_file


@dataclass
class ArtifactInfo:
    """The JSON schema of one artifact together with its name and path."""
    # Schema location, taken from the artifact's dbt_schema_version.path.
    path: str
    # Artifact name, taken from the artifact's dbt_schema_version.name.
    name: str
    # Result of calling json_schema() on the artifact class.
    json_schema: Dict[str, Any]

    @classmethod
    def from_artifact_cls(
        cls,
        artifact_cls: 'Type[VersionedSchema]',
    ) -> 'ArtifactInfo':
        """Build an ArtifactInfo from an artifact class.

        Reads ``path`` and ``name`` from ``artifact_cls.dbt_schema_version``
        and calls ``artifact_cls.json_schema()`` for the schema document.
        """
        schema_version = artifact_cls.dbt_schema_version
        return cls(
            path=schema_version.path,
            name=schema_version.name,
            json_schema=artifact_cls.json_schema(),
        )

    def write_schema(self, dest_dir: Path):
        """Write the schema as 2-space-indented JSON to ``dest_dir / path``."""
        write_file(
            str(dest_dir / self.path),
            json.dumps(self.json_schema, indent=2),
        )


@dataclass
class Arguments:
    """Command-line options accepted by the schema collection script."""
    # Directory to write schema files into; None when --path is not given.
    path: Path

    @classmethod
    def parse(cls, argv=None) -> 'Arguments':
        """Parse command-line options into an Arguments instance.

        Args:
            argv: Optional list of argument strings. When None (the
                default), argparse reads the process command line.

        Returns:
            Arguments whose ``path`` is the ``--path`` value converted to a
            ``Path``, or None if the option was omitted.
        """
        parser = ArgumentParser(
            # `prog` is the program name shown in the usage line, so the
            # human-readable summary belongs in `description` instead.
            description="Collect and write dbt artifact schema"
        )
        parser.add_argument(
            '--path',
            type=Path,
            help='The dir to write artifact schema',
        )

        parsed = parser.parse_args(argv)
        return cls(path=parsed.path)


def collect_artifact_schema(args: Arguments):
    """Collect the JSON schema of each listed artifact class and emit it.

    When ``args.path`` is set, every schema is written beneath that
    (resolved) directory at the artifact's own schema path. Otherwise a
    single JSON object mapping artifact name to schema is printed.
    """
    artifacts = [
        FreshnessExecutionResultArtifact,
        RunResultsArtifact,
        CatalogArtifact,
        WritableManifest,
    ]
    artifact_infos = [
        ArtifactInfo.from_artifact_cls(artifact_cls)
        for artifact_cls in artifacts
    ]

    if args and args.path is not None:
        # Resolve once: the destination is the same for every artifact.
        dest_dir = args.path.resolve()
        for artifact_info in artifact_infos:
            artifact_info.write_schema(dest_dir)
    else:
        artifacts_dict = {
            artifact_info.name: artifact_info.json_schema
            for artifact_info in artifact_infos
        }
        print(json.dumps(artifacts_dict))


def main():
    """Script entry point: parse the command line, then collect schemas."""
    parsed = Arguments.parse()
    collect_artifact_schema(parsed)


if __name__ == '__main__':
Expand Down

0 comments on commit 344a144

Please sign in to comment.