From 8c99a48fa58fac091a639c2dd3771e1bba6e9446 Mon Sep 17 00:00:00 2001
From: Michelle Ark
Date: Mon, 7 Aug 2023 14:21:37 -0400
Subject: [PATCH] first pass: --sample

---
Review notes (commentary placed after the three-dash line):
* `--sample` is typed `click.INT`, so negative values pass option parsing and
  would be rendered as `limit -N`; `0` is falsy in `render()` and therefore
  disables sampling instead of producing `limit 0`.
* `render()` draws a new `uuid4` alias on every call, so the rendered SQL of
  a sampled relation differs from one call to the next.
* NOTE(review): `@p.sample` is added to two commands in main.py, while
  providers.py reads `self.config.args.sample` unconditionally. Confirm the
  attribute exists for every command that resolves refs or sources.
* NOTE(review): the `limit` subquery is emitted from the base relation.
  Confirm every supported adapter dialect accepts this syntax.

 core/dbt/adapters/base/relation.py |  8 +++++++-
 core/dbt/cli/main.py               |  2 ++
 core/dbt/cli/params.py             |  8 ++++++++
 core/dbt/context/providers.py      | 10 +++++++---
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/core/dbt/adapters/base/relation.py b/core/dbt/adapters/base/relation.py
index 67a50d9061f..d5132c6b8a1 100644
--- a/core/dbt/adapters/base/relation.py
+++ b/core/dbt/adapters/base/relation.py
@@ -1,6 +1,7 @@
 from collections.abc import Hashable
 from dataclasses import dataclass, field
 from typing import Optional, TypeVar, Any, Type, Dict, Iterator, Tuple, Set, Union, FrozenSet
+import uuid
 
 from dbt.contracts.graph.nodes import SourceDefinition, ManifestNode, ResultNode, ParsedNode
 from dbt.contracts.relation import (
@@ -36,6 +37,7 @@ class BaseRelation(FakeAPIObject, Hashable):
     include_policy: Policy = field(default_factory=lambda: Policy())
     quote_policy: Policy = field(default_factory=lambda: Policy())
     dbt_created: bool = False
+    sample: Optional[int] = None
 
     # register relation types that can be renamed for the purpose of replacing relations using stages and backups
     # adding a relation type here also requires defining the associated rename macro
@@ -192,7 +194,11 @@ def _render_iterator(self) -> Iterator[Tuple[Optional[ComponentName], Optional[s
 
     def render(self) -> str:
         # if there is nothing set, this will return the empty string.
-        return ".".join(part for _, part in self._render_iterator() if part is not None)
+        rendered_parts = ".".join(part for _, part in self._render_iterator() if part is not None)
+        if self.sample and rendered_parts:
+            alias = f"_dbt_sample_{uuid.uuid4().hex.upper()[:6]}"
+            return f"(select * from {rendered_parts} limit {self.sample}) {alias}"
+        return rendered_parts
 
     def quoted(self, identifier):
         return "{quote_char}{identifier}{quote_char}".format(
diff --git a/core/dbt/cli/main.py b/core/dbt/cli/main.py
index 7d4560a7910..97a48ce4426 100644
--- a/core/dbt/cli/main.py
+++ b/core/dbt/cli/main.py
@@ -342,6 +342,7 @@ def docs_serve(ctx, **kwargs):
 @p.profile
 @p.profiles_dir
 @p.project_dir
+@p.sample
 @p.select
 @p.selector
 @p.inline
@@ -599,6 +600,7 @@ def parse(ctx, **kwargs):
 @p.profile
 @p.profiles_dir
 @p.project_dir
+@p.sample
 @p.select
 @p.selector
 @p.state
diff --git a/core/dbt/cli/params.py b/core/dbt/cli/params.py
index 1898815a724..445e7995151 100644
--- a/core/dbt/cli/params.py
+++ b/core/dbt/cli/params.py
@@ -415,6 +415,14 @@
     hidden=True,
 )
 
+sample = click.option(
+    "--sample",
+    envvar="DBT_SAMPLE",
+    help="Limit by sample rows when resolving dbt ref and sources.",
+    type=click.INT,
+    default=None,
+)
+
 model_decls = ("-m", "--models", "--model")
 select_decls = ("-s", "--select")
 select_attrs = {
diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py
index febc21a546f..b7334118c39 100644
--- a/core/dbt/context/providers.py
+++ b/core/dbt/context/providers.py
@@ -531,9 +531,13 @@ def resolve(
     def create_relation(self, target_model: ManifestNode) -> RelationProxy:
         if target_model.is_ephemeral_model:
             self.model.set_cte(target_model.unique_id, None)
-            return self.Relation.create_ephemeral_from_node(self.config, target_model)
+            return self.Relation.create_ephemeral_from_node(
+                self.config, target_model, sample=self.config.args.sample
+            )
         else:
-            return self.Relation.create_from(self.config, target_model)
+            return self.Relation.create_from(
+                self.config, target_model, sample=self.config.args.sample
+            )
 
     def validate(
         self,
@@ -590,7 +594,7 @@ def resolve(self, source_name: str, table_name: str):
                 target_kind="source",
                 disabled=(isinstance(target_source, Disabled)),
             )
-        return self.Relation.create_from_source(target_source)
+        return self.Relation.create_from_source(target_source, sample=self.config.args.sample)
 
 
 # metric` implementations