Skip to content

Commit

Permalink
Added proto definitions for Data Preparations (#1788)
Browse files Browse the repository at this point in the history
* Added proto definitions for Data Preparations

* Moved Data preparation protos into a separate file
  • Loading branch information
fernst authored Jul 18, 2024
1 parent 97123fa commit 6635abc
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 1 deletion.
3 changes: 2 additions & 1 deletion protos/BUILD
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
load("@rules_proto//proto:defs.bzl", "proto_library")
load("//tools:ts_proto_library.bzl", "ts_proto_library")
load("//testing:build_test.bzl", "build_test")
load("//tools:ts_proto_library.bzl", "ts_proto_library")

package(default_visibility = ["//visibility:public"])

Expand All @@ -9,6 +9,7 @@ proto_library(
srcs = [
"configs.proto",
"core.proto",
"data_preparation.proto",
"evaluation.proto",
"execution.proto",
"profiles.proto",
Expand Down
27 changes: 27 additions & 0 deletions protos/configs.proto
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,32 @@ message ActionConfig {
// TODO(ekrekr): add a notebook runtime field definition.
}

// Configuration of a data preparation action. It is one of the alternatives
// of the enclosing message's `action` oneof.
message DataPreparationConfig {
  // Name of the data preparation.
  string name = 1;

  // Targets of the actions that this action depends on.
  repeated Target dependency_targets = 2;

  // Path of the source file from which the contents of the action are
  // loaded.
  string filename = 3;

  // User-defined tags with which the action should be labeled.
  repeated string tags = 4;

  // NOTE(review): field numbers 5 and 6 are not used by this message.

  // When true, the action is not executed. It can still be depended upon,
  // which is useful for temporarily turning off broken actions.
  bool disabled = 7;

  // Description of the data preparation.
  string description = 8;

  // When true, assertions that depend on any dependency of this action are
  // added as dependencies of this action.
  bool depend_on_dependency_assertions = 9;
}

oneof action {
TableConfig table = 1;
ViewConfig view = 2;
Expand All @@ -523,6 +549,7 @@ message ActionConfig {
OperationConfig operation = 5;
DeclarationConfig declaration = 6;
NotebookConfig notebook = 7;
DataPreparationConfig data_preparation = 8;
}
}

Expand Down
20 changes: 20 additions & 0 deletions protos/core.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
syntax = "proto3";
import "data_preparation.proto";

package dataform;

Expand Down Expand Up @@ -247,6 +248,24 @@ message NotebookRuntimeOptions {
}
}

// Data preparation related entries.
//
// A compiled data preparation action. Its definition is carried in the
// `data_preparation` field.
message DataPreparation {
  // Output targets. Data preparations can have more than one output.
  repeated Target targets = 1;

  // NOTE(review): presumably the canonical forms of `targets` - confirm
  // against how other actions populate their canonical target.
  repeated Target canonical_targets = 2;

  // Tags attached to this data preparation.
  repeated string tags = 3;

  // Targets of the actions this data preparation depends on.
  repeated Target dependency_targets = 4;

  // NOTE(review): presumably the path of the file that defines this data
  // preparation - confirm.
  string file_name = 5;

  // Whether this data preparation is disabled.
  bool disabled = 6;

  // The data preparation definition (nodes plus generated metadata).
  DataPreparationDefinition data_preparation = 7;
}

message CompiledGraph {
ProjectConfig project_config = 4;

Expand All @@ -258,6 +277,7 @@ message CompiledGraph {
// it is not used at runtime.
repeated Test tests = 8;
repeated Notebook notebooks = 12;
repeated DataPreparation data_preparations = 13;

GraphErrors graph_errors = 7;

Expand Down
174 changes: 174 additions & 0 deletions protos/data_preparation.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
syntax = "proto3";

package dataform;

option java_package = "com.dataform.protos";
option java_multiple_files = true;

option go_package = "github.com/dataform-co/dataform/protos/dataform";

// Root of a data preparation definition: a list of nodes together with
// generated metadata that applies to the definition as a whole.
message DataPreparationDefinition {
  // Nodes that make up the data preparation.
  repeated DataPreparationNode nodes = 1;

  // Generated, definition-level information (validation errors and an
  // optional location).
  DataPreparationGenerated generated = 2;
}

// One node of a data preparation: a source, a list of steps, generated
// metadata and an optional destination.
message DataPreparationNode {
  // Identifier of this node.
  // NOTE(review): presumably the value other messages refer to through
  // `node_id`, `left_node_id` and `right_node_id` - confirm.
  string id = 1;

  // Where the node reads its input from.
  DataPreparationNodeSource source = 2;

  // Steps defined on this node.
  // NOTE(review): presumably applied in list order - confirm.
  repeated DataPreparationNodeStep steps = 3;

  // Generated, node-level information.
  DataPreparationNodeGenerated generated = 4;

  // Destination BigQuery table(s) are defined within the data preparation.
  // Declared `optional`, so presence of the destination is tracked.
  optional DataPreparationNodeDestination destination = 5;
}

// Input of a node. At most one of the alternatives below is set.
message DataPreparationNodeSource {
  oneof source {
    // Identifier of another node used as the input.
    string node_id = 1;

    // A table used as the input.
    TableReference table = 2;

    // A join of two nodes used as the input.
    Join join = 3;
  }
}

// Output of a node. Modeled as a oneof that currently has a single
// alternative, so further destination kinds can be added later.
message DataPreparationNodeDestination {
  oneof destination {
    // The table the node writes to.
    TableReference table = 1;
  }
}

// A single step of a node. The kind of step is selected by the `definition`
// oneof.
message DataPreparationNodeStep {
  // Identifier of the step.
  string id = 1;

  // Description of the step.
  string description = 2;

  // What the step does; at most one alternative is set.
  oneof definition {
    // A step that targets a named column.
    ColumnStep column_step = 3;

    // A step that filters using an expression.
    FilterStep filter_step = 4;
  }

  // Generated, step-level information.
  DataPreparationNodeStepGenerated generated = 5;
}

// Step definition that pairs a column name with an expression.
// NOTE(review): presumably the expression provides the value of the named
// column - confirm with the consumer of this message.
message ColumnStep {
  // Name of the column the step refers to.
  string column_name = 1;

  // Expression associated with the column.
  Expression expression = 2;
}

// Step definition that consists of a single expression.
// NOTE(review): presumably a predicate that decides which rows are kept -
// confirm.
message FilterStep {
  // The filter expression.
  Expression expression = 1;
}

// An expression used by steps and join conditions. Modeled as a oneof that
// currently has a single alternative (SQL text).
message Expression {
  oneof expression {
    // The expression written as SQL.
    string sql = 1;
  }
}

// Describes a join between two nodes, used as a node source.
message Join {
  // Identifier of the node on the left side of the join.
  string left_node_id = 1;

  // Identifier of the node on the right side of the join.
  string right_node_id = 2;

  // Kind of join to perform.
  JoinType join_type = 3;

  // Condition on which the two sides are joined.
  JoinCondition join_condition = 4;
}

// Kinds of join supported by `Join`.
enum JoinType {
  // Default value: no join type was specified.
  JOIN_TYPE_UNSPECIFIED = 0;

  // Inner join.
  JOIN_TYPE_INNER = 1;

  // Full outer join.
  JOIN_TYPE_FULL_OUTER = 2;

  // Left join.
  JOIN_TYPE_LEFT = 3;

  // Right join.
  JOIN_TYPE_RIGHT = 4;
}

// Condition of a join. At most one of the alternatives below is set.
message JoinCondition {
  oneof condition {
    // The condition given as an expression.
    Expression expression = 1;

    // The condition given as a list of column pairs.
    JoinKeys keys = 2;
  }
}

// Wrapper around a list of join keys. A repeated field cannot be a direct
// member of a oneof, so `JoinCondition` references this message instead.
message JoinKeys {
  // The column pairs to join on.
  repeated JoinKey keys = 1;
}

// A pair of columns, one from each side of a join.
// NOTE(review): presumably the two columns are compared for equality -
// confirm.
message JoinKey {
  // Column on the left side of the join.
  string left_column = 1;

  // Column on the right side of the join.
  string right_column = 2;
}

// Identifies a table by project, dataset and table name.
// NOTE(review): presumably a BigQuery table (the node destination comment
// mentions BigQuery) - confirm.
message TableReference {
  // Project component of the reference.
  string project = 1;

  // Dataset component of the reference.
  string dataset = 2;

  // Table component of the reference.
  string table = 3;
}

// Generated information attached to a whole data preparation definition.
message DataPreparationGenerated {
  // Validation errors reported at the definition level.
  repeated DataPreparationValidationError validation_errors = 1;

  // Optional location; presence is tracked explicitly.
  // NOTE(review): presumably the location/region the data preparation uses -
  // confirm.
  optional string location = 2;
}

// Generated information attached to a single node.
message DataPreparationNodeGenerated {
  // Sections of the node, each with a type and a label.
  repeated DataPreparationSection sections = 1;

  // NOTE(review): presumably the names or ids of the inputs this node reads
  // from - confirm.
  repeated string sources = 2;

  // Validation errors reported for this node.
  repeated DataPreparationValidationError validation_errors = 3;

  // Schema of the node's output, when available.
  optional DataPreparationSchema output_schema = 4;

  // Generated information about the node's source.
  DataPreparationNodeSourceGenerated source_generated = 5;

  // Generated information about the node's destination, when available.
  optional DataPreparationNodeDestinationGenerated destination_generated = 6;
}

// A typed, labeled section of a node.
message DataPreparationSection {
  // Kind of section.
  DataPreparationSectionType type = 1;

  // Label of the section.
  string label = 2;
}

// Kinds of section a node can contain.
// NOTE(review): values use the `SECTION_` prefix rather than the full enum
// type name; renaming them would break generated code, so they are kept.
enum DataPreparationSectionType {
  // Default value: no section type was specified.
  SECTION_TYPE_UNSPECIFIED = 0;

  // NOTE(review): presumably a section that could not be parsed - confirm.
  SECTION_UNPARSEABLE = 1;

  // Section for a source table.
  SECTION_SOURCE_TABLE = 2;

  // Section for SQL.
  SECTION_SQL = 3;

  // Section for a destination table.
  SECTION_DESTINATION_TABLE = 4;
}

// Generated information about a node's source.
message DataPreparationNodeSourceGenerated {
  // Schema of the source, when available.
  // NOTE(review): this is the only field and it uses number 4; numbers 1-3
  // are not used by this message.
  optional DataPreparationNodeSourceSourceSchema source_schema = 4;
}

// Schema of a node's source. The alternatives parallel the three kinds of
// source in `DataPreparationNodeSource` (node, table, join).
message DataPreparationNodeSourceSourceSchema {
  oneof source_schema {
    // Schema when the source is another node.
    DataPreparationSchema node_schema = 1;

    // Schema when the source is a table.
    DataPreparationSchema table_schema = 2;

    // Schemas of both sides when the source is a join.
    JoinSchema join_schema = 3;
  }
}

// Schemas of the two sides of a join.
message JoinSchema {
  // Schema of the left side.
  DataPreparationSchema left_schema = 1;

  // Schema of the right side.
  DataPreparationSchema right_schema = 2;
}

// Generated information about a node's destination.
message DataPreparationNodeDestinationGenerated {
  // Schema of the destination, when available.
  optional DataPreparationSchema schema = 1;
}

// Generated information attached to a single step.
message DataPreparationNodeStepGenerated {
  // NOTE(review): presumably the names of the columns the step reads -
  // confirm.
  repeated string source_columns = 1;

  // Validation errors reported for this step.
  repeated DataPreparationValidationError validation_errors = 2;
}

// A schema expressed as a list of fields.
message DataPreparationSchema {
  // Fields of the schema.
  // NOTE(review): the repeated field has the singular name `field`; it is
  // kept as is because renaming would change generated code and JSON keys.
  repeated DataPreparationSchemaField field = 1;
}

// One field of a schema. The message is self-recursive through `fields`, so
// a field can contain nested fields.
message DataPreparationSchemaField {
  // Name of the field.
  string name = 1;

  // Type of the field, carried as a string; presence is tracked explicitly.
  // NOTE(review): the set of allowed type strings is not defined here.
  optional string type = 2;

  // Mode of the field, carried as a string; presence is tracked explicitly.
  // NOTE(review): the set of allowed mode strings is not defined here.
  optional string mode = 3;

  // Nested fields of this field, if any.
  repeated DataPreparationSchemaField fields = 4;
}

// A validation error with a severity level and a description.
message DataPreparationValidationError {
  // Severity of the error.
  DataPreparationValidationErrorLevel level = 1;

  // Text describing the error.
  string description = 2;
}

// Severity levels of a validation error.
// NOTE(review): values use the `LEVEL_` prefix rather than the full enum
// type name; renaming them would break generated code, so they are kept.
enum DataPreparationValidationErrorLevel {
  // Default value: no level was specified.
  LEVEL_UNSPECIFIED = 0;

  // Warning level.
  LEVEL_WARN = 1;

  // Error level.
  LEVEL_ERROR = 2;

  // Fatal level.
  LEVEL_FATAL = 3;
}

0 comments on commit 6635abc

Please sign in to comment.