Skip to content

Commit

Permalink
Add protos for bundle telemetry (#2209)
Browse files Browse the repository at this point in the history
## Changes
These types correspond to the telemetry protobufs defined in universe.

## Tests
No tests are needed since this PR only adds the type bindings.

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
  • Loading branch information
shreyas-goenka and pietern authored Jan 29, 2025
1 parent 884b5f2 commit 30f57d3
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 0 deletions.
31 changes: 31 additions & 0 deletions libs/telemetry/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package telemetry

// RequestBody is the request body type bindings for the /telemetry-ext API endpoint.
//
// None of the field tags use omitempty, so all three keys ("uploadTime",
// "items" and "protoLogs") are always present in the marshaled JSON.
type RequestBody struct {
// Timestamp in millis for when the log was uploaded.
UploadTime int64 `json:"uploadTime"`

// DO NOT USE. This is the legacy field for logging in usage logs (not lumberjack).
// We keep this around because the API endpoint works only if this field is serialized
// to an empty array.
//
// NOTE(review): encoding/json encodes a nil slice as null rather than [], so
// callers presumably have to assign a non-nil empty slice (e.g. []string{})
// to obtain the empty array described above -- confirm against the callers.
Items []string `json:"items"`

// JSON encoded strings containing the proto logs. Since it's represented as a
// string here, the values here end up being double JSON encoded in the final
// request body.
//
// Any logs here will be logged in our lumberjack tables as long as a corresponding
// protobuf is defined in universe.
ProtoLogs []string `json:"protoLogs"`
}

// ResponseBody is the response body type bindings for the /telemetry-ext API endpoint.
type ResponseBody struct {
// Errors is decoded from the "errors" key of the response.
// NOTE(review): presumably one entry per log the endpoint rejected -- confirm.
Errors []LogError `json:"errors"`
// NumProtoSuccess is decoded from the "numProtoSuccess" key of the response.
// NOTE(review): presumably the number of ProtoLogs entries that were accepted -- confirm.
NumProtoSuccess int64 `json:"numProtoSuccess"`
}

// LogError is the element type of ResponseBody.Errors.
type LogError struct {
// Message is decoded from the "message" key.
// NOTE(review): presumably a human-readable description of the failure -- confirm.
Message string `json:"message"`
// ErrorType is decoded from the "errorType" key.
// NOTE(review): the set of possible values is not visible here -- confirm with the API.
ErrorType string `json:"errorType"`
}
2 changes: 2 additions & 0 deletions libs/telemetry/protos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The types in this package are equivalent to the lumberjack protos defined in Universe.
You can find all lumberjack protos for the Databricks CLI in the `proto/logs/frontend/databricks_cli` directory.
77 changes: 77 additions & 0 deletions libs/telemetry/protos/bundle_deploy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package protos

// BundleDeployEvent is the event type referenced by
// DatabricksCliLog.BundleDeployEvent (JSON key "bundle_deploy_event").
//
// Every field is tagged omitempty, so zero counts, empty ID lists, an empty
// UUID and a nil Experimental pointer are left out of the marshaled JSON.
type BundleDeployEvent struct {
// UUID associated with the bundle itself. Set in the `bundle.uuid` field in the bundle configuration.
BundleUuid string `json:"bundle_uuid,omitempty"`

// Resource counters: ResourceCount followed by one counter per resource type.
// NOTE(review): presumably ResourceCount is the total across all resource
// types and the others are per-type totals -- confirm.
ResourceCount int64 `json:"resource_count,omitempty"`
ResourceJobCount int64 `json:"resource_job_count,omitempty"`
ResourcePipelineCount int64 `json:"resource_pipeline_count,omitempty"`
ResourceModelCount int64 `json:"resource_model_count,omitempty"`
ResourceExperimentCount int64 `json:"resource_experiment_count,omitempty"`
ResourceModelServingEndpointCount int64 `json:"resource_model_serving_endpoint_count,omitempty"`
ResourceRegisteredModelCount int64 `json:"resource_registered_model_count,omitempty"`
ResourceQualityMonitorCount int64 `json:"resource_quality_monitor_count,omitempty"`
ResourceSchemaCount int64 `json:"resource_schema_count,omitempty"`
ResourceVolumeCount int64 `json:"resource_volume_count,omitempty"`
ResourceClusterCount int64 `json:"resource_cluster_count,omitempty"`
ResourceDashboardCount int64 `json:"resource_dashboard_count,omitempty"`
ResourceAppCount int64 `json:"resource_app_count,omitempty"`

// IDs of resources managed by the bundle. Some resources like volumes or schemas
// do not expose a numerical or UUID identifier and are tracked by name. Those
// resources are not tracked here since the names are PII.
ResourceJobIDs []string `json:"resource_job_ids,omitempty"`
ResourcePipelineIDs []string `json:"resource_pipeline_ids,omitempty"`
ResourceClusterIDs []string `json:"resource_cluster_ids,omitempty"`
ResourceDashboardIDs []string `json:"resource_dashboard_ids,omitempty"`

// Optional experimental metrics; omitted from the JSON when the pointer is nil.
Experimental *BundleDeployExperimental `json:"experimental,omitempty"`
}

// BundleDeployExperimental holds metrics that are experimental and are often
// added in an ad hoc manner. There are no guarantees for these metrics and they
// may be removed in the future without any notice.
//
// Every field is tagged omitempty, so zero counts, empty lists and empty enum
// strings are left out of the marshaled JSON.
type BundleDeployExperimental struct {
// Number of configuration files in the bundle.
ConfigurationFileCount int64 `json:"configuration_file_count,omitempty"`

// Size in bytes of the Terraform state file
TerraformStateSizeBytes int64 `json:"terraform_state_size_bytes,omitempty"`

// Number of variables in the bundle
// NOTE(review): presumably the two counters below are the subsets of
// variables that are complex-typed and lookup-based respectively -- confirm.
VariableCount int64 `json:"variable_count,omitempty"`
ComplexVariableCount int64 `json:"complex_variable_count,omitempty"`
LookupVariableCount int64 `json:"lookup_variable_count,omitempty"`

// Number of targets in the bundle
TargetCount int64 `json:"target_count,omitempty"`

// Whether a field is set or not. If a configuration field is not present in this
// map then it is not tracked by this field.
// Keys are the full path of the field in the configuration tree.
// Examples: "bundle.terraform.exec_path", "bundle.git.branch" etc.
//
// The "map" is represented as a list of key/value entries.
SetFields []BoolMapEntry `json:"set_fields,omitempty"`

// Values for boolean configuration fields like `experimental.python_wheel_wrapper`
// We don't need to define protos to track boolean values and can simply write those
// values to this map to track them.
//
// The "map" is represented as a list of key/value entries.
BoolValues []BoolMapEntry `json:"bool_values,omitempty"`

// Bundle mode enum value; see the BundleMode constants.
BundleMode BundleMode `json:"bundle_mode,omitempty"`

// Artifact path type enum value; see the BundleDeployArtifactPathType constants.
WorkspaceArtifactPathType BundleDeployArtifactPathType `json:"workspace_artifact_path_type,omitempty"`

// Execution time per mutator for a selected subset of mutators.
// Represented as a list of key/value entries.
// NOTE(review): presumably keyed by mutator name with values in
// milliseconds (per the field name) -- confirm.
BundleMutatorExecutionTimeMs []IntMapEntry `json:"bundle_mutator_execution_time_ms,omitempty"`
}

// BoolMapEntry is a single string-key / bool-value pair. Slices of it are used
// by BundleDeployExperimental (SetFields, BoolValues) in place of a map.
//
// Both fields are tagged omitempty, so an entry whose Value is false is
// marshaled without a "value" key, and an empty Key is omitted as well.
type BoolMapEntry struct {
Key string `json:"key,omitempty"`
Value bool `json:"value,omitempty"`
}

// IntMapEntry is a single string-key / int64-value pair. A slice of it is used
// by BundleDeployExperimental.BundleMutatorExecutionTimeMs in place of a map.
//
// Both fields are tagged omitempty, so an entry whose Value is 0 is marshaled
// without a "value" key, and an empty Key is omitted as well.
type IntMapEntry struct {
Key string `json:"key,omitempty"`
Value int64 `json:"value,omitempty"`
}
37 changes: 37 additions & 0 deletions libs/telemetry/protos/bundle_init.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package protos

// BundleInitEvent is the event type referenced by
// DatabricksCliLog.BundleInitEvent (JSON key "bundle_init_event").
//
// Every field is tagged omitempty, so empty strings and an empty argument list
// are left out of the marshaled JSON.
type BundleInitEvent struct {
// UUID associated with the DAB itself. This is serialized into the DAB
// when a user runs `databricks bundle init` and all subsequent deployments of
// that DAB can then be associated with this init event.
BundleUuid string `json:"bundle_uuid,omitempty"`

// Name of the template initialized when the user ran `databricks bundle init`
// This is only populated when the template is a first party template like
// mlops-stacks or default-python.
TemplateName string `json:"template_name,omitempty"`

// Arguments used by the user to initialize the template. Only enum
// values will be set here by the Databricks CLI.
//
// We use a generic map representation here because a bundle template's args are
// managed in the template itself and maintaining a copy typed schema for it here
// will be untenable in the long term.
//
// The "map" is represented as a list of key/value entries.
TemplateEnumArgs []BundleInitTemplateEnumArg `json:"template_enum_args,omitempty"`
}

// BundleInitTemplateEnumArg is the element type of
// BundleInitEvent.TemplateEnumArgs: one key/value pair describing a template
// argument.
//
// Neither tag uses omitempty, so both "key" and "value" are always present in
// the marshaled JSON, even when they are empty strings.
type BundleInitTemplateEnumArg struct {
// Valid key values for the template. These correspond to the keys specified in
// the "properties" section of the `databricks_template_schema.json` file.
//
// Note: `databricks_template_schema.json` contains a JSON schema type specification
// for the arguments that the template accepts.
Key string `json:"key"`

// Value that the user set for the field. This is only populated for properties
// that have the "enum" field specified in the JSON schema type specification.
//
// The Databricks CLI ensures that the value here is one of the "enum" values from
// the template specification.
Value string `json:"value"`
}
35 changes: 35 additions & 0 deletions libs/telemetry/protos/databricks_cli_log.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package protos

// ExecutionContext describes a single CLI command run. It is referenced by
// DatabricksCliLog.ExecutionContext (JSON key "execution_context").
//
// Every field is tagged omitempty, so zero values are left out of the
// marshaled JSON; in particular an ExitCode of 0 and a FromWebTerminal of
// false are not serialized.
type ExecutionContext struct {
// UUID generated by the CLI for every CLI command run. This is also set in the HTTP user
// agent under the key "cmd-exec-id" and can be used to correlate frontend_log table
// with the http_access_log table.
CmdExecID string `json:"cmd_exec_id,omitempty"`

// Version of the Databricks CLI used.
Version string `json:"version,omitempty"`

// Command that was run by the user. Eg: bundle_deploy, fs_cp etc.
Command string `json:"command,omitempty"`

// Lowercase string name for the operating system. Same value
// as the one set in `runtime.GOOS` in Golang.
OperatingSystem string `json:"operating_system,omitempty"`

// Version of DBR from which CLI is being run.
// Only set when the CLI is being run from a Databricks cluster.
DbrVersion string `json:"dbr_version,omitempty"`

// If true, the CLI is being run from a Databricks notebook / cluster web terminal.
FromWebTerminal bool `json:"from_web_terminal,omitempty"`

// Time taken for the CLI command to execute.
// NOTE(review): presumably in milliseconds, per the field name -- confirm.
ExecutionTimeMs int64 `json:"execution_time_ms,omitempty"`

// Exit code of the CLI command.
ExitCode int64 `json:"exit_code,omitempty"`
}

// CliTestEvent is the event type referenced by DatabricksCliLog.CliTestEvent
// (JSON key "cli_test_event"). It carries a single DummyCliEnum value.
// NOTE(review): presumably used only to exercise the telemetry pipeline in
// tests -- confirm.
type CliTestEvent struct {
// Name is omitted from the JSON when it is the empty string.
Name DummyCliEnum `json:"name,omitempty"`
}
26 changes: 26 additions & 0 deletions libs/telemetry/protos/enum.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package protos

// DummyCliEnum is the string-backed enum used as the type of CliTestEvent.Name.
type DummyCliEnum string

// Values of DummyCliEnum. The string is what ends up in the serialized JSON.
const (
	DummyCliEnumUnspecified = DummyCliEnum("DUMMY_CLI_ENUM_UNSPECIFIED")
	DummyCliEnumValue1      = DummyCliEnum("VALUE1")
	DummyCliEnumValue2      = DummyCliEnum("VALUE2")
	DummyCliEnumValue3      = DummyCliEnum("VALUE3")
)

// BundleMode is the string-backed enum used as the type of
// BundleDeployExperimental.BundleMode.
type BundleMode string

// Values of BundleMode. The string is what ends up in the serialized JSON.
const (
	BundleModeUnspecified = BundleMode("TYPE_UNSPECIFIED")
	BundleModeDevelopment = BundleMode("DEVELOPMENT")
	BundleModeProduction  = BundleMode("PRODUCTION")
)

// BundleDeployArtifactPathType is the string-backed enum used as the type of
// BundleDeployExperimental.WorkspaceArtifactPathType.
type BundleDeployArtifactPathType string

// Values of BundleDeployArtifactPathType. The string is what ends up in the
// serialized JSON.
const (
	BundleDeployArtifactPathTypeUnspecified = BundleDeployArtifactPathType("TYPE_UNSPECIFIED")
	BundleDeployArtifactPathTypeWorkspace   = BundleDeployArtifactPathType("WORKSPACE_FILE_SYSTEM")
	BundleDeployArtifactPathTypeVolume      = BundleDeployArtifactPathType("UC_VOLUME")
)
22 changes: 22 additions & 0 deletions libs/telemetry/protos/frontend_log.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package protos

// FrontendLog corresponds to the FrontendLog lumberjack proto in universe.
// FrontendLog is the top-level struct for any client-side logs at Databricks.
type FrontendLog struct {
// A UUID for the log event generated from the CLI.
FrontendLogEventID string `json:"frontend_log_event_id,omitempty"`

// Entry wraps the CLI-specific log payload.
// NOTE: Entry is a struct value, and encoding/json never treats a struct as
// empty, so the omitempty option has no effect here: the "entry" key is
// always emitted.
Entry FrontendLogEntry `json:"entry,omitempty"`
}

// FrontendLogEntry is the type of FrontendLog.Entry. It wraps the
// DatabricksCliLog payload under the JSON key "databricks_cli_log".
type FrontendLogEntry struct {
// NOTE: DatabricksCliLog is a struct value, and encoding/json never treats a
// struct as empty, so the omitempty option has no effect here: the
// "databricks_cli_log" key is always emitted.
DatabricksCliLog DatabricksCliLog `json:"databricks_cli_log,omitempty"`
}

// DatabricksCliLog is the type of FrontendLogEntry.DatabricksCliLog. It pairs
// an optional ExecutionContext with optional event payloads.
//
// All fields are pointers tagged omitempty, so any field left nil is omitted
// from the marshaled JSON.
type DatabricksCliLog struct {
// Context of the CLI command run that produced this log.
ExecutionContext *ExecutionContext `json:"execution_context,omitempty"`

// Event payloads.
// NOTE(review): presumably at most one of these is set per log -- confirm
// against the proto definition.
CliTestEvent *CliTestEvent `json:"cli_test_event,omitempty"`
BundleInitEvent *BundleInitEvent `json:"bundle_init_event,omitempty"`
BundleDeployEvent *BundleDeployEvent `json:"bundle_deploy_event,omitempty"`
}

0 comments on commit 30f57d3

Please sign in to comment.