Skip to content

Commit

Permalink
feat: collect slow queries in monitoring mode
Browse files Browse the repository at this point in the history
  • Loading branch information
zyy17 committed Oct 25, 2024
1 parent 5e43736 commit d9feb0d
Show file tree
Hide file tree
Showing 15 changed files with 429 additions and 9 deletions.
22 changes: 22 additions & 0 deletions apis/v1alpha1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,28 @@ type LoggingSpec struct {
// +optional
// +kubebuilder:validation:Enum:={"json", "text"}
Format LogFormat `json:"format,omitempty"`

// SlowQuery is the slow query configuration.
// +optional
SlowQuery *SlowQuery `json:"slowQuery,omitempty"`
}

// SlowQuery defines the slow query logging configuration. It only works for the datanode component.
type SlowQuery struct {
// Enabled indicates whether slow query logging is enabled.
// +required
Enabled bool `json:"enabled"`

// Threshold is the threshold of the slow query, written as a duration string made of one or more
// number-plus-unit pairs (for example `10s` or `1m30s`). Defaults to `10s`.
// +optional
// +kubebuilder:validation:Pattern="^([0-9]+(\\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$"
Threshold string `json:"threshold,omitempty"`

// NOTE(review): the validation pattern below also accepts all-zero values such as `0.0` or `.0`,
// which fall outside the documented (0, 1] range. Confirm whether the pattern should be tightened
// (the generated CRDs would need to be regenerated together with it).

// SampleRatio is the sampling ratio of the slow query log, expressed as a decimal string.
// The value should be in the range of (0, 1]. Defaults to `1.0`.
// +optional
// +kubebuilder:validation:Pattern=`^(0?\.\d+|1(\.0+)?)$`
// +kubebuilder:validation:Type=string
SampleRatio string `json:"sampleRatio,omitempty"`
}

func (in *LoggingSpec) GetLevel() LoggingLevel {
Expand Down
7 changes: 7 additions & 0 deletions apis/v1alpha1/defaulting.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,13 @@ func (in *GreptimeDBCluster) defaultSpec() *GreptimeDBClusterSpec {

// Set the default logging format to JSON if monitoring is enabled.
defaultSpec.Logging.Format = LogFormatJSON

// Turn on the slow query log by default if monitoring is enabled.
defaultSpec.Logging.SlowQuery = &SlowQuery{
Enabled: true,
Threshold: "10s",
SampleRatio: "1.0",
}
}

return defaultSpec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ spec:
logsDir: /data/greptimedb/logs
onlyLogToStdout: false
persistentWithData: false
slowQuery:
enabled: true
sampleRatio: "1.0"
threshold: 10s
frontend:
replicas: 1
httpPort: 4000
Expand Down
20 changes: 20 additions & 0 deletions apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

78 changes: 78 additions & 0 deletions config/crd/resources/greptime.io_greptimedbclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2841,6 +2841,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
replicas:
format: int32
Expand Down Expand Up @@ -5660,6 +5673,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
replicas:
format: int32
Expand Down Expand Up @@ -8462,6 +8488,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
mysqlPort:
format: int32
Expand Down Expand Up @@ -11297,6 +11336,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
meta:
properties:
Expand Down Expand Up @@ -11335,6 +11387,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
replicas:
format: int32
Expand Down Expand Up @@ -16942,6 +17007,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
mysqlPort:
format: int32
Expand Down
13 changes: 13 additions & 0 deletions config/crd/resources/greptime.io_greptimedbstandalones.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2854,6 +2854,19 @@ spec:
type: boolean
persistentWithData:
type: boolean
slowQuery:
properties:
enabled:
type: boolean
sampleRatio:
pattern: ^(0?\.\d+|1(\.0+)?)$
type: string
threshold:
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
required:
- enabled
type: object
type: object
mysqlPort:
format: int32
Expand Down
5 changes: 4 additions & 1 deletion controllers/constant/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ const (
DefaultTLSMode = "prefer"

// LogsTableName is the table name of storing greptimedb logs.
LogsTableName = "gtlogs"
LogsTableName = "_gt_logs"

// SlowQueriesTableName is the table name of storing slow queries.
SlowQueriesTableName = "_gt_slow_queries"

// DefaultVectorConfigName is the default name of vector config.
DefaultVectorConfigName = "vector-config"
Expand Down
11 changes: 6 additions & 5 deletions controllers/greptimedbcluster/deployers/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,12 @@ func (c *CommonBuilder) GenerateVectorConfigMap() (*corev1.ConfigMap, error) {
standaloneName := common.ResourceName(c.Cluster.Name+"-monitor", v1alpha1.StandaloneKind)
svc := fmt.Sprintf("%s.%s.svc.cluster.local", standaloneName, c.Cluster.Namespace)
vars := map[string]string{
"ClusterName": c.Cluster.Name,
"LogsTableName": constant.LogsTableName,
"PipelineName": common.LogsPipelineName(c.Cluster.Namespace, c.Cluster.Name),
"LoggingService": fmt.Sprintf("http://%s:%d", svc, v1alpha1.DefaultHTTPPort),
"MetricService": fmt.Sprintf("http://%s:%d/v1/prometheus/write?db=public", svc, v1alpha1.DefaultHTTPPort),
"ClusterName": c.Cluster.Name,
"LogsTableName": constant.LogsTableName,
"SlowQueriesTableName": constant.SlowQueriesTableName,
"PipelineName": common.LogsPipelineName(c.Cluster.Namespace, c.Cluster.Name),
"LoggingService": fmt.Sprintf("http://%s:%d", svc, v1alpha1.DefaultHTTPPort),
"MetricService": fmt.Sprintf("http://%s:%d/v1/prometheus/write?db=public", svc, v1alpha1.DefaultHTTPPort),
}

vectorConfigTemplate, err := c.vectorConfigTemplate()
Expand Down
3 changes: 3 additions & 0 deletions controllers/greptimedbcluster/deployers/config/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ transform:
- namespace
- cluster
- role
- cost
- threshold
type: string
index: tag
- fields:
- message
- query
- err
type: string
index: fulltext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@ sources:
type: file
data_dir: /logs
include:
- /logs/*.*
- /logs/greptimedb-err.*
- /logs/greptimedb.*
max_read_bytes: 536870912

slow_queries:
type: file
data_dir: /logs
include:
- /logs/greptimedb-slow-queries.*
max_read_bytes: 536870912

metrics:
Expand Down Expand Up @@ -40,6 +48,31 @@ transforms:
.err = .fields.err
}
transform_slow_queries:
type: remap
inputs:
- slow_queries
source: |
. = parse_json!(.message)
.message = .fields.message
.pod = "${POD_NAME}"
.pod_ip = "${POD_IP}"
.namespace = "${POD_NAMESPACE}"
.cluster = "{{ .ClusterName }}"
.role = "${ROLE}"
if exists(.fields.cost) {
.cost = .fields.cost
}
if exists(.fields.threshold) {
.threshold = .fields.threshold
}
if exists(.fields.promql) {
.query = .fields.promql
}
if exists(.fields.sql) {
.query = .fields.sql
}
add_metrics_labels:
type: remap
inputs:
Expand All @@ -59,6 +92,15 @@ sinks:
- transform_logs
endpoint: {{ .LoggingService }}

sink_greptimedb_slow_queries:
type: greptimedb_logs
table: {{ .SlowQueriesTableName }}
pipeline_name: {{ .PipelineName }}
compression: gzip
inputs:
- transform_slow_queries
endpoint: {{ .LoggingService }}

sink_greptimedb_metrics:
type: prometheus_remote_write
inputs:
Expand Down
19 changes: 19 additions & 0 deletions docs/api-references/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ _Appears in:_
| `persistentWithData` _boolean_ | PersistentWithData indicates whether to persist the log with the datanode data storage. It **ONLY** works for the datanode component.<br />If false, the log will be stored in ephemeral storage. | | |
| `onlyLogToStdout` _boolean_ | OnlyLogToStdout indicates whether to only log to stdout. If true, the log will not be stored in the storage even if the storage is configured. | | |
| `format` _[LogFormat](#logformat)_ | Format is the format of the logging. | | Enum: [json text] <br /> |
| `slowQuery` _[SlowQuery](#slowquery)_ | SlowQuery is the slow query configuration. | | |


#### LogsCollectionSpec
Expand Down Expand Up @@ -852,6 +853,24 @@ _Appears in:_
| `volumes` _[Volume](https://kubernetes.io/docs/reference/generated/kubernetes-api/v/#volume-v1-core) array_ | List of volumes that can be mounted by containers belonging to the pod. | | |


#### SlowQuery



SlowQuery defines the slow query configuration. It only works for the datanode component.



_Appears in:_
- [LoggingSpec](#loggingspec)

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `enabled` _boolean_ | Enabled indicates whether slow query logging is enabled. | | |
| `threshold` _string_ | Threshold is the threshold of the slow query. Defaults to `10s`. | | Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$` <br /> |
| `sampleRatio` _string_ | SampleRatio is the sampling ratio of the slow query log. The value should be in the range of (0, 1]. Defaults to `1.0`. | | Pattern: `^(0?\.\d+\|1(\.0+)?)$` <br />Type: string <br /> |


#### StorageRetainPolicyType

_Underlying type:_ _string_
Expand Down
Loading

0 comments on commit d9feb0d

Please sign in to comment.