diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index f6085fbd9ff..cc64cdc669a 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -462,6 +462,12 @@ const ( // Default value: 0 // Allowed filters: N/A FrontendErrorInjectionRate + // FrontendEmitSignalNameMetricsTag enables emitting signal name tag in metrics in frontend client + // KeyName: frontend.emitSignalNameMetricsTag + // Value type: Bool + // Default value: false + // Allowed filters: DomainName + FrontendEmitSignalNameMetricsTag // key for matching @@ -2027,7 +2033,7 @@ var Keys = map[Key]string{ DomainFailoverRefreshInterval: "frontend.domainFailoverRefreshInterval", DomainFailoverRefreshTimerJitterCoefficient: "frontend.domainFailoverRefreshTimerJitterCoefficient", FrontendErrorInjectionRate: "frontend.errorInjectionRate", - + FrontendEmitSignalNameMetricsTag: "frontend.emitSignalNameMetricsTag", // matching settings MatchingRPS: "matching.rps", MatchingPersistenceMaxQPS: "matching.persistenceMaxQPS", diff --git a/common/metrics/context.go b/common/metrics/context.go index b7144d21b30..a2696fb0b65 100644 --- a/common/metrics/context.go +++ b/common/metrics/context.go @@ -35,6 +35,15 @@ func TagContext(ctx context.Context, tag Tag) context.Context { return context.WithValue(ctx, contextTagsKey, tags) } +func TagContexts(ctx context.Context, ctxTags ...Tag) context.Context { + tags, ok := ctx.Value(contextTagsKey).([]Tag) + if !ok { + tags = []Tag{} + } + tags = append(tags, ctxTags...) + return context.WithValue(ctx, contextTagsKey, tags) +} + func GetContextTags(ctx context.Context) []Tag { tags, ok := ctx.Value(contextTagsKey).([]Tag) if !ok { diff --git a/common/metrics/context_test.go b/common/metrics/context_test.go index dd7831f5771..e9d89b5b913 100644 --- a/common/metrics/context_test.go +++ b/common/metrics/context_test.go @@ -38,4 +38,9 @@ func TestContextTags(t *testing.T) { tag2 := ThriftTransportTag() ctx = TagContext(ctx, tag2) assert.Equal(t, []Tag{tag1, tag2}, GetContextTags(ctx)) + + ctx1 := context.Background() + ctx1 = TagContexts(ctx1, tag1, tag2) + assert.Contains(t, GetContextTags(ctx1), tag1) + assert.Contains(t, GetContextTags(ctx1), tag2) } diff --git a/common/metrics/scope.go b/common/metrics/scope.go index 1a280ec1dba..3efdf776a72 100644 --- a/common/metrics/scope.go +++ b/common/metrics/scope.go @@ -83,7 +83,7 @@ func (m *metricsScope) StartTimer(id int) Stopwatch { case !def.metricRollupName.Empty(): return NewStopwatch(timer, m.rootScope.Timer(def.metricRollupName.String())) case m.isDomainTagged: - timerAll := m.scope.Tagged(map[string]string{domain: domainAllValue}).Timer(def.metricName.String()) + timerAll := m.scope.Tagged(map[string]string{domain: allValue}).Timer(def.metricName.String()) return NewStopwatch(timer, timerAll) default: return NewStopwatch(timer) @@ -99,7 +99,7 @@ func (m *metricsScope) RecordTimer(id int, d time.Duration) { case m.isDomainTagged: // N.B. - Dual emit here so that we can see aggregate timer stats across all // domains along with the individual domains stats - m.scope.Tagged(map[string]string{domain: domainAllValue}).Timer(def.metricName.String()).Record(d) + m.scope.Tagged(map[string]string{domain: allValue}).Timer(def.metricName.String()).Record(d) } } @@ -139,5 +139,5 @@ func (m *metricsScope) getBuckets(id int) tally.Buckets { } func isDomainTagged(tag Tag) bool { - return tag.Key() == domain && tag.Value() != domainAllValue + return tag.Key() == domain && tag.Value() != allValue } diff --git a/common/metrics/tags.go b/common/metrics/tags.go index aaca5b4999c..ea0419a1a2a 100644 --- a/common/metrics/tags.go +++ b/common/metrics/tags.go @@ -45,8 +45,8 @@ const ( transport = "transport" signalName = "signalName" - domainAllValue = "all" - unknownValue = "_unknown_" + allValue = "all" + unknownValue = "_unknown_" transportThrift = "thrift" transportGRPC = "grpc" @@ -159,3 +159,8 @@ func GPRCTransportTag() Tag { func SignalNameTag(value string) Tag { return metricWithUnknown(signalName, value) } + +// SignalNameAllTag returns a new SignalName tag with all value +func SignalNameAllTag() Tag { + return metricWithUnknown(signalName, allValue) +} diff --git a/service/frontend/service.go b/service/frontend/service.go index 9c708bf3686..f0974d3af65 100644 --- a/service/frontend/service.go +++ b/service/frontend/service.go @@ -101,6 +101,11 @@ type Config struct { // max number of decisions per RespondDecisionTaskCompleted request (unlimited by default) DecisionResultCountLimit dynamicconfig.IntPropertyFnWithDomainFilter + + //Debugging + + // Emit signal related metrics with signal name tag. Be aware of cardinality. + EmitSignalNameMetricsTag dynamicconfig.BoolPropertyFnWithDomainFilter } // NewConfig returns new service config with default values @@ -149,6 +154,7 @@ func NewConfig(dc *dynamicconfig.Collection, numHistoryShards int, enableReadFro DisallowQuery: dc.GetBoolPropertyFilteredByDomain(dynamicconfig.DisallowQuery, false), SendRawWorkflowHistory: dc.GetBoolPropertyFilteredByDomain(dynamicconfig.SendRawWorkflowHistory, sendRawWorkflowHistory), DecisionResultCountLimit: dc.GetIntPropertyFilteredByDomain(dynamicconfig.FrontendDecisionResultCountLimit, 0), + EmitSignalNameMetricsTag: dc.GetBoolPropertyFilteredByDomain(dynamicconfig.FrontendEmitSignalNameMetricsTag, false), domainConfig: domain.Config{ MaxBadBinaryCount: dc.GetIntPropertyFilteredByDomain(dynamicconfig.FrontendMaxBadBinaries, domain.MaxBadBinaries), MinRetentionDays: dc.GetIntProperty(dynamicconfig.MinRetentionDays, domain.DefaultMinWorkflowRetentionInDays), diff --git a/service/frontend/workflowHandler.go b/service/frontend/workflowHandler.go index 8a69a4b091a..f7180510910 100644 --- a/service/frontend/workflowHandler.go +++ b/service/frontend/workflowHandler.go @@ -2273,8 +2273,15 @@ func (wh *WorkflowHandler) GetWorkflowExecutionHistory( }, nil } -func withSignalName(ctx context.Context, signalName string) context.Context { - return metrics.TagContext(ctx, metrics.SignalNameTag(signalName)) +func (wh *WorkflowHandler) withSignalName( + ctx context.Context, + domainName string, + signalName string, +) context.Context { + if wh.config.EmitSignalNameMetricsTag(domainName) { + return metrics.TagContext(ctx, metrics.SignalNameTag(signalName)) + } + return ctx } // SignalWorkflowExecution is used to send a signal event to running workflow execution. This results in @@ -2285,7 +2292,7 @@ func (wh *WorkflowHandler) SignalWorkflowExecution( ) (retError error) { defer log.CapturePanic(wh.GetLogger(), &retError) - ctx = withSignalName(ctx, signalRequest.GetSignalName()) + ctx = wh.withSignalName(ctx, signalRequest.GetDomain(), signalRequest.GetSignalName()) scope, sw := wh.startRequestProfileWithDomain(ctx, metrics.FrontendSignalWorkflowExecutionScope, signalRequest) defer sw.Stop() diff --git a/service/frontend/workflowHandler_test.go b/service/frontend/workflowHandler_test.go index 8d79232054a..9bf5a3fd22b 100644 --- a/service/frontend/workflowHandler_test.go +++ b/service/frontend/workflowHandler_test.go @@ -1459,7 +1459,7 @@ func (s *workflowHandlerSuite) TestSignalMetricHasSignalName() { } func (s *workflowHandlerSuite) newConfig(dynamicClient dc.Client) *Config { - return NewConfig( + config := NewConfig( dc.NewCollection( dynamicClient, s.mockResource.GetLogger(), @@ -1468,6 +1468,8 @@ func (s *workflowHandlerSuite) newConfig(dynamicClient dc.Client) *Config { false, false, ) + config.EmitSignalNameMetricsTag = dc.GetBoolPropertyFnFilteredByDomain(true) + return config } func updateRequest(