-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Expose broker metrics with go-metrics #701
Changes from 4 commits
f8d8342
7340cab
f9642ad
b7f401f
3ea3cb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,9 @@ package sarama | |
import ( | ||
"fmt" | ||
"testing" | ||
"time" | ||
|
||
"github.com/rcrowley/go-metrics" | ||
) | ||
|
||
func ExampleBroker() { | ||
|
@@ -52,36 +55,45 @@ func TestBrokerAccessors(t *testing.T) { | |
} | ||
|
||
func TestSimpleBrokerCommunication(t *testing.T) { | ||
mb := NewMockBroker(t, 0) | ||
defer mb.Close() | ||
|
||
broker := NewBroker(mb.Addr()) | ||
conf := NewConfig() | ||
conf.Version = V0_10_0_0 | ||
err := broker.Open(conf) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
for _, tt := range brokerTestTable { | ||
Logger.Printf("Testing broker communication for %s", tt.name) | ||
mb := NewMockBroker(t, 0) | ||
mb.Returns(&mockEncoder{tt.response}) | ||
} | ||
for _, tt := range brokerTestTable { | ||
broker := NewBroker(mb.Addr()) | ||
// Set the broker id in order to validate local broker metrics | ||
broker.id = 0 | ||
conf := NewConfig() | ||
conf.Version = V0_10_0_0 | ||
// Use a new registry every time to prevent side effects caused by the global one | ||
conf.MetricRegistry = metrics.NewRegistry() | ||
err := broker.Open(conf) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
tt.runner(t, broker) | ||
err = broker.Close() | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
// Wait up to 500 ms for the remote broker to process requests | ||
// in order to have consistent metrics | ||
if err := mb.WaitForExpectations(500 * time.Millisecond); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure why this is necessary, isn't the request guaranteed to be fully processed by the time There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I use a dedicated But I found a race condition that often manifests itself for unacknowledged produce request (i.e. On a regular produce request, the runner would block on reading the response and by then it is very likely that the My first fix was to add a magic 100 ms sleep for the I guess it would be good to document those scenarios better on both There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The race condition for unacknowledged produce requests is a very nice catch but I'm not sure I understand how it can possibly manifest for any other requests? The runner will not return in those cases until the mock broker has actually responded, which means it has to have consumed the expectation? If I am understanding the other issue properly, it is because the mockbroker updates its metrics after sending the response, so the test can get 0 metrics if it gets run in that gap? Would not the simpler fix be to block There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right, it should not happen and I remembered that corner case while retracing my commits because I got bit by it when reusing original |
||
t.Error(err) | ||
} | ||
mb.Close() | ||
validateBrokerMetrics(t, broker, mb) | ||
} | ||
|
||
err = broker.Close() | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
} | ||
|
||
// We're not testing encoding/decoding here, so most of the requests/responses will be empty for simplicity's sake | ||
var brokerTestTable = []struct { | ||
name string | ||
response []byte | ||
runner func(*testing.T, *Broker) | ||
}{ | ||
{[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
{"MetadataRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := MetadataRequest{} | ||
response, err := broker.GetMetadata(&request) | ||
|
@@ -93,7 +105,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 't', 0x00, 0x00, 0x00, 0x00}, | ||
{"ConsumerMetadataRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 't', 0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := ConsumerMetadataRequest{} | ||
response, err := broker.GetConsumerMetadata(&request) | ||
|
@@ -105,7 +118,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{}, | ||
{"ProduceRequest (NoResponse)", | ||
[]byte{}, | ||
func(t *testing.T, broker *Broker) { | ||
request := ProduceRequest{} | ||
request.RequiredAcks = NoResponse | ||
|
@@ -118,7 +132,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00}, | ||
{"ProduceRequest (WaitForLocal)", | ||
[]byte{0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := ProduceRequest{} | ||
request.RequiredAcks = WaitForLocal | ||
|
@@ -131,7 +146,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00}, | ||
{"FetchRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := FetchRequest{} | ||
response, err := broker.Fetch(&request) | ||
|
@@ -143,7 +159,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00}, | ||
{"OffsetFetchRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := OffsetFetchRequest{} | ||
response, err := broker.FetchOffset(&request) | ||
|
@@ -155,7 +172,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00}, | ||
{"OffsetCommitRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := OffsetCommitRequest{} | ||
response, err := broker.CommitOffset(&request) | ||
|
@@ -167,7 +185,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00}, | ||
{"OffsetRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := OffsetRequest{} | ||
response, err := broker.GetAvailableOffsets(&request) | ||
|
@@ -179,7 +198,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
{"JoinGroupRequest", | ||
[]byte{0x00, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := JoinGroupRequest{} | ||
response, err := broker.JoinGroup(&request) | ||
|
@@ -191,7 +211,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
{"SyncGroupRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := SyncGroupRequest{} | ||
response, err := broker.SyncGroup(&request) | ||
|
@@ -203,7 +224,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00}, | ||
{"LeaveGroupRequest", | ||
[]byte{0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := LeaveGroupRequest{} | ||
response, err := broker.LeaveGroup(&request) | ||
|
@@ -215,7 +237,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00}, | ||
{"HeartbeatRequest", | ||
[]byte{0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := HeartbeatRequest{} | ||
response, err := broker.Heartbeat(&request) | ||
|
@@ -227,7 +250,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
{"ListGroupsRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := ListGroupsRequest{} | ||
response, err := broker.ListGroups(&request) | ||
|
@@ -239,7 +263,8 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
|
||
{[]byte{0x00, 0x00, 0x00, 0x00}, | ||
{"DescribeGroupsRequest", | ||
[]byte{0x00, 0x00, 0x00, 0x00}, | ||
func(t *testing.T, broker *Broker) { | ||
request := DescribeGroupsRequest{} | ||
response, err := broker.DescribeGroups(&request) | ||
|
@@ -251,3 +276,37 @@ var brokerTestTable = []struct { | |
} | ||
}}, | ||
} | ||
|
||
// validateBrokerMetrics sums the request and response sizes recorded in the | ||
// mock broker's history, registers validators comparing those totals with the | ||
// broker's byte-rate, request/response rate and size metrics, and then runs the | ||
// validators against broker.conf.MetricRegistry. | ||
// The registered expectations assume a single request was sent, answered by | ||
// either zero or one response. | ||
func validateBrokerMetrics(t *testing.T, broker *Broker, mockBroker *MockBroker) { | ||
metricValidators := newMetricValidators() | ||
mockBrokerBytesRead := 0 | ||
mockBrokerBytesWritten := 0 | ||
|
||
// Compute socket bytes: total request bytes read and response bytes written by the mock broker | ||
for _, requestResponse := range mockBroker.History() { | ||
mockBrokerBytesRead += requestResponse.RequestSize | ||
mockBrokerBytesWritten += requestResponse.ResponseSize | ||
} | ||
|
||
// Check that the incoming bytes metric corresponds to what the mock broker wrote (response sizes) | ||
metricValidators.registerForAllBrokers(broker, countMeterValidator("incoming-byte-rate", mockBrokerBytesWritten)) | ||
if mockBrokerBytesWritten == 0 { | ||
// This is a ProduceRequest with NoResponse | ||
metricValidators.registerForAllBrokers(broker, countMeterValidator("response-rate", 0)) | ||
metricValidators.registerForAllBrokers(broker, countHistogramValidator("response-size", 0)) | ||
metricValidators.registerForAllBrokers(broker, minMaxHistogramValidator("response-size", 0, 0)) | ||
} else { | ||
metricValidators.registerForAllBrokers(broker, countMeterValidator("response-rate", 1)) | ||
metricValidators.registerForAllBrokers(broker, countHistogramValidator("response-size", 1)) | ||
metricValidators.registerForAllBrokers(broker, minMaxHistogramValidator("response-size", mockBrokerBytesWritten, mockBrokerBytesWritten)) | ||
} | ||
|
||
// Check that the outgoing bytes metric corresponds to what the mock broker read (request sizes) | ||
metricValidators.registerForAllBrokers(broker, countMeterValidator("outgoing-byte-rate", mockBrokerBytesRead)) | ||
metricValidators.registerForAllBrokers(broker, countMeterValidator("request-rate", 1)) | ||
metricValidators.registerForAllBrokers(broker, countHistogramValidator("request-size", 1)) | ||
metricValidators.registerForAllBrokers(broker, minMaxHistogramValidator("request-size", mockBrokerBytesRead, mockBrokerBytesRead)) | ||
|
||
// Run the validators | ||
metricValidators.run(t, broker.conf.MetricRegistry) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,8 @@ import ( | |
"crypto/tls" | ||
"regexp" | ||
"time" | ||
|
||
"github.com/rcrowley/go-metrics" | ||
) | ||
|
||
const defaultClientID = "sarama" | ||
|
@@ -233,6 +235,10 @@ type Config struct { | |
// latest features. Setting it to a version greater than you are actually | ||
// running may lead to random breakage. | ||
Version KafkaVersion | ||
// The registry to define metrics into. | ||
// Defaults to metrics.DefaultRegistry. | ||
// See Examples on how to use the metrics registry | ||
MetricRegistry metrics.Registry | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be null? Should it be, for users who don't want metrics? If not, should we validate that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it must not be null as it would panic when invoking methods on it. We can have that setting the default but I remember the overhead being minimal during my tests and when you need high performance you generally want the metrics to capture that performance. I can update some benchmarks to see the impact of metrics being enabled vs disabled. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Benchmarks would be nice, but leaving it enabled by default is fine as long as the impact isn't huge. |
||
} | ||
|
||
// NewConfig returns a new configuration instance with sane defaults. | ||
|
@@ -268,6 +274,7 @@ func NewConfig() *Config { | |
c.ClientID = defaultClientID | ||
c.ChannelBufferSize = 256 | ||
c.Version = minVersion | ||
c.MetricRegistry = metrics.DefaultRegistry | ||
|
||
return c | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably want to move this down below the
SetWriteDeadline
else you could miscount in the (admittedly exceptional) case where that fails