Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up tchannel client/endpoint/server logs #498

Merged
merged 2 commits into from
Oct 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions codegen/template_bundle/template_files.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions codegen/templates/tchannel_endpoint.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (h *{{$handlerName}}) Handle(
{{if ne .RequestType "" -}}
var req {{unref .RequestType}}
if err := req.FromWire(*wireValue); err != nil {
h.Deps.Default.ContextLogger.Warn(ctx, "Error converting request from wire", zap.Error(err))
h.Deps.Default.ContextLogger.Error(ctx, "Error converting request from wire", zap.Error(err))
return false, nil, nil, errors.Wrapf(
err, "Error converting %s.%s (%s) request from wire",
h.endpoint.EndpointID, h.endpoint.HandlerID, h.endpoint.Method,
Expand Down Expand Up @@ -152,7 +152,7 @@ func (h *{{$handlerName}}) Handle(

{{if eq (len .Exceptions) 0 -}}
if err != nil {
h.Deps.Default.ContextLogger.Warn(ctx, "Handler returned error", zap.Error(err))
h.Deps.Default.ContextLogger.Error(ctx, "Handler returned error", zap.Error(err))
return false, nil, resHeaders, err
}
res.Success = {{.RefResponse "r"}}
Expand All @@ -162,12 +162,12 @@ func (h *{{$handlerName}}) Handle(
{{$method := .Name -}}
{{range .Exceptions -}}
case *{{.Type}}:
h.Deps.Default.ContextLogger.Warn(
ctx,
"Handler returned non-nil error type *{{.Type}} but nil value",
zap.Error(err),
)
if v == nil {
h.Deps.Default.ContextLogger.Error(
ctx,
"Handler returned non-nil error type *{{.Type}} but nil value",
zap.Error(err),
)
return false, nil, resHeaders, errors.Errorf(
"%s.%s (%s) handler returned non-nil error type *{{.Type}} but nil value",
h.endpoint.EndpointID, h.endpoint.HandlerID, h.endpoint.Method,
Expand All @@ -176,7 +176,7 @@ func (h *{{$handlerName}}) Handle(
res.{{title .Name}} = v
{{end -}}
default:
h.Deps.Default.ContextLogger.Warn(ctx, "Handler returned error", zap.Error(err))
h.Deps.Default.ContextLogger.Error(ctx, "Handler returned error", zap.Error(err))
return false, nil, resHeaders, errors.Wrapf(
err, "%s.%s (%s) handler returned error",
h.endpoint.EndpointID, h.endpoint.HandlerID, h.endpoint.Method,
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 0 additions & 4 deletions examples/example-gateway/endpoints/tchannel/baz/baz_call.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@ func (w Workflow) Handle(
case *clientBaz.AuthErr:
return respHeaders, (*endpointBaz.AuthErr)(v)
default:
zanzibar.LogErrorWarnTimeoutContext(
ctx, w.contextLogger, err,
"baz.Call returned error",
)
return respHeaders, err
}
}
Expand Down
23 changes: 5 additions & 18 deletions runtime/tchannel_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ func (c *TChannelClient) call(
RequestState: rs,
})
if cerr != nil {
LogErrorWarnTimeout(call.logger, err, "Could not begin outbound request")
return errors.Wrapf(
err, "Could not begin outbound %s.%s (%s %s) request",
call.client.ClientID, call.methodName, call.client.serviceName, call.serviceMethod,
Expand Down Expand Up @@ -245,7 +244,6 @@ func (c *TChannelClient) call(
})

if err != nil {
LogErrorWarnTimeout(call.logger, err, "Could not make outbound request")
// Do not wrap system errors.
if _, ok := err.(tchannel.SystemError); ok {
return call.success, nil, err
Expand Down Expand Up @@ -277,7 +275,11 @@ func (c *tchannelOutboundCall) finish(err error) {
c.metrics.Latency.Record(c.finishTime.Sub(c.startTime))

// write logs
c.logger.Info("Finished an outgoing client TChannel request", c.logFields()...)
if err == nil {
c.logger.Info("Finished an outgoing client TChannel request", c.logFields()...)
} else {
c.logger.Warn("Failed to send outgoing client TChannel request", zap.Error(err))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be logger.error

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is open to discussion. My reasoning is that when error happens, we will look at one error message (which is logged at endpoint handler and anywhere else is warn) and could understand what happened, if that is not sufficient we then look further for the warn logs associated with the request. Based on what I have experienced, multiple error log sites that describe the same cause create distraction and make logs less effective.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could have a zap.field GUID associated with each warn, and let the error catch that GUID which is the root cause of this Error. We could have a runbook/wiki document all the mitigation step for each GUID and handle the oncall to SRE

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once we use context.logger everywhere, we will have the request uuid in the context and all the logs that associated with the request will have the same request uuid.

}
}

func (c *tchannelOutboundCall) logFields() []zapcore.Field {
Expand Down Expand Up @@ -309,22 +311,19 @@ func (c *tchannelOutboundCall) writeReqHeaders(reqHeaders map[string]string) err

twriter, err := c.call.Arg2Writer()
if err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not create arg2writer for outbound request")
return errors.Wrapf(
err, "Could not create arg2writer for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := WriteHeaders(twriter, reqHeaders); err != nil {
_ = twriter.Close()
LogErrorWarnTimeout(c.logger, err, "Could not write headers for outbound request")
return errors.Wrapf(
err, "Could not write headers for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := twriter.Close(); err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not close arg2writer for outbound request")
return errors.Wrapf(
err, "Could not close arg2writer for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
Expand All @@ -338,7 +337,6 @@ func (c *tchannelOutboundCall) writeReqHeaders(reqHeaders map[string]string) err
func (c *tchannelOutboundCall) writeReqBody(req RWTStruct) error {
structWireValue, err := req.ToWire()
if err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not write request for outbound request")
return errors.Wrapf(
err, "Could not write request for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
Expand All @@ -347,22 +345,19 @@ func (c *tchannelOutboundCall) writeReqBody(req RWTStruct) error {

twriter, err := c.call.Arg3Writer()
if err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not create arg3writer for outbound request")
return errors.Wrapf(
err, "Could not create arg3writer for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := protocol.Binary.Encode(structWireValue, twriter); err != nil {
_ = twriter.Close()
LogErrorWarnTimeout(c.logger, err, "Could not write request for outbound request")
return errors.Wrapf(
err, "Could not write request for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := twriter.Close(); err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not close arg3writer for outbound request")
return errors.Wrapf(
err, "Could not close arg3writer for outbound %s.%s (%s %s) request",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
Expand All @@ -378,7 +373,6 @@ func (c *tchannelOutboundCall) writeReqBody(req RWTStruct) error {
func (c *tchannelOutboundCall) readResHeaders(response *tchannel.OutboundCallResponse) error {
treader, err := response.Arg2Reader()
if err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not create arg2reader for outbound response")
// Do not wrap system errors.
if _, ok := err.(tchannel.SystemError); ok {
return err
Expand All @@ -390,22 +384,19 @@ func (c *tchannelOutboundCall) readResHeaders(response *tchannel.OutboundCallRes
}
if c.resHeaders, err = ReadHeaders(treader); err != nil {
_ = treader.Close()
LogErrorWarnTimeout(c.logger, err, "Could not read headers for outbound response")
return errors.Wrapf(
err, "Could not read headers for outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := EnsureEmpty(treader, "reading response headers"); err != nil {
_ = treader.Close()
LogErrorWarnTimeout(c.logger, err, "Could not ensure arg2reader is empty for outbound response")
return errors.Wrapf(
err, "Could not ensure arg2reader is empty for outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := treader.Close(); err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not close arg2reader for outbound response")
return errors.Wrapf(
err, "Could not close arg2reader for outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
Expand All @@ -421,30 +412,26 @@ func (c *tchannelOutboundCall) readResHeaders(response *tchannel.OutboundCallRes
func (c *tchannelOutboundCall) readResBody(response *tchannel.OutboundCallResponse, resp RWTStruct) error {
treader, err := response.Arg3Reader()
if err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not create arg3Reader for outbound response")
return errors.Wrapf(
err, "Could not create arg3Reader for outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := ReadStruct(treader, resp); err != nil {
_ = treader.Close()
LogErrorWarnTimeout(c.logger, err, "Could not read outbound response")
return errors.Wrapf(
err, "Could not read outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := EnsureEmpty(treader, "reading response body"); err != nil {
_ = treader.Close()
LogErrorWarnTimeout(c.logger, err, "Could not ensure arg3reader is empty for outbound response")
return errors.Wrapf(
err, "Could not ensure arg3reader is empty for outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
)
}
if err := treader.Close(); err != nil {
LogErrorWarnTimeout(c.logger, err, "Could not close arg3reader for outbound response")
return errors.Wrapf(
err, "Could not close arg3reader outbound %s.%s (%s %s) response",
c.client.ClientID, c.methodName, c.client.serviceName, c.serviceMethod,
Expand Down
25 changes: 5 additions & 20 deletions runtime/tchannel_logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,27 +126,12 @@ func (l TChannelLogger) WithFields(fields ...tchannel.LogField) tchannel.Logger
// TODO: We want to improve the classification of errors, similar to:
// https://github.com/uber/tchannel-node/blob/master/errors.js#L907-L930
//
// Deprecated: Use LogErrorWarnTimeoutContext instead.
// Deprecated: use proper level to log instead
func LogErrorWarnTimeout(logger *zap.Logger, err error, msg string) {
if err != nil {
if isTimeout(err) {
logger.Warn(msg, zap.Error(err))
} else {
logger.Error(msg, zap.Error(err))
}
}
}

// LogErrorWarnTimeoutContext logs warnings for timeout errors, otherwise logs errors
// TODO: We want to improve the classification of errors, similar to:
// https://github.com/uber/tchannel-node/blob/master/errors.js#L907-L930
func LogErrorWarnTimeoutContext(ctx context.Context, logger ContextLogger, err error, msg string) {
if err != nil {
if isTimeout(err) {
logger.Warn(ctx, msg, zap.Error(err))
} else {
logger.Error(ctx, msg, zap.Error(err))
}
if isTimeout(err) {
logger.Warn(msg, zap.Error(err))
} else {
logger.Error(msg, zap.Error(err))
}
}

Expand Down
Loading