Skip to content

Commit

Permalink
Add metrics to the Go dispatcher (#3258)
Browse files Browse the repository at this point in the history
Also fixes a panic bug in go/lib/scmp.

Adds the following metrics:
```
# HELP disp_app_conn_reg_errors_total Application socket registration errors
# TYPE disp_app_conn_reg_errors_total counter
disp_app_conn_reg_errors_total 0
# HELP disp_app_not_found_errors_total Number of packets for which the destination application was not found.
# TYPE disp_app_not_found_errors_total counter
disp_app_not_found_errors_total 43
# HELP disp_app_read_bytes_total Total bytes read from applications.
# TYPE disp_app_read_bytes_total counter
disp_app_read_bytes_total 1.0394091e+07
# HELP disp_app_read_errors_total Total errors when reading packets from applications
# TYPE disp_app_read_errors_total counter
disp_app_read_errors_total 0
# HELP disp_app_read_pkts_total Total packets read from applications
# TYPE disp_app_read_pkts_total counter
disp_app_read_pkts_total 36161
# HELP disp_app_sockets_open Number of sockets currently opened by applications.
# TYPE disp_app_sockets_open gauge
disp_app_sockets_open{type="BS A (0x0000)"} 16
disp_app_sockets_open{type="CS A (0x0002)"} 16
disp_app_sockets_open{type="PS A (0x0001)"} 16
disp_app_sockets_open{type="UNKNOWN M (0xffff)"} 216
# HELP disp_app_write_bytes_total Total bytes sent to applications.
# TYPE disp_app_write_bytes_total counter
disp_app_write_bytes_total 1.0390873e+07
# HELP disp_app_write_errors_total Send packet to applications errors.
# TYPE disp_app_write_errors_total counter
disp_app_write_errors_total 0
# HELP disp_app_write_pkts_total Total packets sent to applications.
# TYPE disp_app_write_pkts_total counter
disp_app_write_pkts_total 36118
# HELP disp_app_write_svc_pkts_total Total SVC packets delivered to applications
# TYPE disp_app_write_svc_pkts_total counter
disp_app_write_svc_pkts_total{type="BS A (0x0000)"} 458
disp_app_write_svc_pkts_total{type="BS M (0x8000)"} 1276
disp_app_write_svc_pkts_total{type="CS A (0x0002)"} 37
disp_app_write_svc_pkts_total{type="PS A (0x0001)"} 737
# HELP disp_net_read_bytes_total Total bytes received from the network irrespective of packet outcome.
# TYPE disp_net_read_bytes_total counter
disp_net_read_bytes_total 1.0394091e+07
# HELP disp_net_read_overflow_pkts_total Total ingress packets that were dropped on the OS socket
# TYPE disp_net_read_overflow_pkts_total counter
disp_net_read_overflow_pkts_total 0
# HELP disp_net_read_parse_errors_total Total network packet parse error
# TYPE disp_net_read_parse_errors_total counter
disp_net_read_parse_errors_total 0
# HELP disp_net_read_pkts_total Total packets received from the network.
# TYPE disp_net_read_pkts_total counter
disp_net_read_pkts_total{incoming_packet_result="ok"} 36161
# HELP disp_net_write_bytes_total Total bytes sent on the network.
# TYPE disp_net_write_bytes_total counter
disp_net_write_bytes_total 1.0394091e+07
# HELP disp_net_write_errors_total Network packet send errors
# TYPE disp_net_write_errors_total counter
disp_net_write_errors_total 0
# HELP disp_net_write_pkts_total Total packets sent on the network.
# TYPE disp_net_write_pkts_total counter
disp_net_write_pkts_total 36161
```

Fixes #3101.
  • Loading branch information
scrye authored Oct 21, 2019
1 parent acc86dd commit 0ea3b18
Show file tree
Hide file tree
Showing 8 changed files with 265 additions and 50 deletions.
1 change: 0 additions & 1 deletion go/godispatcher/internal/metrics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ go_library(
importpath = "github.com/scionproto/scion/go/godispatcher/internal/metrics",
visibility = ["//go/godispatcher:__subpackages__"],
deps = [
"//go/lib/addr:go_default_library",
"//go/lib/prom:go_default_library",
"@com_github_prometheus_client_golang//prometheus:go_default_library",
],
Expand Down
227 changes: 192 additions & 35 deletions go/godispatcher/internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,52 +17,209 @@ package metrics
import (
"github.com/prometheus/client_golang/prometheus"

"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/prom"
)

// Namespace is the metrics namespace for the dispatcher.
const Namespace = "disp"

// Label descriptions
// Packet result labels
const (
// IncomingPacketOutcome is the label name passed when creating the
// IncomingPackets counter vector in init.
IncomingPacketOutcome = "incoming_packet_outcome"
// OpenConnectionType is the label name passed when creating the
// OpenSockets gauge vector in init.
OpenConnectionType = "open_connection_type"
)

// Packet outcome labels
const (
// PacketOutcome* are label values for the IncomingPacketOutcome label of
// the IncomingPackets counter vector.
PacketOutcomeParseError = "parse_error"
PacketOutcomeRouteNotFound = "route_not_found"
PacketOutcomeOk = "ok"
// PacketResult* are the values intended for IncomingPacket.Result, i.e.
// for the "incoming_packet_result" label.
PacketResultParseError = "parse_error"
PacketResultRouteNotFound = "route_not_found"
PacketResultOk = "ok"
)

var (
// OutgoingPacketsTotal counts the packets sent on the network.
OutgoingPacketsTotal prometheus.Counter
// IncomingBytesTotal counts the bytes received from the network,
// irrespective of packet outcome.
IncomingBytesTotal prometheus.Counter
// OutgoingBytesTotal counts the bytes sent on the network.
OutgoingBytesTotal prometheus.Counter
// IncomingPackets counts the packets received from the network, labeled
// with IncomingPacketOutcome.
IncomingPackets *prometheus.CounterVec
// OpenSockets tracks the number of sockets currently opened by
// applications, labeled with OpenConnectionType.
OpenSockets *prometheus.GaugeVec
// M exposes all the initialized metrics for this package.
M = newMetrics()
)

// GetOpenConnectionLabel returns an SVC address string representation for sockets
// that are opened on an SVC address, or a different string otherwise.
func GetOpenConnectionLabel(svc addr.HostSVC) string {
if svc == addr.SvcNone {
return "no_svc"
// IncomingPacket carries the label values attached to the metrics that count
// packets received from the network.
type IncomingPacket struct {
	// Result is the value reported for the "incoming_packet_result" label.
	Result string
}

// Labels returns the label names, in the order in which Values reports the
// matching label values.
func (ip IncomingPacket) Labels() []string {
	return []string{
		"incoming_packet_result",
	}
}

// Values returns the label values, positionally matching the names returned
// by Labels.
func (ip IncomingPacket) Values() []string {
	return []string{
		ip.Result,
	}
}

// SVC carries the label values attached to SVC-related metrics.
type SVC struct {
	// Type is the value reported for the "type" label.
	Type string
}

// Labels returns the label names, in the order in which Values reports the
// matching label values.
func (s SVC) Labels() []string {
	return []string{
		"type",
	}
}

// Values returns the label values, positionally matching the names returned
// by Labels.
func (s SVC) Values() []string {
	return []string{
		s.Type,
	}
}

// SCMP contains the labels for SCMP-related metrics.
type SCMP struct {
	// Class is the value reported for the "class" label.
	Class string
	// Type is the value reported for the "type" label.
	Type string
}

// Labels returns the list of labels.
func (l SCMP) Labels() []string {
	return []string{"class", "type"}
}

// Values returns the label values in the order defined by Labels.
func (l SCMP) Values() []string {
	// Report the field contents, not the label names; returning the names
	// would fold every SCMP class/type into one series.
	return []string{l.Class, l.Type}
}

// metrics bundles every Prometheus collector used by the dispatcher. The
// fields are only reachable through the accessor methods on this type.
type metrics struct {
	// Traffic exchanged with the network.
	netReadBytes       prometheus.Counter
	netReadPkts        *prometheus.CounterVec
	netReadParseErrors prometheus.Counter
	netReadOverflows   prometheus.Counter
	netWriteBytes      prometheus.Counter
	netWritePkts       prometheus.Counter
	netWriteErrors     prometheus.Counter

	// Traffic exchanged with applications, and application sockets.
	appReadBytes      prometheus.Counter
	appReadPkts       prometheus.Counter
	appReadErrors     prometheus.Counter
	appWriteBytes     prometheus.Counter
	appWritePkts      prometheus.Counter
	appWriteErrors    prometheus.Counter
	appWriteSVCPkts   *prometheus.CounterVec
	appNotFoundErrors prometheus.Counter
	appConnErrors     prometheus.Counter
	openSockets       *prometheus.GaugeVec

	// SCMP packets, labeled by class and type.
	scmpReadPkts  *prometheus.CounterVec
	scmpWritePkts *prometheus.CounterVec
}

// newMetrics creates all dispatcher metrics under Namespace through the prom
// helpers and returns them bundled in a metrics value. Labeled metrics take
// their label names from the Labels method of the matching label struct, so
// names and values stay in the same order.
func newMetrics() metrics {
	return metrics{
		netWriteBytes: prom.NewCounter(Namespace, "", "net_write_bytes_total",
			"Total bytes sent on the network."),
		netWritePkts: prom.NewCounter(Namespace, "", "net_write_pkts_total",
			"Total packets sent on the network."),
		netWriteErrors: prom.NewCounter(Namespace, "", "net_write_errors_total",
			"Network packet send errors"),
		netReadBytes: prom.NewCounter(Namespace, "", "net_read_bytes_total",
			"Total bytes received from the network irrespective of packet outcome."),
		netReadPkts: prom.NewCounterVec(Namespace, "", "net_read_pkts_total",
			"Total packets received from the network.", IncomingPacket{}.Labels()),
		netReadParseErrors: prom.NewCounter(Namespace, "", "net_read_parse_errors_total",
			"Total network packet parse error"),
		appWriteBytes: prom.NewCounter(Namespace, "", "app_write_bytes_total",
			"Total bytes sent to applications."),
		appWritePkts: prom.NewCounter(Namespace, "", "app_write_pkts_total",
			"Total packets sent to applications."),
		appWriteErrors: prom.NewCounter(Namespace, "", "app_write_errors_total",
			"Send packet to applications errors."),
		appReadBytes: prom.NewCounter(Namespace, "", "app_read_bytes_total",
			"Total bytes read from applications."),
		appReadPkts: prom.NewCounter(Namespace, "", "app_read_pkts_total",
			"Total packets read from applications"),
		appReadErrors: prom.NewCounter(Namespace, "", "app_read_errors_total",
			"Total errors when reading packets from applications."),
		openSockets: prom.NewGaugeVec(Namespace, "", "app_sockets_open",
			"Number of sockets currently opened by applications.", SVC{}.Labels()),
		appConnErrors: prom.NewCounter(Namespace, "", "app_conn_reg_errors_total",
			"Application socket registration errors"),
		scmpReadPkts: prom.NewCounterVec(Namespace, "", "scmp_read_pkts_total",
			"Total SCMP packets received from the network.", SCMP{}.Labels()),
		// The write counter tracks SCMP packets going out, so its help text
		// must not repeat the read counter's description.
		scmpWritePkts: prom.NewCounterVec(Namespace, "", "scmp_write_pkts_total",
			"Total SCMP packets sent on the network.", SCMP{}.Labels()),
		appNotFoundErrors: prom.NewCounter(Namespace, "", "app_not_found_errors_total",
			"Number of packets for which the destination application was not found."),
		appWriteSVCPkts: prom.NewCounterVec(Namespace, "", "app_write_svc_pkts_total",
			"Total SVC packets delivered to applications", SVC{}.Labels()),
		netReadOverflows: prom.NewCounter(Namespace, "", "net_read_overflow_pkts_total",
			"Total ingress packets that were dropped on the OS socket"),
	}
}

// init creates the package-level dispatcher metrics through the prom helpers,
// all under Namespace, and stores them in the exported package variables.
func init() {
// Counters for traffic sent on the network.
OutgoingBytesTotal = prom.NewCounter(Namespace, "", "outgoing_bytes_total",
"Total bytes sent on the network.")
OutgoingPacketsTotal = prom.NewCounter(Namespace, "", "outgoing_packets_total",
"Total packets sent on the network.")
// Counters for traffic received from the network; the packet counter is
// labeled with IncomingPacketOutcome.
IncomingBytesTotal = prom.NewCounter(Namespace, "", "incoming_bytes_total",
"Total bytes received from the network irrespective of packet outcome.")
IncomingPackets = prom.NewCounterVec(Namespace, "", "incoming_packets_total",
"Total packets received from the network.", []string{IncomingPacketOutcome})
// Gauge of open application sockets, labeled with OpenConnectionType.
OpenSockets = prom.NewGaugeVec(Namespace, "", "open_application_connections",
"Number of sockets currently opened by applications.", []string{OpenConnectionType})
}

// NetWriteBytes returns the counter of total bytes sent on the network.
func (m metrics) NetWriteBytes() prometheus.Counter {
return m.netWriteBytes
}

// NetWritePkts returns the counter of total packets sent on the network.
func (m metrics) NetWritePkts() prometheus.Counter {
return m.netWritePkts
}

// NetReadBytes returns the counter of total bytes received from the network,
// irrespective of packet outcome.
func (m metrics) NetReadBytes() prometheus.Counter {
return m.netReadBytes
}

// NetReadPkts returns the counter of packets received from the network for
// the series selected by the given labels.
func (m metrics) NetReadPkts(labels IncomingPacket) prometheus.Counter {
	values := labels.Values()
	return m.netReadPkts.WithLabelValues(values...)
}

// NetReadParseErrors returns the counter of network packet parse errors.
func (m metrics) NetReadParseErrors() prometheus.Counter {
return m.netReadParseErrors
}

// AppWriteBytes returns the counter of total bytes sent to applications.
func (m metrics) AppWriteBytes() prometheus.Counter {
return m.appWriteBytes
}

// AppWritePkts returns the counter of total packets sent to applications.
func (m metrics) AppWritePkts() prometheus.Counter {
return m.appWritePkts
}

// AppWriteErrors returns the counter of errors encountered when sending
// packets to applications.
func (m metrics) AppWriteErrors() prometheus.Counter {
return m.appWriteErrors
}

// AppReadBytes returns the counter of total bytes read from applications.
func (m metrics) AppReadBytes() prometheus.Counter {
return m.appReadBytes
}

// AppReadPkts returns the counter of total packets read from applications.
func (m metrics) AppReadPkts() prometheus.Counter {
return m.appReadPkts
}

// AppReadErrors returns the counter of errors encountered when reading
// packets from applications.
func (m metrics) AppReadErrors() prometheus.Counter {
return m.appReadErrors
}

// OpenSockets returns the gauge of sockets currently opened by applications
// for the series selected by the given labels.
func (m metrics) OpenSockets(labels SVC) prometheus.Gauge {
	values := labels.Values()
	return m.openSockets.WithLabelValues(values...)
}

// AppConnErrors returns the counter of application socket registration
// errors.
func (m metrics) AppConnErrors() prometheus.Counter {
return m.appConnErrors
}

// NetWriteErrors returns the counter of network packet send errors.
func (m metrics) NetWriteErrors() prometheus.Counter {
return m.netWriteErrors
}

// SCMPReadPkts returns the counter of SCMP packets read from the network for
// the series selected by the given class and type labels.
func (m metrics) SCMPReadPkts(labels SCMP) prometheus.Counter {
	values := labels.Values()
	return m.scmpReadPkts.WithLabelValues(values...)
}

// SCMPWritePkts returns the counter of SCMP packets written to the network
// for the series selected by the given class and type labels.
func (m metrics) SCMPWritePkts(labels SCMP) prometheus.Counter {
	values := labels.Values()
	return m.scmpWritePkts.WithLabelValues(values...)
}

// AppNotFoundErrors returns the counter of packets for which the destination
// application was not found.
func (m metrics) AppNotFoundErrors() prometheus.Counter {
return m.appNotFoundErrors
}

// AppWriteSVCPkts returns the counter of SVC packets delivered to
// applications for the series selected by the given labels.
func (m metrics) AppWriteSVCPkts(labels SVC) prometheus.Counter {
	values := labels.Values()
	return m.appWriteSVCPkts.WithLabelValues(values...)
}

// NetReadOverflows returns the counter of ingress packets that were dropped
// on the OS socket.
func (m metrics) NetReadOverflows() prometheus.Counter {
return m.netReadOverflows
}
13 changes: 11 additions & 2 deletions go/godispatcher/internal/respool/packet.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ type Packet struct {
refCount *int
}

// Len returns the length of the packet, i.e. the current length of its
// underlying buffer.
func (pkt *Packet) Len() int {
	return len(pkt.buffer)
}

func newPacket() *Packet {
refCount := 1
return &Packet{
Expand Down Expand Up @@ -99,11 +104,15 @@ func (pkt *Packet) DecodeFromConn(conn net.PacketConn) error {
return err
}
pkt.buffer = pkt.buffer[:n]
metrics.IncomingBytesTotal.Add(float64(n))
metrics.M.NetReadBytes().Add(float64(n))

pkt.OverlayRemote = readExtra.(*net.UDPAddr)
if err = hpkt.ParseScnPkt(&pkt.Info, pkt.buffer); err != nil {
metrics.IncomingPackets.WithLabelValues(metrics.PacketOutcomeParseError).Inc()
metrics.M.NetReadPkts(
metrics.IncomingPacket{
Result: metrics.PacketResultParseError,
},
).Inc()
return err
}
return nil
Expand Down
28 changes: 23 additions & 5 deletions go/godispatcher/network/app_socket.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,13 @@ func (h *AppConnHandler) Handle() {

ref, tableEntry, useIPv6, err := h.doRegExchange()
if err != nil {
metrics.M.AppConnErrors().Inc()
h.Logger.Warn("registration error", "err", err)
return
}
defer ref.Free()
metrics.OpenSockets.WithLabelValues(metrics.GetOpenConnectionLabel(ref.SVCAddr())).Inc()
defer metrics.OpenSockets.WithLabelValues(metrics.GetOpenConnectionLabel(ref.SVCAddr())).Dec()
metrics.M.OpenSockets(metrics.SVC{Type: ref.SVCAddr().String()}).Inc()
defer metrics.M.OpenSockets(metrics.SVC{Type: ref.SVCAddr().String()}).Dec()

defer tableEntry.appIngressRing.Close()
go func() {
Expand Down Expand Up @@ -211,27 +212,40 @@ func (h *AppConnHandler) RunAppToNetDataplane(ref registration.RegReference,
h.Logger.Info("[app->network] EOF received from client")
} else {
h.Logger.Error("[app->network] Client connection error", "err", err)
metrics.M.AppReadErrors().Inc()
}
return
}
metrics.M.AppReadBytes().Add(float64(pkt.Len()))
metrics.M.AppReadPkts().Inc()

if err := registerIfSCMPRequest(ref, &pkt.Info); err != nil {
log.Warn("SCMP Request ID error, packet still sent", "err", err)
}

n, err := pkt.SendOnConn(ovConn, pkt.OverlayRemote)
if err != nil {
metrics.M.NetWriteErrors().Inc()
h.Logger.Error("[app->network] Overlay socket error", "err", err)
} else {
metrics.OutgoingBytesTotal.Add(float64(n))
metrics.OutgoingPacketsTotal.Inc()
metrics.M.NetWriteBytes().Add(float64(n))
metrics.M.NetWritePkts().Inc()
}
pkt.Free()
}
}

// registerIfSCMPRequest registers the ID of the SCMP Request, if it is an
// SCMP::General::EchoRequest, SCMP::General::TraceRouteRequest or SCMP::General::RecordPathRequest
// packet. It also increments SCMP-related metrics.
func registerIfSCMPRequest(ref registration.RegReference, packet *spkt.ScnPkt) error {
if scmpHdr, ok := packet.L4.(*scmp.Hdr); ok {
metrics.M.SCMPWritePkts(
metrics.SCMP{
Class: scmpHdr.Class.String(),
Type: scmpHdr.Type.Name(scmpHdr.Class),
},
).Inc()
if !isSCMPGeneralRequest(scmpHdr) {
return nil
}
Expand Down Expand Up @@ -262,11 +276,15 @@ func (h *AppConnHandler) RunRingToAppDataplane(r *ringbuf.Ring) {
h.Logger.Warn("[network->app] Unable to encode overlay address.", "err", err)
continue
}
if _, err := pkt.SendOnConn(h.Conn, overlayAddr); err != nil {
n, err := pkt.SendOnConn(h.Conn, overlayAddr)
if err != nil {
metrics.M.AppWriteErrors().Inc()
h.Logger.Error("[network->app] App connection error.", "err", err)
h.Conn.Close()
return
}
metrics.M.AppWritePkts().Inc()
metrics.M.AppWriteBytes().Add(float64(n))
pkt.Free()
}
}
Expand Down
6 changes: 6 additions & 0 deletions go/godispatcher/network/dispatcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"sync"
"time"

"github.com/scionproto/scion/go/godispatcher/internal/metrics"
"github.com/scionproto/scion/go/lib/addr"
"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/log"
Expand Down Expand Up @@ -168,6 +169,11 @@ type throttledMetaLogger struct {
func (p *throttledMetaLogger) Handle(m *conn.ReadMeta) {
p.mu.Lock()
if m.RcvOvfl != p.lastPrintValue && time.Since(p.lastPrintTimestamp) > p.MinInterval {
if m.RcvOvfl > p.lastPrintValue {
metrics.M.NetReadOverflows().Add(float64(m.RcvOvfl - p.lastPrintValue))
} else {
metrics.M.NetReadOverflows().Add(float64(m.RcvOvfl))
}
p.Logger.Debug("Detected socket overflow", "total_cnt", m.RcvOvfl)
p.lastPrintTimestamp = time.Now()
p.lastPrintValue = m.RcvOvfl
Expand Down
Loading

0 comments on commit 0ea3b18

Please sign in to comment.