diff --git a/go/godispatcher/internal/metrics/BUILD.bazel b/go/godispatcher/internal/metrics/BUILD.bazel index 86c9031ba6..d6bfa7e946 100644 --- a/go/godispatcher/internal/metrics/BUILD.bazel +++ b/go/godispatcher/internal/metrics/BUILD.bazel @@ -6,7 +6,6 @@ go_library( importpath = "github.com/scionproto/scion/go/godispatcher/internal/metrics", visibility = ["//go/godispatcher:__subpackages__"], deps = [ - "//go/lib/addr:go_default_library", "//go/lib/prom:go_default_library", "@com_github_prometheus_client_golang//prometheus:go_default_library", ], diff --git a/go/godispatcher/internal/metrics/metrics.go b/go/godispatcher/internal/metrics/metrics.go index b3871f4f3a..33d19d02a7 100644 --- a/go/godispatcher/internal/metrics/metrics.go +++ b/go/godispatcher/internal/metrics/metrics.go @@ -17,52 +17,209 @@ package metrics import ( "github.com/prometheus/client_golang/prometheus" - "github.com/scionproto/scion/go/lib/addr" "github.com/scionproto/scion/go/lib/prom" ) // Namespace is the metrics namespace for the dispatcher. const Namespace = "disp" -// Label descriptions +// Packet result labels const ( - IncomingPacketOutcome = "incoming_packet_outcome" - OpenConnectionType = "open_connection_type" -) - -// Packet outcome labels -const ( - PacketOutcomeParseError = "parse_error" - PacketOutcomeRouteNotFound = "route_not_found" - PacketOutcomeOk = "ok" + PacketResultParseError = "parse_error" + PacketResultRouteNotFound = "route_not_found" + PacketResultOk = "ok" ) var ( - OutgoingPacketsTotal prometheus.Counter - IncomingBytesTotal prometheus.Counter - OutgoingBytesTotal prometheus.Counter - IncomingPackets *prometheus.CounterVec - OpenSockets *prometheus.GaugeVec + // M exposes all the initialized metrics for this package. + M = newMetrics() ) -// GetOpenConnectionLabel returns an SVC address string representation for sockets -// that are opened on an SVC address, or a different string otherwise. 
-func GetOpenConnectionLabel(svc addr.HostSVC) string { - if svc == addr.SvcNone { - return "no_svc" +// IncomingPacket contains the labels for incoming packet metrics. +type IncomingPacket struct { + Result string +} + +// Labels returns the list of labels. +func (l IncomingPacket) Labels() []string { + return []string{"incoming_packet_result"} +} + +// Values returns the label values in the order defined by Labels. +func (l IncomingPacket) Values() []string { + return []string{l.Result} +} + +// SVC contains the labels for SVC-related metrics. +type SVC struct { + Type string +} + +// Labels returns the list of labels. +func (l SVC) Labels() []string { + return []string{"type"} +} + +// Values returns the label values in the order defined by Labels. +func (l SVC) Values() []string { + return []string{l.Type} +} + +// SCMP contains the labels for SCMP-related metrics. +type SCMP struct { + Class string + Type string +} + +// Labels returns the list of labels. +func (l SCMP) Labels() []string { + return []string{"class", "type"} +} + +// Values returns the label values in the order defined by Labels. 
+func (l SCMP) Values() []string { + return []string{l.Class, l.Type} +} + +type metrics struct { + netWriteBytes prometheus.Counter + netWritePkts prometheus.Counter + netWriteErrors prometheus.Counter + netReadBytes prometheus.Counter + netReadPkts *prometheus.CounterVec + netReadParseErrors prometheus.Counter + appWriteBytes prometheus.Counter + appWritePkts prometheus.Counter + appWriteErrors prometheus.Counter + appReadBytes prometheus.Counter + appReadPkts prometheus.Counter + appReadErrors prometheus.Counter + openSockets *prometheus.GaugeVec + appConnErrors prometheus.Counter + scmpReadPkts *prometheus.CounterVec + scmpWritePkts *prometheus.CounterVec + appNotFoundErrors prometheus.Counter + appWriteSVCPkts *prometheus.CounterVec + netReadOverflows prometheus.Counter +} + +func newMetrics() metrics { + return metrics{ + netWriteBytes: prom.NewCounter(Namespace, "", "net_write_bytes_total", + "Total bytes sent on the network."), + netWritePkts: prom.NewCounter(Namespace, "", "net_write_pkts_total", + "Total packets sent on the network."), + netWriteErrors: prom.NewCounter(Namespace, "", "net_write_errors_total", + "Network packet send errors"), + netReadBytes: prom.NewCounter(Namespace, "", "net_read_bytes_total", + "Total bytes received from the network irrespective of packet outcome."), + netReadPkts: prom.NewCounterVec(Namespace, "", "net_read_pkts_total", + "Total packets received from the network.", IncomingPacket{}.Labels()), + netReadParseErrors: prom.NewCounter(Namespace, "", "net_read_parse_errors_total", + "Total network packet parse error"), + appWriteBytes: prom.NewCounter(Namespace, "", "app_write_bytes_total", + "Total bytes sent to applications."), + appWritePkts: prom.NewCounter(Namespace, "", "app_write_pkts_total", + "Total packets sent to applications."), + appWriteErrors: prom.NewCounter(Namespace, "", "app_write_errors_total", + "Send packet to applications errors."), + appReadBytes: prom.NewCounter(Namespace, "", "app_read_bytes_total", 
+ "Total bytes read from applications."), + appReadPkts: prom.NewCounter(Namespace, "", "app_read_pkts_total", + "Total packets read from applications"), + appReadErrors: prom.NewCounter(Namespace, "", "app_read_errors_total", + "Total errors when reading packets from applications."), + openSockets: prom.NewGaugeVec(Namespace, "", "app_sockets_open", + "Number of sockets currently opened by applications.", SVC{}.Labels()), + appConnErrors: prom.NewCounter(Namespace, "", "app_conn_reg_errors_total", + "Application socket registration errors"), + scmpReadPkts: prom.NewCounterVec(Namespace, "", "scmp_read_pkts_total", + "Total SCMP packets received from the network.", SCMP{}.Labels()), + scmpWritePkts: prom.NewCounterVec(Namespace, "", "scmp_write_pkts_total", + "Total SCMP packets sent on the network.", SCMP{}.Labels()), + appNotFoundErrors: prom.NewCounter(Namespace, "", "app_not_found_errors_total", + "Number of packets for which the destination application was not found."), + appWriteSVCPkts: prom.NewCounterVec(Namespace, "", "app_write_svc_pkts_total", + "Total SVC packets delivered to applications", SVC{}.Labels()), + netReadOverflows: prom.NewCounter(Namespace, "", "net_read_overflow_pkts_total", + "Total ingress packets that were dropped on the OS socket"), } - return svc.BaseString() -} - -func init() { - OutgoingBytesTotal = prom.NewCounter(Namespace, "", "outgoing_bytes_total", - "Total bytes sent on the network.") - OutgoingPacketsTotal = prom.NewCounter(Namespace, "", "outgoing_packets_total", - "Total packets sent on the network.") - IncomingBytesTotal = prom.NewCounter(Namespace, "", "incoming_bytes_total", - "Total bytes received from the network irrespective of packet outcome.") - IncomingPackets = prom.NewCounterVec(Namespace, "", "incoming_packets_total", - "Total packets received from the network.", []string{IncomingPacketOutcome}) - OpenSockets = prom.NewGaugeVec(Namespace, "", "open_application_connections", - "Number of sockets currently 
opened by applications.", []string{OpenConnectionType}) +} + +func (m metrics) NetWriteBytes() prometheus.Counter { + return m.netWriteBytes +} + +func (m metrics) NetWritePkts() prometheus.Counter { + return m.netWritePkts +} + +func (m metrics) NetReadBytes() prometheus.Counter { + return m.netReadBytes +} + +func (m metrics) NetReadPkts(labels IncomingPacket) prometheus.Counter { + return m.netReadPkts.WithLabelValues(labels.Values()...) +} + +func (m metrics) NetReadParseErrors() prometheus.Counter { + return m.netReadParseErrors +} + +func (m metrics) AppWriteBytes() prometheus.Counter { + return m.appWriteBytes +} + +func (m metrics) AppWritePkts() prometheus.Counter { + return m.appWritePkts +} + +func (m metrics) AppWriteErrors() prometheus.Counter { + return m.appWriteErrors +} + +func (m metrics) AppReadBytes() prometheus.Counter { + return m.appReadBytes +} + +func (m metrics) AppReadPkts() prometheus.Counter { + return m.appReadPkts +} + +func (m metrics) AppReadErrors() prometheus.Counter { + return m.appReadErrors +} + +func (m metrics) OpenSockets(labels SVC) prometheus.Gauge { + return m.openSockets.WithLabelValues(labels.Values()...) +} + +func (m metrics) AppConnErrors() prometheus.Counter { + return m.appConnErrors +} + +func (m metrics) NetWriteErrors() prometheus.Counter { + return m.netWriteErrors +} + +// SCMPReadPkts returns the metrics counters for SCMP packets read from the network. +func (m metrics) SCMPReadPkts(labels SCMP) prometheus.Counter { + return m.scmpReadPkts.WithLabelValues(labels.Values()...) +} + +// SCMPWritePkts returns the metrics counters for SCMP packets written to the network. +func (m metrics) SCMPWritePkts(labels SCMP) prometheus.Counter { + return m.scmpWritePkts.WithLabelValues(labels.Values()...) 
+} + +func (m metrics) AppNotFoundErrors() prometheus.Counter { + return m.appNotFoundErrors +} + +func (m metrics) AppWriteSVCPkts(labels SVC) prometheus.Counter { + return m.appWriteSVCPkts.WithLabelValues(labels.Values()...) +} + +func (m metrics) NetReadOverflows() prometheus.Counter { + return m.netReadOverflows } diff --git a/go/godispatcher/internal/respool/packet.go b/go/godispatcher/internal/respool/packet.go index ff6d8652ce..e602d95144 100644 --- a/go/godispatcher/internal/respool/packet.go +++ b/go/godispatcher/internal/respool/packet.go @@ -52,6 +52,11 @@ type Packet struct { refCount *int } +// Len returns the length of the packet. +func (p *Packet) Len() int { + return len(p.buffer) +} + func newPacket() *Packet { refCount := 1 return &Packet{ @@ -99,11 +104,15 @@ func (pkt *Packet) DecodeFromConn(conn net.PacketConn) error { return err } pkt.buffer = pkt.buffer[:n] - metrics.IncomingBytesTotal.Add(float64(n)) + metrics.M.NetReadBytes().Add(float64(n)) pkt.OverlayRemote = readExtra.(*net.UDPAddr) if err = hpkt.ParseScnPkt(&pkt.Info, pkt.buffer); err != nil { - metrics.IncomingPackets.WithLabelValues(metrics.PacketOutcomeParseError).Inc() + metrics.M.NetReadPkts( + metrics.IncomingPacket{ + Result: metrics.PacketResultParseError, + }, + ).Inc() return err } return nil diff --git a/go/godispatcher/network/app_socket.go b/go/godispatcher/network/app_socket.go index 274b100581..40642fe9cf 100644 --- a/go/godispatcher/network/app_socket.go +++ b/go/godispatcher/network/app_socket.go @@ -97,12 +97,13 @@ func (h *AppConnHandler) Handle() { ref, tableEntry, useIPv6, err := h.doRegExchange() if err != nil { + metrics.M.AppConnErrors().Inc() h.Logger.Warn("registration error", "err", err) return } defer ref.Free() - metrics.OpenSockets.WithLabelValues(metrics.GetOpenConnectionLabel(ref.SVCAddr())).Inc() - defer metrics.OpenSockets.WithLabelValues(metrics.GetOpenConnectionLabel(ref.SVCAddr())).Dec() + metrics.M.OpenSockets(metrics.SVC{Type: 
ref.SVCAddr().String()}).Inc() + defer metrics.M.OpenSockets(metrics.SVC{Type: ref.SVCAddr().String()}).Dec() defer tableEntry.appIngressRing.Close() go func() { @@ -211,9 +212,12 @@ func (h *AppConnHandler) RunAppToNetDataplane(ref registration.RegReference, h.Logger.Info("[app->network] EOF received from client") } else { h.Logger.Error("[app->network] Client connection error", "err", err) + metrics.M.AppReadErrors().Inc() } return } + metrics.M.AppReadBytes().Add(float64(pkt.Len())) + metrics.M.AppReadPkts().Inc() if err := registerIfSCMPRequest(ref, &pkt.Info); err != nil { log.Warn("SCMP Request ID error, packet still sent", "err", err) @@ -221,17 +225,27 @@ func (h *AppConnHandler) RunAppToNetDataplane(ref registration.RegReference, n, err := pkt.SendOnConn(ovConn, pkt.OverlayRemote) if err != nil { + metrics.M.NetWriteErrors().Inc() h.Logger.Error("[app->network] Overlay socket error", "err", err) } else { - metrics.OutgoingBytesTotal.Add(float64(n)) - metrics.OutgoingPacketsTotal.Inc() + metrics.M.NetWriteBytes().Add(float64(n)) + metrics.M.NetWritePkts().Inc() } pkt.Free() } } +// registerIfSCMPRequest registers the ID of the SCMP Request, if it is an +// SCMP::General::EchoRequest, SCMP::General::TraceRouteRequest or SCMP::General::RecordPathRequest +// packet. It also increments SCMP-related metrics. 
func registerIfSCMPRequest(ref registration.RegReference, packet *spkt.ScnPkt) error { if scmpHdr, ok := packet.L4.(*scmp.Hdr); ok { + metrics.M.SCMPWritePkts( + metrics.SCMP{ + Class: scmpHdr.Class.String(), + Type: scmpHdr.Type.Name(scmpHdr.Class), + }, + ).Inc() if !isSCMPGeneralRequest(scmpHdr) { return nil } @@ -262,11 +276,15 @@ func (h *AppConnHandler) RunRingToAppDataplane(r *ringbuf.Ring) { h.Logger.Warn("[network->app] Unable to encode overlay address.", "err", err) continue } - if _, err := pkt.SendOnConn(h.Conn, overlayAddr); err != nil { + n, err := pkt.SendOnConn(h.Conn, overlayAddr) + if err != nil { + metrics.M.AppWriteErrors().Inc() h.Logger.Error("[network->app] App connection error.", "err", err) h.Conn.Close() return } + metrics.M.AppWritePkts().Inc() + metrics.M.AppWriteBytes().Add(float64(n)) pkt.Free() } } diff --git a/go/godispatcher/network/dispatcher.go b/go/godispatcher/network/dispatcher.go index 67ede6fc1b..b4b06950ce 100644 --- a/go/godispatcher/network/dispatcher.go +++ b/go/godispatcher/network/dispatcher.go @@ -20,6 +20,7 @@ import ( "sync" "time" + "github.com/scionproto/scion/go/godispatcher/internal/metrics" "github.com/scionproto/scion/go/lib/addr" "github.com/scionproto/scion/go/lib/common" "github.com/scionproto/scion/go/lib/log" @@ -168,6 +169,11 @@ type throttledMetaLogger struct { func (p *throttledMetaLogger) Handle(m *conn.ReadMeta) { p.mu.Lock() if m.RcvOvfl != p.lastPrintValue && time.Since(p.lastPrintTimestamp) > p.MinInterval { + if m.RcvOvfl > p.lastPrintValue { + metrics.M.NetReadOverflows().Add(float64(m.RcvOvfl - p.lastPrintValue)) + } else { + metrics.M.NetReadOverflows().Add(float64(m.RcvOvfl)) + } p.Logger.Debug("Detected socket overflow", "total_cnt", m.RcvOvfl) p.lastPrintTimestamp = time.Now() p.lastPrintValue = m.RcvOvfl diff --git a/go/godispatcher/network/overlay.go b/go/godispatcher/network/overlay.go index 80c624ea9c..5965abfcc3 100644 --- a/go/godispatcher/network/overlay.go +++ 
b/go/godispatcher/network/overlay.go @@ -65,10 +65,12 @@ func (dp *NetToRingDataplane) Run() error { dst, err := ComputeDestination(&pkt.Info) if err != nil { log.Warn("unable to route packet", "err", err) - metrics.IncomingPackets.WithLabelValues(metrics.PacketOutcomeRouteNotFound).Inc() + metrics.M.NetReadPkts( + metrics.IncomingPacket{Result: metrics.PacketResultRouteNotFound}, + ).Inc() continue } - metrics.IncomingPackets.WithLabelValues(metrics.PacketOutcomeOk).Inc() + metrics.M.NetReadPkts(metrics.IncomingPacket{Result: metrics.PacketResultOk}).Inc() dst.Send(dp, pkt) } } @@ -96,7 +98,15 @@ func ComputeUDPDestination(packet *spkt.ScnPkt, header *l4.UDP) (Destination, er } } +// ComputeSCMPDestination decides which application to send the SCMP packet to. It also increments +// SCMP-related metrics. func ComputeSCMPDestination(packet *spkt.ScnPkt, header *scmp.Hdr) (Destination, error) { + metrics.M.SCMPReadPkts( + metrics.SCMP{ + Class: header.Class.String(), + Type: header.Type.Name(header.Class), + }, + ).Inc() if packet.DstHost.Type() != addr.HostTypeIPv4 && packet.DstHost.Type() != addr.HostTypeIPv6 { return nil, common.NewBasicError(ErrUnsupportedSCMPDestination, nil, "type", packet.DstHost.Type()) @@ -159,6 +169,7 @@ type UDPDestination net.UDPAddr func (d *UDPDestination) Send(dp *NetToRingDataplane, pkt *respool.Packet) { routingEntry, ok := dp.RoutingTable.LookupPublic(pkt.Info.DstIA, (*net.UDPAddr)(d)) if !ok { + metrics.M.AppNotFoundErrors().Inc() log.Warn("destination address not found", "ia", pkt.Info.DstIA, "udpAddr", (*net.UDPAddr)(d)) return @@ -175,6 +186,7 @@ func (d SVCDestination) Send(dp *NetToRingDataplane, pkt *respool.Packet) { // information found in the overlay IP header. 
routingEntries := dp.RoutingTable.LookupService(pkt.Info.DstIA, addr.HostSVC(d), nil) if len(routingEntries) == 0 { + metrics.M.AppNotFoundErrors().Inc() log.Warn("destination address not found", "ia", pkt.Info.DstIA, "svc", addr.HostSVC(d)) return } @@ -183,6 +195,7 @@ func (d SVCDestination) Send(dp *NetToRingDataplane, pkt *respool.Packet) { pkt.Dup() } for _, routingEntry := range routingEntries { + metrics.M.AppWriteSVCPkts(metrics.SVC{Type: addr.HostSVC(d).String()}).Inc() sendPacket(routingEntry, pkt) } } @@ -196,6 +209,7 @@ type SCMPAppDestination struct { func (d *SCMPAppDestination) Send(dp *NetToRingDataplane, pkt *respool.Packet) { routingEntry, ok := dp.RoutingTable.LookupID(pkt.Info.DstIA, d.ID) if !ok { + metrics.M.AppNotFoundErrors().Inc() log.Warn("destination address not found", "SCMP", d.ID) return } @@ -231,6 +245,17 @@ func (h SCMPHandlerDestination) Send(dp *NetToRingDataplane, pkt *respool.Packet return } + if scmpHdr, ok := pkt.Info.L4.(*scmp.Hdr); ok { + // above ok should always be true, because this handler only gets invoked when + // replying to SCMP packets + metrics.M.SCMPWritePkts( + metrics.SCMP{ + Class: scmpHdr.Class.String(), + Type: scmpHdr.Type.Name(scmpHdr.Class), + }, + ).Inc() + } + _, err = dp.OverlayConn.WriteTo(b[:n], pkt.OverlayRemote) if err != nil { log.Warn("Unable to write to overlay socket.", "err", err) diff --git a/go/godispatcher/network/table.go b/go/godispatcher/network/table.go index f48b402563..5247a1db22 100644 --- a/go/godispatcher/network/table.go +++ b/go/godispatcher/network/table.go @@ -29,7 +29,7 @@ type TableEntry struct { func newTableEntry(conn net.PacketConn) *TableEntry { // Construct application ingress ring buffer - appIngressRing := ringbuf.New(128, nil, "dispatcher") + appIngressRing := ringbuf.New(128, nil, "net_to_app_ring") return &TableEntry{ conn: conn, appIngressRing: appIngressRing, diff --git a/go/lib/scmp/scmp.go b/go/lib/scmp/scmp.go index 29bdac98d0..922e0df66b 100644 --- 
a/go/lib/scmp/scmp.go +++ b/go/lib/scmp/scmp.go @@ -105,7 +105,8 @@ const ( ) var typeNameMap = map[Class][]string{ - C_General: {"UNSPECIFIED", "ECHO_REQEST", "ECHO_REPLY"}, + C_General: {"UNSPECIFIED", "ECHO_REQUEST", "ECHO_REPLY", "TRACE_ROUTE_REQUEST", + "TRACE_ROUTE_REPLY", "RECORD_PATH_REQUEST", "RECORD_PATH_REPLY"}, C_Routing: {"UNREACH_NET", "UNREACH_HOST", "L2_ERROR", "UNREACH_PROTO", "UNREACH_PORT", "UNKNOWN_HOST", "BAD_HOST", "OVERSIZE_PKT", "ADMIN_DENIED"}, C_CmnHdr: {"BAD_VERSION", "BAD_DST_TYPE", "BAD_SRC_TYPE", @@ -120,10 +121,10 @@ var typeNameMap = map[Class][]string{ func (t Type) Name(c Class) string { names, ok := typeNameMap[c] - if !ok || int(t) > len(names) { - return fmt.Sprintf("Type(%d)", t) + if ok && int(t) < len(names) { + return fmt.Sprintf("%s(%d)", names[t], t) } - return fmt.Sprintf("%s(%d)", names[t], t) + return fmt.Sprintf("Type(%d)", t) } type ClassType struct {