From 8058323760e3165d634ce0cf82dfd02607fd4c9b Mon Sep 17 00:00:00 2001 From: Prem Chaitanya Prathi Date: Tue, 30 Jul 2024 19:26:49 +0530 Subject: [PATCH] fix: network monitor improvements (#2939) --- apps/networkmonitor/README.md | 18 +++++++- apps/networkmonitor/docker-compose.yml | 34 +++++++++++++++ apps/networkmonitor/networkmonitor.nim | 43 +++++++++---------- .../networkmonitor/networkmonitor_metrics.nim | 1 + apps/networkmonitor/prometheus.yaml | 9 ++++ waku/waku_metadata/protocol.nim | 3 +- 6 files changed, 82 insertions(+), 26 deletions(-) create mode 100644 apps/networkmonitor/docker-compose.yml create mode 100644 apps/networkmonitor/prometheus.yaml diff --git a/apps/networkmonitor/README.md b/apps/networkmonitor/README.md index 73143ccbf3..5d4322d4c9 100644 --- a/apps/networkmonitor/README.md +++ b/apps/networkmonitor/README.md @@ -18,13 +18,27 @@ networkmonitor [OPTIONS]... The following options are available: - -l, --log-level Sets the log level [=LogLevel.DEBUG]. + -l, --log-level Sets the log level [=LogLevel.INFO]. -t, --timeout Timeout to consider that the connection failed [=chronos.seconds(10)]. -b, --bootstrap-node Bootstrap ENR node. Argument may be repeated. [=@[""]]. --dns-discovery-url URL for DNS node list in format 'enrtree://@'. + --pubsub-topic Default pubsub topic to subscribe to. Argument may be repeated. -r, --refresh-interval How often new peers are discovered and connected to (in seconds) [=5]. + --cluster-id Cluster id that the node is running in. Node in a different cluster id is + disconnected. [=1]. + --rln-relay Enable spam protection through rln-relay: true|false [=true]. + --rln-relay-dynamic Enable waku-rln-relay with on-chain dynamic group management: true|false + [=true]. + --rln-relay-tree-path Path to the RLN merkle tree sled db (https://github.com/spacejam/sled). + --rln-relay-eth-client-address HTTP address of an Ethereum testnet client e.g., http://localhost:8540/ + [=http://localhost:8540/]. 
+ --rln-relay-eth-contract-address Address of membership contract on an Ethereum testnet. + --rln-relay-epoch-sec Epoch size in seconds used to rate limit RLN memberships. Default is 1 second. + [=1]. + --rln-relay-user-message-limit Set a user message limit for the rln membership registration. Must be a positive + integer. Default is 1. [=1]. --metrics-server Enable the metrics server: true|false [=true]. - --metrics-server-address Listening address of the metrics server. [=ValidIpAddress.init("127.0.0.1")]. + --metrics-server-address Listening address of the metrics server. [=parseIpAddress("127.0.0.1")]. --metrics-server-port Listening HTTP port of the metrics server. [=8008]. --metrics-rest-address Listening address of the metrics rest server. [=127.0.0.1]. --metrics-rest-port Listening HTTP port of the metrics rest server. [=8009]. diff --git a/apps/networkmonitor/docker-compose.yml b/apps/networkmonitor/docker-compose.yml new file mode 100644 index 0000000000..d7bf661994 --- /dev/null +++ b/apps/networkmonitor/docker-compose.yml @@ -0,0 +1,34 @@ +version: '3.8' +networks: + monitoring: + driver: bridge + +volumes: + prometheus-data: + driver: local + grafana-data: + driver: local + +# Services definitions +services: + + prometheus: + image: docker.io/prom/prometheus:latest + container_name: prometheus + ports: + - 9090:9090 + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + - ./data:/prometheus + restart: unless-stopped + + grafana: + image: grafana/grafana-oss:latest + container_name: grafana + ports: + - '3000:3000' + volumes: + - grafana-data:/var/lib/grafana + restart: unless-stopped diff --git a/apps/networkmonitor/networkmonitor.nim b/apps/networkmonitor/networkmonitor.nim index fe4062b35a..38591efaef 100644 --- a/apps/networkmonitor/networkmonitor.nim +++ b/apps/networkmonitor/networkmonitor.nim @@ -45,10 +45,10 @@ const MaxConnectedPeers = 150 const git_version* 
{.strdefine.} = "n/a" -proc setDiscoveredPeersCapabilities(routingTableNodes: seq[Node]) = +proc setDiscoveredPeersCapabilities(routingTableNodes: seq[waku_enr.Record]) = for capability in @[Relay, Store, Filter, Lightpush]: let nOfNodesWithCapability = - routingTableNodes.countIt(it.record.supportsCapability(capability)) + routingTableNodes.countIt(it.supportsCapability(capability)) info "capabilities as per ENR waku flag", capability = capability, amount = nOfNodesWithCapability networkmonitor_peer_type_as_per_enr.set( @@ -116,7 +116,7 @@ proc shouldReconnect(customPeerInfo: CustomPeerInfoRef): bool = # TODO: Split in discover, connect proc setConnectedPeersMetrics( - discoveredNodes: seq[Node], + discoveredNodes: seq[waku_enr.Record], node: WakuNode, timeout: chronos.Duration, restClient: RestClientRef, @@ -141,20 +141,10 @@ proc setConnectedPeersMetrics( # iterate all newly discovered nodes for discNode in discoveredNodes: - let typedRecord = discNode.record.toTypedRecord() - if not typedRecord.isOk(): - warn "could not convert record to typed record", record = discNode.record - continue - - let secp256k1 = typedRecord.get().secp256k1 - if not secp256k1.isSome(): - warn "could not get secp256k1 key", typedRecord = typedRecord.get() - continue - - let peerRes = toRemotePeerInfo(discNode.record) + let peerRes = toRemotePeerInfo(discNode) let peerInfo = peerRes.valueOr: - warn "error converting record to remote peer info", record = discNode.record + warn "error converting record to remote peer info", record = discNode continue # create new entry if new peerId found @@ -169,10 +159,17 @@ proc setConnectedPeersMetrics( let customPeerInfo = allPeers[peerId] customPeerInfo.lastTimeDiscovered = currentTime - customPeerInfo.enr = discNode.record.toURI() - customPeerInfo.enrCapabilities = discNode.record.getCapabilities().mapIt($it) + customPeerInfo.enr = discNode.toURI() + customPeerInfo.enrCapabilities = discNode.getCapabilities().mapIt($it) customPeerInfo.discovered 
+= 1 + for maddr in peerInfo.addrs: + if $maddr notin customPeerInfo.maddrs: + customPeerInfo.maddrs.add $maddr + let typedRecord = discNode.toTypedRecord() + if not typedRecord.isOk(): + warn "could not convert record to typed record", record = discNode + continue if not typedRecord.get().ip.isSome(): warn "ip field is not set", record = typedRecord.get() continue @@ -301,13 +298,13 @@ proc crawlNetwork( while true: let startTime = Moment.now() # discover new random nodes - let discoveredNodes = await wakuDiscv5.protocol.queryRandom() + let discoveredNodes = await wakuDiscv5.findRandomPeers() # nodes are nested into bucket, flat it - let flatNodes = wakuDiscv5.protocol.routingTable.buckets.mapIt(it.nodes).flatten() + #let flatNodes = wakuDiscv5.protocol.routingTable.buckets.mapIt(it.nodes).flatten() # populate metrics related to capabilities as advertised by the ENR (see waku field) - setDiscoveredPeersCapabilities(flatNodes) + setDiscoveredPeersCapabilities(discoveredNodes) # tries to connect to all newly discovered nodes # and populates metrics related to peers we could connect @@ -321,10 +318,10 @@ proc crawlNetwork( # populate info from ip addresses await populateInfoFromIp(allPeersRef, restClient) - let totalNodes = flatNodes.len - let seenNodes = flatNodes.countIt(it.seen) + let totalNodes = discoveredNodes.len + #let seenNodes = totalNodes - info "discovered nodes: ", total = totalNodes, seen = seenNodes + info "discovered nodes: ", total = totalNodes #, seen = seenNodes # Notes: # we dont run ipMajorityLoop diff --git a/apps/networkmonitor/networkmonitor_metrics.nim b/apps/networkmonitor/networkmonitor_metrics.nim index fcd729826f..90d72e0edb 100644 --- a/apps/networkmonitor/networkmonitor_metrics.nim +++ b/apps/networkmonitor/networkmonitor_metrics.nim @@ -54,6 +54,7 @@ type enrCapabilities*: seq[string] country*: string city*: string + maddrs*: seq[string] # only after ok connection lastTimeConnected*: int64 diff --git 
a/apps/networkmonitor/prometheus.yaml b/apps/networkmonitor/prometheus.yaml new file mode 100644 index 0000000000..c7af03fcb6 --- /dev/null +++ b/apps/networkmonitor/prometheus.yaml @@ -0,0 +1,9 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + scrape_interval: 5s + static_configs: + - targets: ['host.docker.internal:8008'] + metrics_path: '/metrics' \ No newline at end of file diff --git a/waku/waku_metadata/protocol.nim b/waku/waku_metadata/protocol.nim index 3eb898d543..40b15a399e 100644 --- a/waku/waku_metadata/protocol.nim +++ b/waku/waku_metadata/protocol.nim @@ -83,7 +83,8 @@ proc initProtocolHandler(m: WakuMetadata) = remoteClusterId = response.clusterId, remoteShards = response.shards, localClusterId = m.clusterId, - localShards = m.shards + localShards = m.shards, + peer = conn.peerId discard await m.respond(conn)