diff --git a/javascript/packages/orchestrator/src/metrics/index.ts b/javascript/packages/orchestrator/src/metrics/index.ts index 6142fb812..bfa411d4d 100644 --- a/javascript/packages/orchestrator/src/metrics/index.ts +++ b/javascript/packages/orchestrator/src/metrics/index.ts @@ -21,10 +21,14 @@ enum metricKeysMapping { PeersCount = "sub_libp2p_peers_count", } -export async function fetchMetrics(metricUri: string): Promise { +export async function fetchMetrics( + metricUri: string, + node_name = "", +): Promise { let metrics = {}; // empty by default + const debug_msg = node_name ? `[${node_name}]` : ""; try { - debug(`fetching: ${metricUri}`); + debug([debug_msg, `fetching: ${metricUri}`].join(" ")); const fetchResult = await fetch(metricUri, { signal: TimeoutAbortController(2).signal, method: "GET", @@ -34,6 +38,9 @@ export async function fetchMetrics(metricUri: string): Promise { }); if (!fetchResult.ok) { + debug( + [debug_msg, `fetch error - status: ${fetchResult.status}`].join(" "), + ); throw new Error(`Error - status: ${fetchResult.status}`); } @@ -43,7 +50,7 @@ export async function fetchMetrics(metricUri: string): Promise { debug(`ERR: ${err}`); console.log( `\n${decorators.red(`Error`)} \t ${decorators.bright( - `fetching metrics from: ${metricUri}`, + `fetching metrics from: ${metricUri} ${debug_msg}`, )}`, ); } diff --git a/javascript/packages/orchestrator/src/networkNode.ts b/javascript/packages/orchestrator/src/networkNode.ts index 60ff97870..09634a1be 100644 --- a/javascript/packages/orchestrator/src/networkNode.ts +++ b/javascript/packages/orchestrator/src/networkNode.ts @@ -233,7 +233,9 @@ export class NetworkNode implements NetworkNodeInterface { desiredMetricValue === null || compare(comparator!, value, desiredMetricValue) ) { - debug(`value: ${value} ~ desiredMetricValue: ${desiredMetricValue}`); + debug( + `[${this.name}] value: ${value} ~ desiredMetricValue: ${desiredMetricValue}`, + ); return value; } } @@ -244,7 +246,9 @@ export class NetworkNode implements NetworkNodeInterface { while (!done && !timedout) { c++; await new Promise((resolve) => setTimeout(resolve, 1000)); - debug(`fetching metrics - q: ${c} time: ${new Date()}`); + debug( + `[${this.name}] Fetching metrics - q: ${c} time: ${new Date()}`, + ); this.cachedMetrics = await fetchMetrics(this.prometheusUri); value = this._getMetric(metricName, desiredMetricValue === null); @@ -256,7 +260,7 @@ export class NetworkNode implements NetworkNodeInterface { done = true; } else { debug( - `current value: ${value} for metric ${rawMetricName}, keep trying...`, + `[${this.name}] Current value: ${value} for metric ${rawMetricName}, keep trying...`, ); } } @@ -268,7 +272,7 @@ export class NetworkNode implements NetworkNodeInterface { setTimeout(() => { timedout = true; const err = new Error( - `Timeout(${timeout}), "getting desired metric value ${desiredMetricValue} within ${timeout} secs".`, + `[${this.name}] Timeout(${timeout}), "getting desired metric value ${desiredMetricValue} within ${timeout} secs".`, ); return resolve(err); }, timeout * 1000), diff --git a/javascript/packages/orchestrator/src/spawner.ts b/javascript/packages/orchestrator/src/spawner.ts index 95ce291a6..449795f75 100644 --- a/javascript/packages/orchestrator/src/spawner.ts +++ b/javascript/packages/orchestrator/src/spawner.ts @@ -122,16 +122,20 @@ export const spawnNode = async ( const endpointPort = RPC_WS_PORT; if (opts.inCI) { + // UPDATE: 04-10-2024 Since we have several reports of failures related to + // can't access metrics by dns, we switch back to use the pod ip. + // in CI we deploy a service (with the pod name) in front of each pod // so here we can use the name (as short dns in the ns) to connect to pod. - const nodeDns = `${podDef.metadata.name}.${namespace}.svc.cluster.local`; + // const nodeDns = `${podDef.metadata.name}.${namespace}.svc.cluster.local`; + const pod_ip = await client.getNodeIP(node.name); networkNode = new NetworkNode( node.name, - WS_URI_PATTERN.replace("{{IP}}", nodeDns).replace( + WS_URI_PATTERN.replace("{{IP}}", pod_ip).replace( "{{PORT}}", endpointPort.toString(), ), - METRICS_URI_PATTERN.replace("{{IP}}", nodeDns).replace( + METRICS_URI_PATTERN.replace("{{IP}}", pod_ip).replace( "{{PORT}}", PROMETHEUS_PORT.toString(), ),