-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds 'main' and 'metrics' packages. (#3)
* Adds 'main' and 'metrics' packages. * Changes in response to comments and requests in PR #3. * Changes from comments and suggestions in PR #3. * Fixes a few tests for metrics package. * Renames struct element intervalSeries to just interval. Renames Prom metric label 'node' to 'machine'. * Adds two new Prometheus metrics: disco_collect_duration_seconds (a histogram) and disco_collect_errors_total. * Outputs JSONL instead of indented JSON. Removes the node/machine label from Prom metrics (k8s auto-discovery should add it automatically). * Tests archive.Write by writing a set of models as JSONL to a file, then reading that file back into a new set of models, and then checking for equality between the two. * Moves collectDuration and collectErrors out of the Metrics struct and instead as global package variables. Handles an unhandled error case.
- Loading branch information
Showing
7 changed files
with
854 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"flag" | ||
"log" | ||
"time" | ||
|
||
"github.com/m-lab/disco/config" | ||
"github.com/m-lab/disco/metrics" | ||
"github.com/m-lab/disco/snmp" | ||
"github.com/m-lab/go/flagx" | ||
"github.com/m-lab/go/prometheusx" | ||
"github.com/m-lab/go/rtx" | ||
"github.com/soniah/gosnmp" | ||
) | ||
|
||
// Command-line flags and process-wide state for the disco binary.
var (
	// fCommunity is the SNMP community string used when talking to the switch.
	// main exits if it is empty.
	fCommunity = flag.String("community", "", "The SNMP community string for the switch.")

	// fHostname is the FQDN of the node. main exits if it is empty.
	fHostname = flag.String("hostname", "", "The FQDN of the node.")

	// fListenAddress is the address on which telemetry is meant to be served.
	fListenAddress = flag.String("listen-address", ":8888", "Address to listen on for telemetry.")

	// fMetricsFile is the path to the YAML file describing which metrics to
	// scrape; main passes it to config.New.
	fMetricsFile = flag.String("metrics", "", "Path to YAML file defining metrics to scrape.")

	// fWriteInterval is the period of the ticker that triggers writing out the
	// collected data.
	fWriteInterval = flag.Duration("write-interval", 300*time.Second, "Interval to write out JSON files e.g, 300s, 10m.")

	// fTarget is the FQDN of the switch that is scraped over SNMP.
	fTarget = flag.String("target", "", "Switch FQDN to scrape metrics from.")

	// logFatal holds log.Fatal in a variable.
	// NOTE(review): presumably so tests can substitute it; confirm against the
	// test file.
	logFatal = log.Fatal

	// mainCtx controls the lifetime of main: main returns once it is cancelled.
	// mainCancel cancels mainCtx.
	mainCtx, mainCancel = context.WithCancel(context.Background())
)
|
||
func main() { | ||
flag.Parse() | ||
rtx.Must(flagx.ArgsFromEnv(flag.CommandLine), "Could not parse env args") | ||
|
||
if len(*fCommunity) <= 0 { | ||
log.Fatal("SNMP community string must be passed as arg or env variable.") | ||
} | ||
|
||
if len(*fHostname) <= 0 { | ||
log.Fatal("Node's FQDN must be passed as an arg or env variable.") | ||
} | ||
|
||
goSNMP := &gosnmp.GoSNMP{ | ||
Target: *fTarget, | ||
Port: uint16(161), | ||
Community: *fCommunity, | ||
Version: gosnmp.Version2c, | ||
Timeout: time.Duration(5) * time.Second, | ||
Retries: 1, | ||
} | ||
err := goSNMP.Connect() | ||
rtx.Must(err, "Failed to connect to the SNMP server") | ||
|
||
config, err := config.New(*fMetricsFile) | ||
rtx.Must(err, "Could not create new metrics configuration") | ||
client := snmp.New(goSNMP) | ||
metrics := metrics.New(client, config, *fTarget, *fHostname) | ||
|
||
promSrv := prometheusx.MustServeMetrics() | ||
|
||
go func() { | ||
<-mainCtx.Done() | ||
goSNMP.Conn.Close() | ||
promSrv.Close() | ||
}() | ||
|
||
// Start scraping on a clean 10s boundary within a minute. Run in an very | ||
// tight loop to be sure we start things as early in the 10s boundary as | ||
// possible. | ||
for time.Now().Second()%10 != 0 { | ||
time.Sleep(1 * time.Millisecond) | ||
} | ||
|
||
writeTicker := time.NewTicker(*fWriteInterval) | ||
metrics.IntervalStart = time.Now() | ||
|
||
collectTicker := time.NewTicker(10 * time.Second) | ||
// Tickers wait for the configured duration before their first tick. We want | ||
// Collect() to run immedately, so manually kick off Collect() once | ||
// immediately after the ticker is created. | ||
metrics.Collect(client, config) | ||
|
||
for { | ||
select { | ||
case <-mainCtx.Done(): | ||
collectTicker.Stop() | ||
writeTicker.Stop() | ||
return | ||
case <-writeTicker.C: | ||
start := metrics.IntervalStart | ||
metrics.IntervalStart = time.Now() | ||
metrics.Write(start, time.Now()) | ||
case <-collectTicker.C: | ||
metrics.CollectStart = time.Now() | ||
metrics.Collect(client, config) | ||
} | ||
} | ||
} |
Oops, something went wrong.