diff --git a/README.md b/README.md
index 2e0d79e..3e3661d 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
 Fly Autoscaler
 ==============
 
-A metrics-based autoscaler for Fly.io. The autoscaler supports polling for
-metrics from a Prometheus instance and then computing the number of machines
-based on those metrics.
+The project is a metrics-based autoscaler for Fly.io. The autoscaler supports
+polling for metrics from a Prometheus instance and then computing the number of
+machines based on those metrics.
 
 ## How it works
 
@@ -63,36 +63,78 @@ machines available for a Fly app.
 
 ## Usage
 
+### Create an app for your autoscaler
+
+First, create an app for your autoscaler:
+
+```sh
+$ fly apps create my-autoscaler
+```
+
+Then create a `fly.toml` for the deployment. Update the `TARGET_APP_NAME` with
+the name of the app that you want to scale and update `MY_ORG` to the
+organization where your Prometheus metrics live.
+
+```toml
+app = "my-autoscaler"
+
+[build]
+image = "flyio/fly-autoscaler:0.1"
+
+[env]
+FAS_APP_NAME = "TARGET_APP_NAME"
+FAS_EXPR = "ceil(queue_depth / 10)"
+FAS_PROMETHEUS_ADDRESS = "https://api.fly.io/prometheus/MY_ORG"
+FAS_PROMETHEUS_METRIC_NAME = "queue_depth"
+FAS_PROMETHEUS_QUERY = "sum(queue_depth)"
+
+[metrics]
+port = 9090
+path = "/metrics"
+```
+
+
 ### Create a deploy token
 
-First, set up a new deploy token for your application so that the autoscaler
-and fetch metrics and start machines with it:
+Next, set up a new deploy token for the application you want to scale:
+
+```sh
+$ fly tokens create deploy -a TARGET_APP_NAME
+```
+
+Set the token as a secret on your application:
+
+```
+$ fly secrets set FAS_API_TOKEN="FlyV1 ..."
+```
+
+
+### Create a read-only token
+
+Create a token for reading your Prometheus data:
 
 ```sh
-$ fly tokens create deploy -a MYAPP -n "fly-autoscaler"
+$ fly tokens create readonly
 ```
 
-You can use this token by exporting it to an environment variable:
+Set the token as a secret on your application:
 
 ```
-$ export FLY_ACCESS_TOKEN="FlyV1 ..."
+$ fly secrets set FAS_PROMETHEUS_TOKEN="FlyV1 ..."
 ```
 
-### Running the server
+### Deploy the server
 
-To run the autoscaler as a server process, use the `serve` command. For example,
-this will ensure that there are always 5 instances running at a time:
+Finally, deploy your autoscaler application:
 
 ```sh
-$ fly-autoscaler serve \
-  -org MYORG \
-  -app MYAPP \
-  -prometheus.address "https://api.fly.io/prometheus/MYORG"
-  -prometheus.query "sum(fly_instance_up{app='MYAPP'})" \
-  -prometheus.metric-name instance_up
-  -expr "max(instance_up, 5)"
+$ fly deploy
 ```
 
+This should create a new machine and start it with the `fly-autoscaler` server
+running.
+
+
 ### Testing your metrics & expression
 
 You can perform a one-time run of metrics collection & expression evaluation for
@@ -101,11 +143,22 @@ perform any scaling of Fly Machines. It will only print the evaluated
 expression based on current metrics numbers.
 
 ```sh
-$ fly-autoscaler eval \
-  -org MYORG \
-  -app MYAPP \
-  -prometheus.address "https://api.fly.io/prometheus/MYORG"
-  -prometheus.query "sum(fly_instance_up{app='MYAPP'})" \
-  -prometheus.metric-name instance_up
-  -expr "max(instance_up, 5)"
+$ fly-autoscaler eval
 ```
+
+You can change the evaluated expression by setting the `FAS_EXPR` environment
+variable:
+
+```sh
+$ FAS_EXPR=queue_depth fly-autoscaler eval
+```
+
+## Configuration
+
+You can also configure `fly-autoscaler` with a YAML config file if you don't
+want to use environment variables or if you want to configure more than one
+metric collector.
+
+Please see the reference [fly-autoscaler.yml][] for more details.
+
+[fly-autoscaler.yml]: ./etc/fly-autoscaler.yml
diff --git a/cmd/fly-autoscaler/main_test.go b/cmd/fly-autoscaler/main_test.go
new file mode 100644
index 0000000..04cf76e
--- /dev/null
+++ b/cmd/fly-autoscaler/main_test.go
@@ -0,0 +1,48 @@
+package main_test
+
+import (
+	"testing"
+	"time"
+
+	main "github.com/superfly/fly-autoscaler/cmd/fly-autoscaler"
+)
+
+func TestConfig_Parse(t *testing.T) {
+	var config main.Config
+	if err := main.ParseConfigFromFile("../../etc/fly-autoscaler.yml", &config); err != nil {
+		t.Fatal(err)
+	}
+
+	if got, want := config.AppName, "TARGET_APP_NAME"; got != want {
+		t.Fatalf("AppName=%v, want %v", got, want)
+	}
+	if got, want := config.Expr, "ceil(queue_depth / 10)"; got != want {
+		t.Fatalf("Expr=%v, want %v", got, want)
+	}
+	if got, want := config.Interval, 15*time.Second; got != want {
+		t.Fatalf("Interval=%v, want %v", got, want)
+	}
+	if got, want := config.APIToken, "FlyV1 ..."; got != want {
+		t.Fatalf("APIToken=%v, want %v", got, want)
+	}
+	if got, want := config.Verbose, false; got != want {
+		t.Fatalf("Verbose=%v, want %v", got, want)
+	}
+
+	mc := config.MetricCollectors[0]
+	if got, want := mc.Type, "prometheus"; got != want {
+		t.Fatalf("MC[0].Type=%v, want %v", got, want)
+	}
+	if got, want := mc.MetricName, "queue_depth"; got != want {
+		t.Fatalf("MC[0].MetricName=%v, want %v", got, want)
+	}
+	if got, want := mc.Address, "https://api.fly.io/prometheus/MY_ORG"; got != want {
+		t.Fatalf("MC[0].Address=%v, want %v", got, want)
+	}
+	if got, want := mc.Query, "sum(queue_depth)"; got != want {
+		t.Fatalf("MC[0].Query=%v, want %v", got, want)
+	}
+	if got, want := mc.Token, "FlyV1 ..."; got != want {
+		t.Fatalf("MC[0].Token=%v, want %v", got, want)
+	}
+}
diff --git a/etc/fly-autoscaler.yml b/etc/fly-autoscaler.yml
new file mode 100644
index 0000000..57db9d8
--- /dev/null
+++ b/etc/fly-autoscaler.yml
@@ -0,0 +1,29 @@
+# The name of the target app that you want to scale.
+app-name: "TARGET_APP_NAME"
+
+# An expression to calculate the number of machines of the target app that
+# should be in a "started" state. Should return a number which will be rounded.
+#
+# This uses the Expr language to define expressions: https://expr-lang.org/
+expr: "ceil(queue_depth / 10)"
+
+# The frequency that the reconciliation loop will be run.
+interval: "15s"
+
+# A Fly.io auth token that has permission to start machines for the target app.
+# This is typically set via the FAS_API_TOKEN environment variable.
+api-token: "FlyV1 ..."
+
+# If true, enables verbose debug logging.
+verbose: false
+
+# Metric collectors fetch the current metrics when a reconciliation is performed.
+#
+# They store the current value locally with a given metric name so that the
+# expression can be used to calculate the machine count.
+metric-collectors:
+  - type: "prometheus"
+    metric-name: "queue_depth"
+    address: "https://api.fly.io/prometheus/MY_ORG"
+    query: "sum(queue_depth)"
+    token: "FlyV1 ..."