From 973a6be34df81238fba973984079f23e296db35c Mon Sep 17 00:00:00 2001 From: Chris Baker <1675087+cgbaker@users.noreply.github.com> Date: Sun, 7 Mar 2021 16:21:38 +0000 Subject: [PATCH] wip: rebase --- .../docs/autoscaling/internals/index.mdx | 4 + .../autoscaling/internals/plugins/apm.mdx | 43 ++++++++++ .../autoscaling/internals/plugins/base.mdx | 35 ++++++++ .../autoscaling/internals/plugins/index.mdx | 40 +++++++++ .../internals/plugins/strategy.mdx | 82 +++++++++++++++++++ .../autoscaling/internals/plugins/target.mdx | 82 +++++++++++++++++++ .../autoscaling/plugins/external/index.mdx | 17 ++++ .../docs/autoscaling/plugins/index.mdx | 15 ++-- .../docs/autoscaling/plugins/target.mdx | 2 + website/data/docs-navigation.js | 23 +++++- 10 files changed, 335 insertions(+), 8 deletions(-) create mode 100644 website/content/docs/autoscaling/internals/plugins/apm.mdx create mode 100644 website/content/docs/autoscaling/internals/plugins/base.mdx create mode 100644 website/content/docs/autoscaling/internals/plugins/index.mdx create mode 100644 website/content/docs/autoscaling/internals/plugins/strategy.mdx create mode 100644 website/content/docs/autoscaling/internals/plugins/target.mdx create mode 100644 website/content/docs/autoscaling/plugins/external/index.mdx diff --git a/website/content/docs/autoscaling/internals/index.mdx b/website/content/docs/autoscaling/internals/index.mdx index 4def6a4be80..30d5c3b6eb2 100644 --- a/website/content/docs/autoscaling/internals/index.mdx +++ b/website/content/docs/autoscaling/internals/index.mdx @@ -11,3 +11,7 @@ description: > This section covers the internals of the Nomad Autoscaler and explains the technical details of how it functions, its architecture, and sub-systems. 
+ +- [Autoscaler plugins](/docs/autoscaling/internals/plugins) +- [Check calculations](/docs/autoscaling/internals/checks) + diff --git a/website/content/docs/autoscaling/internals/plugins/apm.mdx b/website/content/docs/autoscaling/internals/plugins/apm.mdx new file mode 100644 index 00000000000..bd8b9a7df28 --- /dev/null +++ b/website/content/docs/autoscaling/internals/plugins/apm.mdx @@ -0,0 +1,43 @@ +--- +layout: docs +page_title: APM Plugins +sidebar_title: APM +description: Learn how to author a Nomad Autoscaler APM plugin. +--- + +# APM Plugins + +APM plugins are used by the autoscaler to interact with an external APM system, +returning metrics that are used by the autoscaler to inform scaling actions. + +For a real-world example of a Nomad APM plugin implementation, see the +[Prometheus plugin](https://github.com/hashicorp/nomad-autoscaler/tree/main/plugins/builtin/apm/prometheus). + +## Authoring APM Plugins + +Authoring an APM plugin in Go can be accomplished by implementing the +[apm.APM][apm_plugin] interface, alongside a main package to launch the plugin. + +The [no-op APM plugin][noop_plugin] can be used as a starting point for new APM +plugins. + +## APM Plugin API + +The [base plugin][base_plugin] interface must be implemented in addition to the +following functions. + +### `Query(query string, timeRange sdk.TimeRange) (sdk.TimestampedMetrics, error)` + +The `Query` function is called by the agent during policy +evaluation. The `query` argument is the opaque string from the scaling policy, +and the `timeRange` indicates the period of time over which the query should be +made. The returned metrics are passed to the configured strategy plugin as part +of the policy evaluation. + +### `QueryMultiple(query string, timeRange sdk.TimeRange) ([]sdk.TimestampedMetrics, error)` + +The `QueryMultiple` method is only used by Dynamic Application Scaling. 
+ +[apm_plugin]: https://github.com/hashicorp/nomad-autoscaler/blob/v0.3.0/plugins/apm/apm.go#L11 +[base_plugin]: /docs/autoscaling/internals/plugins/base +[noop_plugin]: https://github.com/hashicorp/nomad-autoscaler/tree/v0.3.0/plugins/test/noop-apm diff --git a/website/content/docs/autoscaling/internals/plugins/base.mdx b/website/content/docs/autoscaling/internals/plugins/base.mdx new file mode 100644 index 00000000000..8dae37d5dea --- /dev/null +++ b/website/content/docs/autoscaling/internals/plugins/base.mdx @@ -0,0 +1,35 @@ +--- +layout: docs +page_title: Base Plugin +sidebar_title: Base +description: Learn about how to author a Nomad Autoscaler plugin. +--- + +# Base Plugin + +The base plugin is a special plugin type implemented by all autoscaler plugins. +It allows for common plugin operations such as defining a configuration schema and +version information. + +## Plugin API + +#### `PluginInfo() (*PluginInfo, error)` + +A `PluginInfo` contains metadata about the plugin. For example, +the Prometheus APM plugin returns the following: + +```go +PluginInfo{ + // Name of the plugin + Name: "prometheus", + // Plugin type: "apm", "strategy", or "target" + PluginType: "apm", +} +``` + +#### `SetConfig(config map[string]string) error` + +The `SetConfig` function is called when starting an instance of the plugin. It receives the +configuration for a named instance of the plugin as provided in the autoscaler [agent config][plugin_config]. + +[plugin_config]: /docs/autoscaling/agent diff --git a/website/content/docs/autoscaling/internals/plugins/index.mdx b/website/content/docs/autoscaling/internals/plugins/index.mdx new file mode 100644 index 00000000000..79b7dafd827 --- /dev/null +++ b/website/content/docs/autoscaling/internals/plugins/index.mdx @@ -0,0 +1,40 @@ +--- +layout: docs +page_title: Plugins +sidebar_title: Plugins +description: Learn about how external plugins work in the Nomad Autoscaler. 
+--- + +# Plugins + +The Nomad Autoscaler uses a plugin framework which allows users to extend its +functionality. The design of the plugin system is inspired by the +[plugin system][nomad_plugin_system] used in Nomad for task drivers and +devices. + +The following components are currently pluggable within the Nomad Autoscaler: + +- [APMs](/docs/autoscaling/internals/plugins/apm) +- [Strategies](/docs/autoscaling/internals/plugins/strategy) +- [Targets](/docs/autoscaling/internals/plugins/target) + +In addition, each plugin implements the [base](/docs/autoscaling/internals/plugins/base) plugin functionality. + +# Architecture + +The Nomad Autoscaler plugin framework uses the [go-plugin][goplugin] project to expose +a language-independent plugin interface. Plugins implement a set of gRPC +services and methods which the Autoscaler agent manages by running the plugin +and calling the implemented RPCs. This means that plugins are free to be +implemented in the author's language of choice. + +To make plugin development easier, a set of Go interfaces and structs exist for +each plugin type that abstract the go-plugin and gRPC interfaces. The guides in +this documentation reference these abstractions for ease of use. + +The existing plugins can serve as examples; in addition, no-op external plugins +are available in the [autoscaler repo][noop_plugins]. + +[goplugin]: https://github.com/hashicorp/go-plugin +[nomad_plugin_system]: /docs/internals/plugins +[noop_plugins]: https://github.com/hashicorp/nomad-autoscaler/tree/main/plugins/test diff --git a/website/content/docs/autoscaling/internals/plugins/strategy.mdx b/website/content/docs/autoscaling/internals/plugins/strategy.mdx new file mode 100644 index 00000000000..db495410dcd --- /dev/null +++ b/website/content/docs/autoscaling/internals/plugins/strategy.mdx @@ -0,0 +1,82 @@ +--- +layout: docs +page_title: Strategy Plugins +sidebar_title: Strategy +description: Learn how to author a Nomad Autoscaler strategy plugin. 
+--- + +# Devices + +Nomad has built-in support for scheduling compute resources such as CPU, memory, +and networking. Nomad device plugins are used to support scheduling tasks with +other devices, such as GPUs. They are responsible for fingerprinting these +devices and working with the Nomad client to make them available to assigned +tasks. + +For a real world example of a Nomad device plugin implementation, see the [Nvidia +GPU plugin](https://github.com/hashicorp/nomad/tree/master/devices/gpu/nvidia). + +## Authoring Device Plugins + +Authoring a device plugin in Nomad consists of implementing the +[DevicePlugin][deviceplugin] interface alongside +a main package to launch the plugin. + +The [device plugin skeleton project][skeletonproject] exists to help bootstrap +the development of new device plugins. It provides most of the boilerplate +necessary for a device plugin, along with detailed comments. + +### Lifecycle and State + +A device plugin is long-lived. Nomad will ensure that one instance of the plugin is +running. If the plugin crashes or otherwise terminates, Nomad will launch another +instance of it. + +However, unlike [task drivers](/docs/internals/plugins/task-drivers), device plugins do not currently +have an interface for persisting state to the Nomad client. Instead, the device +plugin API emphasizes fingerprinting devices and reporting their status. After +helping to provision a task with a scheduled device, a device plugin does not +have any responsibility (or ability) to monitor the task. + +## Device Plugin API + +The [base plugin][baseplugin] must be implemented in addition to the following +functions. + +### `Fingerprint(context.Context) (<-chan *FingerprintResponse, error)` + +The `Fingerprint` [function][fingerprintfn] is called by the client when the plugin is started. +It allows the plugin to provide Nomad with a list of discovered devices, along with their +attributes, for the purpose of scheduling workloads using devices. 
+The channel returned should immediately send an initial +[`FingerprintResponse`][fingerprintresponse], then send periodic updates at +an appropriate interval until the context is canceled. + +Each fingerprint response consists of either an error or a list of device groups. +A device group is a list of detected devices that are identical for the purpose of +scheduling; that is, they will have identical attributes. + +### `Stats(context.Context, time.Duration) (<-chan *StatsResponse, error)` + +The `Stats` [function][statsfn] returns a channel on which the plugin should +emit device statistics, at the specified interval, until either an error is +encountered or the specified context is canceled. The `StatsResponse` object +allows [dimensioned][dimensioned] statistics to be returned for each device in a device group. + +### `Reserve(deviceIDs []string) (*ContainerReservation, error)` + +The `Reserve` [function][reservefn] accepts a list of device IDs and returns the information +necessary for the client to make those devices available to a task. Currently, +the `ContainerReservation` object allows the plugin to specify environment +variables for the task, as well as a list of host devices and files to be mounted +into the task's filesystem. Any orchestration required to prepare the device for +use should also be performed in this function. 
+ +[deviceplugin]: https://github.com/hashicorp/nomad/blob/v0.9.0/plugins/device/device.go#L20-L33 +[baseplugin]: /docs/internals/plugins/base +[skeletonproject]: https://github.com/hashicorp/nomad-skeleton-device-plugin +[fingerprintresponse]: https://github.com/hashicorp/nomad/blob/v0.9.0/plugins/device/device.go#L37-L43 +[fingerprintfn]: https://github.com/hashicorp/nomad-skeleton-device-plugin/blob/v0.1.0/device/device.go#L159-L165 +[statsfn]: https://github.com/hashicorp/nomad-skeleton-device-plugin/blob/v0.1.0/device/device.go#L169-L176 +[reservefn]: https://github.com/hashicorp/nomad-skeleton-device-plugin/blob/v0.1.0/device/device.go#L189-L245 +[dimensioned]: https://github.com/hashicorp/nomad/blob/v0.9.0/plugins/shared/structs/stats.go#L33-L34 diff --git a/website/content/docs/autoscaling/internals/plugins/target.mdx b/website/content/docs/autoscaling/internals/plugins/target.mdx new file mode 100644 index 00000000000..91b31d65c05 --- /dev/null +++ b/website/content/docs/autoscaling/internals/plugins/target.mdx @@ -0,0 +1,82 @@ +--- +layout: docs +page_title: Target Plugins +sidebar_title: Target +description: Learn how to author a Nomad Autoscaler target plugin. +--- + +# Devices + +Nomad has built-in support for scheduling compute resources such as CPU, memory, +and networking. Nomad device plugins are used to support scheduling tasks with +other devices, such as GPUs. They are responsible for fingerprinting these +devices and working with the Nomad client to make them available to assigned +tasks. + +For a real world example of a Nomad device plugin implementation, see the [Nvidia +GPU plugin](https://github.com/hashicorp/nomad/tree/master/devices/gpu/nvidia). + +## Authoring Device Plugins + +Authoring a device plugin in Nomad consists of implementing the +[DevicePlugin][deviceplugin] interface alongside +a main package to launch the plugin. 
+ +The [device plugin skeleton project][skeletonproject] exists to help bootstrap +the development of new device plugins. It provides most of the boilerplate +necessary for a device plugin, along with detailed comments. + +### Lifecycle and State + +A device plugin is long-lived. Nomad will ensure that one instance of the plugin is +running. If the plugin crashes or otherwise terminates, Nomad will launch another +instance of it. + +However, unlike [task drivers](/docs/internals/plugins/task-drivers), device plugins do not currently +have an interface for persisting state to the Nomad client. Instead, the device +plugin API emphasizes fingerprinting devices and reporting their status. After +helping to provision a task with a scheduled device, a device plugin does not +have any responsibility (or ability) to monitor the task. + +## Device Plugin API + +The [base plugin][baseplugin] must be implemented in addition to the following +functions. + +### `Fingerprint(context.Context) (<-chan *FingerprintResponse, error)` + +The `Fingerprint` [function][fingerprintfn] is called by the client when the plugin is started. +It allows the plugin to provide Nomad with a list of discovered devices, along with their +attributes, for the purpose of scheduling workloads using devices. +The channel returned should immediately send an initial +[`FingerprintResponse`][fingerprintresponse], then send periodic updates at +an appropriate interval until the context is canceled. + +Each fingerprint response consists of either an error or a list of device groups. +A device group is a list of detected devices that are identical for the purpose of +scheduling; that is, they will have identical attributes. + +### `Stats(context.Context, time.Duration) (<-chan *StatsResponse, error)` + +The `Stats` [function][statsfn] returns a channel on which the plugin should +emit device statistics, at the specified interval, until either an error is +encountered or the specified context is cancelled. 
The `StatsResponse` object +allows [dimensioned][dimensioned] statistics to be returned for each device in a device group. + +### `Reserve(deviceIDs []string) (*ContainerReservation, error)` + +The `Reserve` [function][reservefn] accepts a list of device IDs and returns the information +necessary for the client to make those devices available to a task. Currently, +the `ContainerReservation` object allows the plugin to specify environment +variables for the task, as well as a list of host devices and files to be mounted +into the task's filesystem. Any orchestration required to prepare the device for +use should also be performed in this function. + +[deviceplugin]: https://github.com/hashicorp/nomad/blob/v0.9.0/plugins/device/device.go#L20-L33 +[baseplugin]: /docs/internals/plugins/base +[skeletonproject]: https://github.com/hashicorp/nomad-skeleton-device-plugin +[fingerprintresponse]: https://github.com/hashicorp/nomad/blob/v0.9.0/plugins/device/device.go#L37-L43 +[fingerprintfn]: https://github.com/hashicorp/nomad-skeleton-device-plugin/blob/v0.1.0/device/device.go#L159-L165 +[statsfn]: https://github.com/hashicorp/nomad-skeleton-device-plugin/blob/v0.1.0/device/device.go#L169-L176 +[reservefn]: https://github.com/hashicorp/nomad-skeleton-device-plugin/blob/v0.1.0/device/device.go#L189-L245 +[dimensioned]: https://github.com/hashicorp/nomad/blob/v0.9.0/plugins/shared/structs/stats.go#L33-L34 diff --git a/website/content/docs/autoscaling/plugins/external/index.mdx b/website/content/docs/autoscaling/plugins/external/index.mdx new file mode 100644 index 00000000000..21450fa2a54 --- /dev/null +++ b/website/content/docs/autoscaling/plugins/external/index.mdx @@ -0,0 +1,17 @@ +--- +layout: docs +page_title: 'Autoscaler Plugins: Community Supported' +sidebar_title: Community +description: A list of community-supported Autoscaler Plugins. 
+--- + +# Community Supported + +If you have authored an autoscaler plugin that you believe will be useful to the +broader Nomad community and you are committed to maintaining the plugin, please +file a PR to add your plugin to this page. + +For details on authoring an autoscaler plugin, please refer to the [plugin +authoring guide][plugin_guide]. + +[plugin_guide]: /docs/autoscaling/internals/plugins diff --git a/website/content/docs/autoscaling/plugins/index.mdx b/website/content/docs/autoscaling/plugins/index.mdx index ab37e4f83ee..f52321c738e 100644 --- a/website/content/docs/autoscaling/plugins/index.mdx +++ b/website/content/docs/autoscaling/plugins/index.mdx @@ -9,15 +9,17 @@ description: Plugins are used to architect the Nomad Autoscaler into distinct ar Plugins are an essential part of the Nomad Autoscaler architecture. The Autoscaler uses the [go-plugin][go_plugin_github] library to implement an ecosystem of -different types of plugins. Each plugin type is responsible for a specific task; -APM plugins retrieve metrics about the workloads being monitored and Strategy -plugins decide which actions Nomad should execute to keep the policy valid. The -flexibility of plugins allows the Nomad Autoscaler to be extended to meet specific -business requirements or technology use cases. +different types of plugins. Each plugin type is responsible for a specific task: +APM plugins retrieve metrics about the workloads being monitored; strategy +plugins decide the scaling action to satisfy the scaling policy; and target +plugins perform the scaling action. The flexibility of plugins allows the Nomad +Autoscaler to be extended to meet specific business requirements or technology +use cases. The Nomad Autoscaler currently ships with a number of built-in plugins to ease the learning curve. Details of these can be found in the side menu, under the -specific plugin type sections. +specific plugin type sections. 
The autoscaler also supports external plugins; see +this list of [community-supported plugins][community_plugins]. # General Options @@ -63,3 +65,4 @@ targets. strongly discouraged. [go_plugin_github]: https://github.com/hashicorp/go-plugin +[community_plugins]: /docs/autoscaling/plugins/external diff --git a/website/content/docs/autoscaling/plugins/target.mdx b/website/content/docs/autoscaling/plugins/target.mdx index e5b6f0d976d..20324c00a88 100644 --- a/website/content/docs/autoscaling/plugins/target.mdx +++ b/website/content/docs/autoscaling/plugins/target.mdx @@ -19,6 +19,8 @@ Below is a list of plugins you can use with the Nomad Autoscaler: - [Azure Virtual Machine Scale Set][azure_vmss_target] - [Google Cloud Platform Managed Instance Groups][gcp_mig_target] + + ## Nomad Task Group Target The Nomad task group target indicates the scalable resource is a Nomad job diff --git a/website/data/docs-navigation.js b/website/data/docs-navigation.js index 0b2779c364a..093adf34d80 100644 --- a/website/data/docs-navigation.js +++ b/website/data/docs-navigation.js @@ -419,11 +419,30 @@ export default [ 'telemetry', { category: 'plugins', - content: ['apm', 'strategy', 'target'], + content: [ + 'apm', + 'strategy', + 'target', + { + category: 'external', + content: [], + } + ], }, { category: 'internals', - content: ['checks'], + content: [ + 'checks', + { + category: 'plugins', + content: [ + 'base', + 'apm', + 'strategy', + 'target', + ], + } + ], }, ], },