Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crt: onboard nomad-device-nvidia to crt #6

Merged
merged 4 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
* @hashicorp/nomad-eng

# release configuration
/.release/ @hashicorp/release-engineering
/.github/workflows/build.yml @hashicorp/release-engineering
89 changes: 89 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
name: build

on:
push:
workflow_dispatch:

env:
PKG_NAME: "nomad-device-nvidia"

jobs:
get-go-version:
name: "Determine Go toolchain version"
runs-on: ubuntu-20.04
outputs:
go-version: ${{ steps.get-go-version.outputs.go-version }}
steps:
- uses: actions/checkout@v2
- name: Determine Go version
id: get-go-version
run: |
echo "Building with Go $(cat .go-version)"
echo "::set-output name=go-version::$(cat .go-version)"

get-product-version:
runs-on: ubuntu-20.04
outputs:
product-version: ${{ steps.get-product-version.outputs.product-version }}
steps:
- uses: actions/checkout@v2
- name: get product version
id: get-product-version
run: |
make version
echo "::set-output name=product-version::$(make version)"

generate-metadata-file:
needs: get-product-version
runs-on: ubuntu-20.04
outputs:
filepath: ${{ steps.generate-metadata-file.outputs.filepath }}
steps:
- name: "Checkout directory"
uses: actions/checkout@v2
- name: Generate metadata file
id: generate-metadata-file
uses: hashicorp/actions-generate-metadata@v1
with:
version: ${{ needs.get-product-version.outputs.product-version }}
product: ${{ env.PKG_NAME }}
repositoryOwner: "hashicorp"
- uses: actions/upload-artifact@v2
if: ${{ !env.ACT }}
with:
name: metadata.json
path: ${{ steps.generate-metadata-file.outputs.filepath }}

build-linux:
needs:
- get-go-version
- get-product-version
runs-on: ubuntu-20.04
strategy:
matrix:
goos: ["linux"]
goarch: ["amd64"]
fail-fast: true

name: Go ${{ needs.get-go-version.outputs.go-version }} ${{ matrix.goos }} ${{ matrix.goarch }} build

steps:
- uses: actions/checkout@v2
- name: Setup go
uses: actions/setup-go@v2
with:
go-version: ${{ needs.get-go-version.outputs.go-version }}
- name: Build
env:
GOOS: ${{ matrix.goos }}
GOARCH: ${{ matrix.goarch }}
run: |
make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip
mv \
pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip \
${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip
- uses: actions/upload-artifact@v2
if: ${{ !env.ACT }}
with:
name: ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip
path: ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip
16 changes: 16 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Run CI Tests
on: [push]
env:
GOBIN: /opt/bin
GO_VERSION: 1.17.9
jobs:
run-tests:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: magnetikonline/action-golang-cache@v1
with:
go-version: ${{env.GO_VERSION}}
- name: Run Go Tests
run: |
make test
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
# Build output
bin/
pkg/
nomad-device-nvidia
1 change: 1 addition & 0 deletions .go-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.17.9
132 changes: 132 additions & 0 deletions .release/ci.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
schema = "1"

# Release-pipeline project definition: names the owning team, the Slack
# channel used for notifications, and the GitHub organization, repository and
# release branches for this project.
project "nomad-device-nvidia" {
team = "nomad"
slack {
notification_channel = "C03B5EWFW01"
}
github {
organization = "hashicorp"
repository = "nomad-device-nvidia"
release_branches = [
"main",
# NOTE(review): "crt-onboard" looks like a temporary onboarding branch —
# confirm whether it should be dropped once onboarding is complete.
"crt-onboard",
]
}
}

event "merge" {
// "entrypoint" to use if build is not run automatically
// i.e. send "merge" complete signal to orchestrator to trigger build
}

event "build" {
depends = ["merge"]
action "build" {
organization = "hashicorp"
repository = "nomad-device-nvidia"
workflow = "build"
}
}

event "upload-dev" {
depends = ["build"]
action "upload-dev" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "upload-dev"
depends = ["build"]
}

notification {
on = "fail"
}
}

event "security-scan-binaries" {
depends = ["upload-dev"]
action "security-scan-binaries" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "security-scan-binaries"
config = "security-scan.hcl"
}

notification {
on = "fail"
}
}

event "sign" {
depends = ["security-scan-binaries"]
action "sign" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "sign"
}

notification {
on = "fail"
}
}

event "verify" {
depends = ["sign"]
action "verify" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "verify"
}

notification {
on = "always"
}
}

event "fossa-scan" {
depends = ["verify"]
action "fossa-scan" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "fossa-scan"
}
}

## These are promotion and post-publish events.
## They should be added at the end of the file, after the verify event stanza.

event "trigger-staging" {
// This event is dispatched by the bob trigger-promotion command
// and is required - do not delete.
}

event "promote-staging" {
depends = ["trigger-staging"]
action "promote-staging" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "promote-staging"
}

notification {
on = "always"
}
}

event "trigger-production" {
// This event is dispatched by the bob trigger-promotion command
// and is required - do not delete.
}

event "promote-production" {
depends = ["trigger-production"]
action "promote-production" {
organization = "hashicorp"
repository = "crt-workflows-common"
workflow = "promote-production"
}

notification {
on = "always"
}
}
8 changes: 8 additions & 0 deletions .release/security-scan.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Scan settings for built binaries. This file is referenced as
# "security-scan.hcl" by the security-scan-binaries action in .release/ci.hcl.
# NOTE(review): each flag presumably enables or disables one scanner —
# confirm the exact meaning of each key against the scanner's documentation.
binary {
secrets = true
go_modules = true
#TODO: enable OSV scan once dependencies are updated.
osv = false
oss_index = false
nvd = false
}
56 changes: 49 additions & 7 deletions GNUmakefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,55 @@
default: build
SHELL = bash
default: help

build: bin/nomad-device-nvidia
# Short hash of the commit being built.
GIT_COMMIT := $(shell git rev-parse --short HEAD)
# "+CHANGES" when `git status --porcelain` reports anything, empty otherwise.
GIT_DIRTY := $(if $(shell git status --porcelain),+CHANGES)

# Linker flags that stamp the commit into this module's version package.
# The -X import path must name this repository's module
# (github.com/hashicorp/nomad-device-nvidia); pointing it at another module's
# package leaves this binary's GitCommit unset without any build error.
# NOTE(review): confirm that version/version.go declares a string variable
# named GitCommit.
GO_LDFLAGS := "-X github.com/hashicorp/nomad-device-nvidia/version.GitCommit=$(GIT_COMMIT)$(GIT_DIRTY)"

HELP_FORMAT=" \033[36m%-25s\033[0m %s\n"
.PHONY: help
help: ## Display this usage information
@echo "Valid targets:"
@grep -E '^[^ ]+:.*?## .*$$' $(MAKEFILE_LIST) | \
sort | \
awk 'BEGIN {FS = ":.*?## "}; \
{printf $(HELP_FORMAT), $$1, $$2}'
@echo ""

.PHONY: clean
clean: ## Cleanup previous build
@echo "==> Cleanup previous build"
rm -f ./bin/nomad-device-nvidia

pkg/%/nomad-device-nvidia: GO_OUT ?= $@
pkg/%/nomad-device-nvidia: ## Build nomad-device-nvidia for GOOS_GOARCH, e.g. pkg/linux_amd64/nomad-device-nvidia
@echo "==> Building $@ with tags $(GO_TAGS)..."
@GOOS=$(firstword $(subst _, ,$*)) \
GOARCH=$(lastword $(subst _, ,$*)) \
go build -trimpath -ldflags $(GO_LDFLAGS) -tags "$(GO_TAGS)" -o $(GO_OUT) cmd/main.go

.PRECIOUS: pkg/%/nomad-device-nvidia
pkg/%.zip: pkg/%/nomad-device-nvidia ## Build and zip nomad-device-nvidia for GOOS_GOARCH, e.g. pkg/linux_amd64.zip
@echo "==> Packaging for $@..."
zip -j $@ $(dir $<)*

.PHONY: dev
dev: clean bin/nomad-device-nvidia ## Build the nomad-device-nvidia plugin

bin/nomad-device-nvidia:
@echo "==> Building device driver ..."
mkdir -p bin
go build \
-o ./bin/nomad-device-nvidia \
./cmd/main.go
go build -o bin/nomad-device-nvidia cmd/main.go

.PHONY: test
test:
go test -v ./...
test: ## Run unit tests
@echo "==> Running tests ..."
go test -v -race ./...

# version runs scripts/version.sh to report the current version string; the
# build workflow captures this target's output as the product version.
# When version/version_ent.go exists it is passed as the second argument;
# otherwise version/version.go is passed for both arguments.
.PHONY: version
version: ## Get the current version string
ifneq (,$(wildcard version/version_ent.go))
@$(CURDIR)/scripts/version.sh version/version.go version/version_ent.go
else
@$(CURDIR)/scripts/version.sh version/version.go version/version.go
endif
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ This repository provides an implementation of a Nomad
[`device`](https://www.nomadproject.io/docs/job-specification/device) plugin
for Nvidia GPUs.

**Note: this package is currently embedded in Nomad itself but the Nomad team intends to externalize the driver.**

## Behavior

The Nvidia device plugin uses
Expand Down
7 changes: 3 additions & 4 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ package main
import (
"context"

log "github.com/hashicorp/go-hclog"

"github.com/hashicorp/nomad/devices/gpu/nvidia"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad-device-nvidia"
"github.com/hashicorp/nomad/plugins"
)

Expand All @@ -15,6 +14,6 @@ func main() {
}

// factory returns a new instance of the Nvidia GPU plugin
func factory(ctx context.Context, log log.Logger) interface{} {
func factory(ctx context.Context, log hclog.Logger) interface{} {
return nvidia.NewNvidiaDevice(ctx, log)
}
12 changes: 6 additions & 6 deletions device.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package nvidia

import (
"context"
Expand All @@ -7,9 +7,9 @@ import (
"sync"
"time"

log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad-device-nvidia/nvml"
"github.com/hashicorp/nomad-device-nvidia/version"
"github.com/hashicorp/nomad/devices/gpu/nvidia/nvml"
"github.com/hashicorp/nomad/helper/pluginutils/loader"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/device"
Expand Down Expand Up @@ -45,7 +45,7 @@ var (
// PluginConfig is the nvidia factory function registered in the
// plugin catalog.
PluginConfig = &loader.InternalPluginConfig{
Factory: func(ctx context.Context, l log.Logger) interface{} { return NewNvidiaDevice(ctx, l) },
Factory: func(ctx context.Context, l hclog.Logger) interface{} { return NewNvidiaDevice(ctx, l) },
}

// pluginInfo describes the plugin
Expand Down Expand Up @@ -102,11 +102,11 @@ type NvidiaDevice struct {
devices map[string]struct{}
deviceLock sync.RWMutex

logger log.Logger
logger hclog.Logger
}

// NewNvidiaDevice returns a new nvidia device plugin.
func NewNvidiaDevice(_ context.Context, log log.Logger) *NvidiaDevice {
func NewNvidiaDevice(_ context.Context, log hclog.Logger) *NvidiaDevice {
nvmlClient, err := nvml.NewNvmlClient()
logger := log.Named(pluginName)
if err != nil && err.Error() != nvml.UnavailableLib.Error() {
Expand Down
Loading