Skip to content

Commit

Permalink
Host Environment Variable Forwarding (#4842)
Browse files Browse the repository at this point in the history
This PR adds the ability to securely forward host environment variables
to job executions. This enables passing credentials and secrets from the
host to jobs through a controlled allowlist mechanism.

## Key Changes

- Added support for referencing host environment variables using `env:`
prefix
- Implemented allowlist-based security controls at the compute node
level
- Added early validation in the bid strategy to fail fast when jobs request
non-allowlisted variables

## Usage Example

```yaml
# Job specification
Tasks:
  - Name: main
    Env:
      API_KEY: "env:API_KEY"     # Forward host's API_KEY
      LOG_PATH: "/logs"          # Regular literal value
    Engine:
      Type: docker
      Params:
        Image: ubuntu:latest

# Compute node configuration
compute:
  env:
    allowlist:
      - "API_*"    # Allow forwarding of any env var starting with API_
```

## Security Design

- Host variables must be explicitly allowlisted using patterns (e.g.,
`API_*`)
- Jobs must explicitly request variables using `env:` prefix
- Early validation during bid phase prevents scheduling jobs that
request non-allowlisted variables
- Creates a clear audit trail of which credentials each job requested

## Future Work

The architecture introduced in this PR is designed to be extensible to
support secret management systems. The `env:` prefix pattern will evolve
to support additional sources like:

```yaml
Tasks:
  - Name: main
    Env:
      API_KEY: "vault:secrets/api-key"    # HashiCorp Vault
      DB_PASS: "aws:prod/db/password"     # AWS Secrets Manager
      CERT: "azure:certificates/prod"      # Azure Key Vault
```

This foundation enables:
- Integration with popular secret vaults and cloud provider secret
managers
- Dynamic credential generation and rotation
- More granular access control patterns

The key difference from the previous implementation is that we now
support referencing host environment variables (prefixed with `env:`) in
addition to literal values. This is implemented with security in mind -
only explicitly allowlisted patterns can be forwarded, and jobs must
declare which variables they need for audit purposes.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- New Features
- Introduced dynamic environment variable resolution for jobs and
compute nodes, allowing secure usage of host variables through
customizable allow lists.
- Integrated environment variable handling into job execution and
bidding workflows for more flexible configurations.

- Documentation
- Updated API specifications and schema descriptions to clarify how
environment variables can be configured, including support for direct
values and host references.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
  • Loading branch information
wdbaruni authored Feb 9, 2025
1 parent d151162 commit e36b9ab
Show file tree
Hide file tree
Showing 30 changed files with 1,256 additions and 66 deletions.
50 changes: 50 additions & 0 deletions pkg/bidstrategy/semantic/env_resolver.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package semantic

import (
"context"
"fmt"

"github.com/bacalhau-project/bacalhau/pkg/bidstrategy"
"github.com/bacalhau-project/bacalhau/pkg/compute"
)

// EnvResolverStrategyParams holds the dependencies used to construct an
// EnvResolverStrategy.
type EnvResolverStrategyParams struct {
// Resolver is the environment variable resolver handed to the strategy.
Resolver compute.EnvVarResolver
}

// EnvResolverStrategy is a semantic bid strategy that declines jobs whose
// environment variables fail validation by the configured resolver.
type EnvResolverStrategy struct {
// resolver validates each name/value pair found in the task's Env map.
resolver compute.EnvVarResolver
}

// Compile-time check of interface implementation
var _ bidstrategy.SemanticBidStrategy = (*EnvResolverStrategy)(nil)

// NewEnvResolverStrategy builds an EnvResolverStrategy backed by the resolver
// supplied in params.
func NewEnvResolverStrategy(params EnvResolverStrategyParams) *EnvResolverStrategy {
	strategy := new(EnvResolverStrategy)
	strategy.resolver = params.Resolver
	return strategy
}

// Reasons reported in the bid response when the strategy accepts a job.
const (
// noEnvVarsReason is used when the task requests no environment variables.
noEnvVarsReason = "accept jobs without environment variables"
// canResolveReason is used when every requested variable passed validation.
canResolveReason = "resolve all required environment variables"
)

// ShouldBid decides whether this node should bid on the job in request.
//
// It bids when the task declares no environment variables, or when every
// declared name/value pair passes the resolver's Validate check. The first
// pair that fails validation produces a negative bid whose reason embeds the
// variable name and the validation error. The returned error is always nil.
func (s *EnvResolverStrategy) ShouldBid(
	ctx context.Context,
	request bidstrategy.BidStrategyRequest,
) (bidstrategy.BidStrategyResponse, error) {
	envVars := request.Job.Task().Env

	// Nothing to resolve means nothing can be rejected.
	if len(envVars) == 0 {
		return bidstrategy.NewBidResponse(true, noEnvVarsReason), nil
	}

	// Stop at the first variable the resolver refuses.
	for key, raw := range envVars {
		validationErr := s.resolver.Validate(key, string(raw))
		if validationErr == nil {
			continue
		}
		rejection := fmt.Sprintf("resolve environment variable %s: %v", key, validationErr)
		return bidstrategy.NewBidResponse(false, rejection), nil
	}

	return bidstrategy.NewBidResponse(true, canResolveReason), nil
}
93 changes: 93 additions & 0 deletions pkg/bidstrategy/semantic/env_resolver_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//go:build unit || !integration

package semantic_test

import (
"context"
"fmt"
"testing"

"github.com/stretchr/testify/require"

"github.com/bacalhau-project/bacalhau/pkg/bidstrategy"
"github.com/bacalhau-project/bacalhau/pkg/bidstrategy/semantic"
"github.com/bacalhau-project/bacalhau/pkg/compute/env"
"github.com/bacalhau-project/bacalhau/pkg/models"
"github.com/bacalhau-project/bacalhau/pkg/test/mock"
)

// TestEnvResolverStrategy checks the bid decision for tasks whose Env mixes
// literal values with allowed and denied host references.
func TestEnvResolverStrategy(t *testing.T) {
	type scenario struct {
		name      string
		env       map[string]models.EnvVarValue
		allowList []string
		shouldBid bool
	}

	scenarios := []scenario{
		{
			name:      "no env vars",
			env:       map[string]models.EnvVarValue{},
			allowList: []string{},
			shouldBid: true,
		},
		{
			name: "literal values only",
			env: map[string]models.EnvVarValue{
				"LITERAL_VAR": "literal-value",
			},
			allowList: []string{},
			shouldBid: true,
		},
		{
			name: "allowed host env var",
			env: map[string]models.EnvVarValue{
				"HOST_VAR": "env:TEST_VAR",
			},
			allowList: []string{"TEST_*"},
			shouldBid: true,
		},
		{
			name: "denied host env var",
			env: map[string]models.EnvVarValue{
				"DENIED_VAR": "env:DENIED_VAR",
			},
			allowList: []string{"TEST_*"},
			shouldBid: false,
		},
		{
			name: "mixed env vars with one denied",
			env: map[string]models.EnvVarValue{
				"LITERAL_VAR": "literal-value",
				"HOST_VAR":    "env:TEST_VAR",
				"DENIED_VAR":  "env:DENIED_VAR",
			},
			allowList: []string{"TEST_*"},
			shouldBid: false,
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			// Strategy under test, backed by a resolver limited to the
			// scenario's allowlist.
			strategy := semantic.NewEnvResolverStrategy(semantic.EnvResolverStrategyParams{
				Resolver: env.NewResolver(env.ResolverParams{AllowList: sc.allowList}),
			})

			// Mock job carrying the scenario's environment.
			job := mock.Job()
			job.Task().Env = sc.env

			bidRequest := bidstrategy.BidStrategyRequest{Job: *job}
			response, err := strategy.ShouldBid(context.Background(), bidRequest)

			require.NoError(t, err)
			require.Equal(t, sc.shouldBid, response.ShouldBid, fmt.Sprintf("Reason: %s", response.Reason))
		})
	}
}
6 changes: 6 additions & 0 deletions pkg/compute/env/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package env

const (
// PrefixDelimiter separates the source prefix from the reference in an
// environment variable value (for example the ":" in "env:API_KEY").
PrefixDelimiter = ":"
)
22 changes: 22 additions & 0 deletions pkg/compute/env/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package env

import "github.com/bacalhau-project/bacalhau/pkg/bacerrors"

// Component name attached to environment variable resolution errors
const (
// errComponent is the value passed to WithComponent by the error
// constructors in this file.
errComponent = "EnvResolver"
)

// newErrNotAllowed builds the error reported when name matches none of the
// allowed patterns. It carries the UnauthorizedError code, the package's
// component name and a hint pointing at the compute node configuration.
func newErrNotAllowed(name string) bacerrors.Error {
	const (
		message = "environment variable '%s' is not in allowed patterns"
		hint    = "Check allowed patterns of the compute node's configuration"
	)
	notAllowed := bacerrors.New(message, name)
	return notAllowed.
		WithCode(bacerrors.UnauthorizedError).
		WithComponent(errComponent).
		WithHint(hint)
}

// newErrNotFound builds the error reported when the host has no variable
// called name. It carries the NotFoundError code, the package's component
// name and a hint pointing at the host environment.
func newErrNotFound(name string) bacerrors.Error {
	const (
		message = "required environment variable '%s' not found"
		hint    = "Check the host environment variables"
	)
	notFound := bacerrors.New(message, name)
	return notFound.
		WithCode(bacerrors.NotFoundError).
		WithComponent(errComponent).
		WithHint(hint)
}
53 changes: 53 additions & 0 deletions pkg/compute/env/host_resolver.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package env

import (
"os"
"regexp"
)

// HostResolver handles host environment variable references
type HostResolver struct {
// allowedPatterns lists the patterns a host variable name must match
// before it can be validated or read.
allowedPatterns []string
}

// NewHostResolver returns a HostResolver restricted to the patterns in
// allowList. The slice is stored as given, not copied.
func NewHostResolver(allowList []string) *HostResolver {
	resolver := new(HostResolver)
	resolver.allowedPatterns = allowList
	return resolver
}

// Prefix returns the reference prefix served by this resolver: "env".
func (h *HostResolver) Prefix() string {
	const hostPrefix = "env"
	return hostPrefix
}

// Validate reports whether value, the referenced host variable name, passes
// the allowlist check. The name argument is not consulted. It returns nil
// when the reference is allowed and a not-allowed error otherwise.
func (h *HostResolver) Validate(name string, value string) error {
	if h.isAllowed(value) {
		return nil
	}
	return newErrNotAllowed(value)
}

// Value looks up the host environment variable named by value.
//
// It returns a not-allowed error when the name fails the allowlist check and
// a not-found error when the host does not define the variable. A variable
// that is defined but empty is returned as "" with a nil error.
func (h *HostResolver) Value(value string) (string, error) {
	// The allowlist is enforced before the host environment is read.
	if allowed := h.isAllowed(value); !allowed {
		return "", newErrNotAllowed(value)
	}

	if hostValue, found := os.LookupEnv(value); found {
		return hostValue, nil
	}
	return "", newErrNotFound(value)
}

// isAllowed reports whether varName matches at least one allowlist pattern.
//
// Patterns are globs: "*" matches any run of characters (including none) and
// every other character matches only itself. The whole name must match, so
// "API_*" allows "API_KEY" but not "MY_API_KEY", and "TEST_VAR" allows only
// "TEST_VAR". An empty allowlist allows nothing.
func (h *HostResolver) isAllowed(varName string) bool {
	for _, pattern := range h.allowedPatterns {
		matched, err := regexp.MatchString(globToAnchoredRegexp(pattern), varName)
		if err != nil {
			// Not expected for expressions built by globToAnchoredRegexp, but
			// a pattern that cannot be evaluated must never grant access.
			continue
		}
		if matched {
			return true
		}
	}
	return false
}

// globToAnchoredRegexp translates a "*" glob into a regular expression that
// must match the entire input.
func globToAnchoredRegexp(glob string) string {
	expr := make([]byte, 0, len(glob)+2)
	expr = append(expr, '^')
	for _, r := range glob {
		if r == '*' {
			expr = append(expr, ".*"...)
			continue
		}
		// Quote everything else so regexp metacharacters in a pattern are
		// treated as literal characters.
		expr = append(expr, regexp.QuoteMeta(string(r))...)
	}
	return string(append(expr, '$'))
}
117 changes: 117 additions & 0 deletions pkg/compute/env/host_resolver_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
//go:build unit || !integration

package env

import (
"testing"

"github.com/stretchr/testify/suite"
)

// HostResolverSuite groups the HostResolver tests and holds the resolver
// instance they exercise.
type HostResolverSuite struct {
suite.Suite
// resolver is assigned in SetupTest.
resolver *HostResolver
}

func TestHostResolverSuite(t *testing.T) {
suite.Run(t, new(HostResolverSuite))
}

// SetupTest installs a resolver whose allowlist holds one wildcard pattern
// and one plain name.
func (s *HostResolverSuite) SetupTest() {
	allowList := []string{"ALLOWED_*", "TEST_VAR"}
	s.resolver = NewHostResolver(allowList)
}

// TestValidate checks that Validate accepts references matching the suite's
// allowlist and rejects the rest.
func (s *HostResolverSuite) TestValidate() {
	type validateCase struct {
		name      string
		varName   string
		varValue  string
		shouldErr bool
	}

	cases := []validateCase{
		{name: "allowed pattern", varName: "job_var", varValue: "ALLOWED_VALUE", shouldErr: false},
		{name: "allowed exact match", varName: "job_var", varValue: "TEST_VAR", shouldErr: false},
		{name: "not allowed", varName: "job_var", varValue: "DENIED_VALUE", shouldErr: true},
	}

	for _, c := range cases {
		s.Run(c.name, func() {
			err := s.resolver.Validate(c.varName, c.varValue)
			if !c.shouldErr {
				s.NoError(err)
				return
			}
			s.Error(err)
		})
	}
}

// TestValue checks that Value returns the host value for allowed variables
// that exist, a "not in allowed" error for names outside the allowlist, and a
// "not found" error for allowed names the host does not define.
func (s *HostResolverSuite) TestValue() {
	// Set test environment variables
	s.T().Setenv("ALLOWED_VAR", "allowed_value")
	s.T().Setenv("TEST_VAR", "test_value")
	s.T().Setenv("DENIED_VAR", "denied_value")

	tests := []struct {
		name        string
		value       string
		expected    string
		shouldErr   bool
		errContains string
	}{
		{
			name:      "allowed pattern var exists",
			value:     "ALLOWED_VAR",
			expected:  "allowed_value",
			shouldErr: false,
		},
		{
			name:      "allowed exact match var exists",
			value:     "TEST_VAR",
			expected:  "test_value",
			shouldErr: false,
		},
		{
			name:        "not allowed var",
			value:       "DENIED_VAR",
			shouldErr:   true,
			errContains: "not in allowed",
		},
		{
			name:        "allowed pattern var doesn't exist",
			value:       "ALLOWED_MISSING",
			shouldErr:   true,
			errContains: "not found",
		},
	}

	for _, tt := range tests {
		s.Run(tt.name, func() {
			val, err := s.resolver.Value(tt.value)
			if tt.shouldErr {
				// s.Error only records the failure; bail out so a nil error
				// is reported as a test failure instead of panicking on
				// err.Error() below.
				if !s.Error(err) {
					return
				}
				s.Contains(err.Error(), tt.errContains)
				return
			}
			s.NoError(err)
			s.Equal(tt.expected, val)
		})
	}
}
Loading

0 comments on commit e36b9ab

Please sign in to comment.