Skip to content

Commit

Permalink
Merge pull request #1068 from threefoldtech/development-fix-tfrobot-workflow
Browse files Browse the repository at this point in the history

exit with error if any of the deployments failed
  • Loading branch information
Eslam-Nawara authored Jun 9, 2024
2 parents 61e0756 + 0d625a3 commit 66ad53f
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 41 deletions.
7 changes: 5 additions & 2 deletions tfrobot/cmd/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"path/filepath"

"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
"github.com/threefoldtech/tfgrid-sdk-go/tfrobot/internal/parser"
"github.com/threefoldtech/tfgrid-sdk-go/tfrobot/pkg/deployer"
Expand Down Expand Up @@ -93,8 +94,10 @@ var deployCmd = &cobra.Command{
}

ctx := context.Background()
if err = deployer.RunDeployer(ctx, cfg, tfPluginClient, outputPath, debug); err != nil {
return fmt.Errorf("failed to run the deployer with error: %w", err)
if errs := deployer.RunDeployer(ctx, cfg, tfPluginClient, outputPath, debug); errs != nil {
log.Error().Msg("deployments failed with errors: ")
fmt.Println(errs)
os.Exit(1)
}

return nil
Expand Down
8 changes: 5 additions & 3 deletions tfrobot/cmd/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"path/filepath"

"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
"github.com/threefoldtech/tfgrid-sdk-go/tfrobot/internal/parser"
"github.com/threefoldtech/tfgrid-sdk-go/tfrobot/pkg/deployer"
Expand Down Expand Up @@ -80,9 +81,10 @@ var loadCmd = &cobra.Command{
return err
}

err = deployer.RunLoader(ctx, cfg, tfPluginClient, debug, outputPath)
if err != nil {
return fmt.Errorf("failed to load configured deployments with error: %w", err)
if err := deployer.RunLoader(ctx, cfg, tfPluginClient, debug, outputPath); err != nil {
log.Error().Msg("failed to load configured deployments")
fmt.Println(err)
os.Exit(1)
}

return nil
Expand Down
36 changes: 21 additions & 15 deletions tfrobot/pkg/deployer/deployer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ const (
maxGoroutinesCount = 100
)

func RunDeployer(ctx context.Context, cfg Config, tfPluginClient deployer.TFPluginClient, output string, debug bool) error {
func RunDeployer(ctx context.Context, cfg Config, tfPluginClient deployer.TFPluginClient, output string, debug bool) *multierror.Error {
passedGroups := map[string][]*workloads.Deployment{}
failedGroups := map[string]string{}
var failedGroupsErr *multierror.Error

ctx, cancel := context.WithCancel(ctx)
defer cancel()

Expand Down Expand Up @@ -74,8 +75,8 @@ func RunDeployer(ctx context.Context, cfg Config, tfPluginClient deployer.TFPlug

return nil
}); err != nil {
failedGroupsErr = multierror.Append(failedGroupsErr, fmt.Errorf("%s: %s", nodeGroup.Name, err.Error()))

failedGroups[nodeGroup.Name] = err.Error()
err := tfPluginClient.CancelByProjectName(fmt.Sprintf("vm/%s", nodeGroup.Name))
if err != nil {
log.Debug().Err(err).Send()
Expand All @@ -97,15 +98,20 @@ func RunDeployer(ctx context.Context, cfg Config, tfPluginClient deployer.TFPlug
endTime := time.Since(deploymentStart)

// load deployed deployments
outputBytes, err := loadAfterDeployment(ctx, tfPluginClient, passedGroups, failedGroups, cfg.MaxRetries, filepath.Ext(output) == ".json")
if err != nil {
return err
outputBytes, errs := loadAfterDeployment(ctx, tfPluginClient, passedGroups, cfg.MaxRetries, filepath.Ext(output) == ".json")
if errs != nil {
failedGroupsErr = multierror.Append(failedGroupsErr, errs.Errors...)
}

fmt.Println(string(outputBytes))
log.Info().Msgf("Deployment took %s", endTime)

return os.WriteFile(output, outputBytes, 0644)
err := os.WriteFile(output, outputBytes, 0644)
if err != nil {
log.Error().Err(err).Send()
}

return failedGroupsErr
}

func deployNodeGroup(
Expand Down Expand Up @@ -140,25 +146,25 @@ func loadAfterDeployment(
ctx context.Context,
tfPluginClient deployer.TFPluginClient,
deployedGroups map[string][]*workloads.Deployment,
failedGroups map[string]string,
retries uint64,
asJson bool,
) ([]byte, error) {
) ([]byte, *multierror.Error) {
var loadedgroups map[string][]vmOutput
var failedGroupsErr *multierror.Error

if len(deployedGroups) > 0 {
log.Info().Msg("Loading deployments")
groupsContracts := getDeploymentsContracts(deployedGroups)

var failed map[string]string
loadedgroups, failed = batchLoadNodeGroupsInfo(ctx, tfPluginClient, groupsContracts, retries)
loadedgroups, failedGroupsErr = batchLoadNodeGroupsInfo(ctx, tfPluginClient, groupsContracts, retries)
}

for nodeGroup, err := range failed {
failedGroups[nodeGroup] = err
}
output, err := parseDeploymentOutput(loadedgroups, asJson)
if err != nil {
log.Error().Err(err).Send()
}

return parseDeploymentOutput(loadedgroups, failedGroups, asJson)
return output, failedGroupsErr
}

func parseVMsGroup(vms []Vms, nodeGroup string, nodesIDs []int, sshKeys map[string]string) groupDeploymentsInfo {
Expand Down
47 changes: 26 additions & 21 deletions tfrobot/pkg/deployer/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"golang.org/x/sync/errgroup"

"github.com/hashicorp/go-multierror"
"github.com/rs/zerolog/log"
"github.com/sethvargo/go-retry"
"github.com/threefoldtech/tfgrid-sdk-go/grid-client/deployer"
Expand All @@ -23,31 +24,37 @@ import (

type NodeContracts map[uint32][]uint64

func RunLoader(ctx context.Context, cfg Config, tfPluginClient deployer.TFPluginClient, debug bool, output string) error {
func RunLoader(ctx context.Context, cfg Config, tfPluginClient deployer.TFPluginClient, debug bool, output string) *multierror.Error {
log.Info().Msg("Loading deployments")

asJson := filepath.Ext(output) == ".json"

groupsContracts, failed := getGroupsContracts(ctx, tfPluginClient, cfg.NodeGroups)
passedGroups, failedGroups := batchLoadNodeGroupsInfo(ctx, tfPluginClient, groupsContracts, cfg.MaxRetries)
groupsContracts, failedGroupsErr := getGroupsContracts(tfPluginClient, cfg.NodeGroups)
passedGroups, errs := batchLoadNodeGroupsInfo(ctx, tfPluginClient, groupsContracts, cfg.MaxRetries)

// add projects failed to be loaded
for group, err := range failed {
failedGroups[group] = err
if errs != nil {
failedGroupsErr = multierror.Append(failedGroupsErr, errs.Errors...)
}

outputBytes, err := parseDeploymentOutput(passedGroups, failedGroups, asJson)
outputBytes, err := parseDeploymentOutput(passedGroups, asJson)
if err != nil {
return err
failedGroupsErr = multierror.Append(failedGroupsErr, err)
}

fmt.Println(string(outputBytes))
return os.WriteFile(output, outputBytes, 0644)

err = os.WriteFile(output, outputBytes, 0644)
if err != nil {
log.Info().Err(err).Send()
}

return failedGroupsErr
}

func getGroupsContracts(ctx context.Context, tfPluginClient deployer.TFPluginClient, nodeGroups []NodesGroup) (map[string]NodeContracts, map[string]string) {
func getGroupsContracts(tfPluginClient deployer.TFPluginClient, nodeGroups []NodesGroup) (map[string]NodeContracts, *multierror.Error) {
loadedContracts := make(map[string]NodeContracts)
failedGroups := make(map[string]string)
var failedGroupsErr *multierror.Error

var lock sync.Mutex
var wg sync.WaitGroup
Expand All @@ -65,7 +72,7 @@ func getGroupsContracts(ctx context.Context, tfPluginClient deployer.TFPluginCli
defer lock.Unlock()

if err != nil {
failedGroups[nodeGroup] = err.Error()
failedGroupsErr = multierror.Append(failedGroupsErr, fmt.Errorf("%s: %s", nodeGroup, err.Error()))
return
}

Expand All @@ -74,18 +81,18 @@ func getGroupsContracts(ctx context.Context, tfPluginClient deployer.TFPluginCli
}

wg.Wait()
return loadedContracts, failedGroups
return loadedContracts, failedGroupsErr
}

func batchLoadNodeGroupsInfo(
ctx context.Context,
tfPluginClient deployer.TFPluginClient,
groupsContracts map[string]NodeContracts,
retries uint64,
) (map[string][]vmOutput, map[string]string) {
) (map[string][]vmOutput, *multierror.Error) {
trial := 1
failedGroups := map[string]string{}
nodeGroupsInfo := map[string][]vmOutput{}
var failedGroupsErr *multierror.Error

var lock sync.Mutex
var wg sync.WaitGroup
Expand Down Expand Up @@ -115,25 +122,23 @@ func batchLoadNodeGroupsInfo(
return nil
}); err != nil {
lock.Lock()
failedGroups[nodeGroup] = err.Error()
failedGroupsErr = multierror.Append(failedGroupsErr, fmt.Errorf("%s: %s", nodeGroup, err.Error()))
lock.Unlock()
}
}(nodeGroup, contracts)
}

wg.Wait()
return nodeGroupsInfo, failedGroups
return nodeGroupsInfo, failedGroupsErr
}

func parseDeploymentOutput(passedGroups map[string][]vmOutput, failedGroups map[string]string, asJson bool) ([]byte, error) {
func parseDeploymentOutput(passedGroups map[string][]vmOutput, asJson bool) ([]byte, error) {
var err error
var outputBytes []byte
outData := struct {
OK map[string][]vmOutput `json:"ok"`
Error map[string]string `json:"error"`
OK map[string][]vmOutput `json:"ok"`
}{
OK: passedGroups,
Error: failedGroups,
OK: passedGroups,
}

if asJson {
Expand Down

0 comments on commit 66ad53f

Please sign in to comment.