Skip to content

Commit

Permalink
feat: bat command to build-and-test applications
Browse files Browse the repository at this point in the history
This moves the data-prep-kit tests out of tests/tests and into demos/data-prep-kit, and updates their layout to improve compatibility with the test-data scheme.

Also fixes: tester should Down() the run

Signed-off-by: Nick Mitchell <nickm@us.ibm.com>
  • Loading branch information
starpit committed Dec 2, 2024
1 parent 483a73d commit 868449b
Show file tree
Hide file tree
Showing 218 changed files with 316 additions and 767 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ jobs:
- ./tests/bin/ci.sh -i 'test7(b.*|c.*|d.*|e.*|g.*|h.*)'
- ./tests/bin/ci.sh -i 'test7f.*'
- ./tests/bin/ci.sh -i 'test8.*'
- ./tests/bin/ci.sh -i 'python-code.*'
- ./tests/bin/ci.sh -i 'python-language.*'
- ./tests/bin/ci.sh -i 'python-universal.*'
- /tmp/lunchpail bat demos/data-prep-kit --concurrency 1 --auto-clean --target=$LUNCHPAIL_TARGET # bat=Build and Test
- ./tests/bin/go.sh
- ./tests/bin/pipelines.sh
os: [ubuntu-latest]
Expand Down
1 change: 1 addition & 0 deletions cmd/options/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ func AddBuildOptions(cmd *cobra.Command) (*build.Options, error) {

cmd.Flags().IntVar(&options.Pack, "pack", options.Pack, "Run k concurrent tasks; if k=0 and machine has N cores, then k=N")
cmd.Flags().BoolVarP(&options.Gunzip, "gunzip", "z", options.Gunzip, "Gunzip inputs before passing them to the worker logic")
cmd.Flags().BoolVar(&options.AutoClean, "auto-clean", options.AutoClean, "Clean up any caches prior to exiting")

AddTargetOptionsTo(cmd, &options)
AddLogOptionsTo(cmd, &options)
Expand Down
43 changes: 43 additions & 0 deletions cmd/subcommands/bat.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//go:build full || manage

package subcommands

import (
"context"

"github.com/spf13/cobra"

"lunchpail.io/cmd/options"
"lunchpail.io/pkg/be"
"lunchpail.io/pkg/boot"
)

// init registers the `bat` (build and test) subcommand on the root command.
func init() {
	batCmd := &cobra.Command{
		Use:   "bat",
		Short: "Build and test",
		Long:  "Build and test",
		Args:  cobra.MatchAll(cobra.MinimumNArgs(1), cobra.OnlyValidArgs),
	}

	// Register the shared build flags; give up at startup if that fails.
	opts, optsErr := options.AddBuildOptions(batCmd)
	if optsErr != nil {
		panic(optsErr)
	}

	maxParallel := 4
	batCmd.Flags().IntVarP(&maxParallel, "concurrency", "j", maxParallel, "Maximum tests to run concurrently")

	batCmd.RunE = func(_ *cobra.Command, dirs []string) error {
		ctx := context.Background()

		backend, err := be.NewInitOk(ctx, true, *opts)
		if err != nil {
			return err
		}

		// maxParallel and *opts are read here, at run time, so they
		// reflect whatever the flags were set to on the command line.
		runner := boot.BuildAndTester{
			Backend:     backend,
			Concurrency: maxParallel,
			Options:     *opts,
		}
		return runner.RunAll(ctx, dirs)
	}

	rootCmd.AddCommand(batCmd)
}
10 changes: 6 additions & 4 deletions cmd/subcommands/tester.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (

"lunchpail.io/cmd/options"
"lunchpail.io/pkg/be"
"lunchpail.io/pkg/be/target"
"lunchpail.io/pkg/boot"
"lunchpail.io/pkg/build"
)
Expand All @@ -27,16 +26,19 @@ func init() {
panic(err)
}

quiet := false
cmd.Flags().BoolVarP(&quiet, "quiet", "q", quiet, "Do not show stdout of application being tested")

cmd.RunE = func(cmd *cobra.Command, args []string) error {
ctx := context.Background()

buildOpts.Target.Platform = target.Local
backend, err := be.New(ctx, *buildOpts)
buildOpts.CreateNamespace = true
backend, err := be.NewInitOk(ctx, true, *buildOpts)
if err != nil {
return err
}

return boot.Tester{Backend: backend, Options: *buildOpts}.RunAll(ctx)
return boot.Tester{Quiet: quiet, Backend: backend, Options: *buildOpts}.RunAll(ctx)
}

rootCmd.AddCommand(cmd)
Expand Down
3 changes: 2 additions & 1 deletion cmd/subcommands/up.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ func newUpCmd() *cobra.Command {
return err
}

return boot.Up(ctx, backend, boot.UpOptions{BuildOptions: *buildOpts, DryRun: dryrunFlag, Watch: watchFlag, Inputs: args, Executable: os.Args[0], NoRedirect: noRedirect})
_, err = boot.Up(ctx, backend, boot.UpOptions{BuildOptions: *buildOpts, DryRun: dryrunFlag, Watch: watchFlag, WatchUtil: watchFlag, Inputs: args, Executable: os.Args[0], NoRedirect: noRedirect})
return err
}

return cmd
Expand Down
3 changes: 3 additions & 0 deletions demos/data-prep-kit/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Lunchpail data-prep-kit Demos

These are ports of most of the transforms from the [data-prep-kit](https://github.com/IBM/data-prep-kit).
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
data-prep-toolkit==0.2.2.dev1
scancode-toolkit ; platform_system != 'Darwin'
scancode-toolkit-mini

# we can probably update to 18+, but we will have to re-generate expected output as pyarrow 18 seems to have resulted in a binary format change
pyarrow<17
Expand Down
File renamed without changes.
Binary file not shown.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions demos/data-prep-kit/universal/resize/env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
max_rows_per_table: 125
1 change: 1 addition & 0 deletions demos/data-prep-kit/universal/tokenization/command
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python3.12 main.py
1 change: 1 addition & 0 deletions demos/data-prep-kit/universal/tokenization/image
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker.io/python:3.12
161 changes: 161 additions & 0 deletions pkg/boot/bat.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
//go:build full || manage

package boot

import (
"bufio"
"context"
"fmt"
"io"
"io/fs"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"slices"

"golang.org/x/sync/errgroup"

"lunchpail.io/pkg/be"
"lunchpail.io/pkg/build"
"lunchpail.io/pkg/fe/builder"
"lunchpail.io/pkg/fe/builder/overlay"
"lunchpail.io/pkg/observe/colors"
)

// BuildAndTester builds applications from source directories and then
// runs the `test` subcommand of each resulting binary.
type BuildAndTester struct {
// Maximum number of applications to build and test at the same time;
// 0 leaves the number of concurrent tasks unlimited.
Concurrency int
be.Backend
build.Options
}

// RunAll runs build&test for all applications in all of the given `dirs`.
//
// Binaries are built into a temporary directory that is removed before
// RunAll returns. At most t.Concurrency applications are processed at
// once (0 means unlimited). The first error encountered is returned.
func (t BuildAndTester) RunAll(ctx context.Context, dirs []string) error {
	fmt.Fprintln(os.Stderr, "Starting build and test for", dirs)

	// NOTE: ioutil.TempDir is deprecated in favor of os.MkdirTemp;
	// switching also requires dropping the io/ioutil import.
	dirForBinaries, err := ioutil.TempDir("", "lunchpail-bat-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(dirForBinaries)

	// Our own cancel handle, so that a failed directory walk can stop
	// the tasks that were already scheduled.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	group, gctx := errgroup.WithContext(ctx)
	if t.Concurrency != 0 {
		group.SetLimit(t.Concurrency)
	}

	for _, dir := range dirs {
		if err := t.RunDir(gctx, group, dir, dirForBinaries); err != nil {
			// Stop in-flight tasks and wait for them to return
			// before the deferred RemoveAll deletes the directory
			// they are working in. The walk error is the root
			// cause, so the (cancellation-induced) task errors
			// are intentionally dropped.
			cancel()
			_ = group.Wait()
			return err
		}
	}

	return group.Wait()
}

// RunDir runs build&test for all applications in the given `dir`.
//
// It walks `dir` and treats every sub-directory that directly contains a
// `src` or `test-data` entry as an application. One task per application
// is scheduled on `group`; each build is stored under `dirForBinaries` at
// the application's path relative to `dir`. `dir` itself is never treated
// as an application. The returned error covers the directory walk only;
// task errors are reported by `group.Wait()`.
func (t BuildAndTester) RunDir(ctx context.Context, group *errgroup.Group, dir, dirForBinaries string) error {
	isAppMarker := func(f fs.DirEntry) bool { return f.Name() == "src" || f.Name() == "test-data" }

	return filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
		switch {
		case err != nil:
			// `d` may be nil here (e.g. `dir` does not exist), so
			// this must be checked before touching `d`.
			return err
		case !d.IsDir():
			// Returning SkipDir for a plain file would skip its
			// remaining siblings, so files are simply ignored.
			return nil
		case isAppMarker(d):
			// never look for applications inside src/ or test-data/
			return fs.SkipDir
		case path == dir:
			// the root is a container of applications, not one itself
			return nil
		}

		files, err := os.ReadDir(path)
		if err != nil {
			return err
		}
		if !slices.ContainsFunc(files, isAppMarker) {
			// not an app directory
			if t.Options.Verbose() {
				fmt.Fprintln(os.Stderr, "Skipping build and test for", path)
			}
			return nil
		}

		group.Go(func() error {
			binaryRelPath, err := filepath.Rel(dir, path)
			if err != nil {
				return err
			}
			binaryFullPath := filepath.Join(dirForBinaries, binaryRelPath)
			return t.Run(ctx, path, binaryRelPath, binaryFullPath)
		})

		return nil
	})
}

// Run performs one build&test for the application specified in
// `sourcePath`, storing the build in `binaryFullPath`. `binaryRelPath` is
// the short application name used to label output and errors.
//
// The built binary is invoked with the `test` argument (plus `--verbose`
// when verbose output is enabled) and its stdout/stderr are streamed to
// ours, each line prefixed with `binaryRelPath`.
//
// If `ctx` is cancelled, Run returns ctx.Err() without waiting for the
// test to finish; a background goroutine still reaps the child process
// once its output streams have closed.
func (t BuildAndTester) Run(ctx context.Context, sourcePath, binaryRelPath, binaryFullPath string) error {
	// Do not start new work once the run has been cancelled.
	if err := ctx.Err(); err != nil {
		return err
	}

	if err := builder.Build(
		ctx,
		sourcePath,
		builder.Options{
			Name:           binaryFullPath,
			OverlayOptions: overlay.Options{BuildOptions: t.Options},
		},
	); err != nil {
		return fmt.Errorf("building %s: %w", binaryRelPath, err)
	}

	args := []string{"test"}
	if t.Options.Verbose() {
		args = append(args, "--verbose")
	}

	cmd := exec.CommandContext(ctx, binaryFullPath, args...)

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return err
	}
	if err := cmd.Start(); err != nil {
		fmt.Fprintf(os.Stderr, "Error launching test %s: %v\n", binaryRelPath, err)
		return err
	}

	doneout := make(chan struct{})
	doneerr := make(chan struct{})

	go pipe(binaryRelPath, stdout, os.Stdout, doneout)
	go pipe(binaryRelPath, stderr, os.Stderr, doneerr)

	// cmd.Wait must only be called after all reads from the pipes have
	// completed, and must always be called to release the child's
	// resources. Doing both in a goroutine guarantees that the readers
	// are always drained and the child is always reaped, even when we
	// return early on cancellation. The channel is buffered so this
	// goroutine never blocks once nobody is listening.
	exited := make(chan error, 1)
	go func() {
		<-doneout
		<-doneerr
		exited <- cmd.Wait()
	}()

	select {
	case err := <-exited:
		if err != nil {
			return fmt.Errorf("testing %s: %w", binaryRelPath, err)
		}
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// pipe copies the output of the test from `r` to `w`, prefixing emitted
// lines with the given prefix (application name).
//
// Reading stops at the first read error of any kind (io.EOF included). A
// trailing line without a newline is still emitted, and a line longer
// than the reader's buffer is emitted as a single line. `done` is closed
// when pipe returns, so pipe never blocks waiting for a receiver.
func pipe(prefix string, r io.Reader, w io.Writer, done chan<- struct{}) {
	defer close(done)

	reader := bufio.NewReader(r)
	label := colors.Yellow.Render(prefix)

	// Accumulates the chunks of a line that did not fit in the buffer.
	var line []byte
	for {
		chunk, isPrefix, err := reader.ReadLine()
		if err != nil {
			// Flush whatever part of an over-long line we already hold.
			if len(line) > 0 {
				fmt.Fprintf(w, "%s %s\n", label, line)
			}
			return
		}

		// chunk is only valid until the next read, so copy it.
		line = append(line, chunk...)
		if isPrefix {
			continue
		}

		fmt.Fprintf(w, "%s %s\n", label, line)
		line = line[:0]
	}
}
2 changes: 1 addition & 1 deletion pkg/boot/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func catAndRedirect(ctx context.Context, inputs []string, backend be.Backend, ir
// TODO: backend.Wait(ir)? which would be a no-op for local

// If we aren't piped into anything, then copy out the outbox files
if isFinalStep(ir.Context) && !noRedirect {
if redirectTo != "" || isFinalStep(ir.Context) && !noRedirect {
// We try to place the output files in the same
// directory as the respective input files. TODO: this
// may be a fool's errand, e.g. what if a single input
Expand Down
18 changes: 12 additions & 6 deletions pkg/boot/tester.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
)

// Tester runs an application against its test inputs and validates the
// resulting outputs against the expected ones.
type Tester struct {
// Quiet disables watching the run while it executes (passed on as
// Watch: !Quiet); surfaced on the command line as --quiet/-q.
Quiet bool
be.Backend
build.Options
}
Expand Down Expand Up @@ -48,7 +49,10 @@ func (t Tester) prepareInputs(testData hlir.TestData, stageDir string) (inputs [
expectedDir := build.TestDataDirForExpected(stageDir)
for _, test := range testData {
inputs = append(inputs, filepath.Join(inputDir, test.Input))
outputs = append(outputs, filepath.Join(expectedDir, test.Expected))

for _, expected := range test.Expected {
outputs = append(outputs, filepath.Join(expectedDir, expected))
}
}

if t.Options.Verbose() {
Expand All @@ -59,7 +63,7 @@ func (t Tester) prepareInputs(testData hlir.TestData, stageDir string) (inputs [
}

func (t Tester) Run(ctx context.Context, inputs []string, expected []string) error {
fmt.Fprintf(os.Stderr, "Scheduling %s for %s\n", english.Plural(len(inputs), "test", ""), build.Name())
fmt.Fprintf(os.Stderr, "Testing %s\n", english.Plural(len(inputs), "input", ""))

if slices.IndexFunc(inputs, func(input string) bool { return filepath.Ext(input) == ".gz" }) >= 0 {
t.Options.Gunzip = true
Expand All @@ -73,7 +77,9 @@ func (t Tester) Run(ctx context.Context, inputs []string, expected []string) err
defer os.RemoveAll(redirectTo)
}

if err := Up(ctx, t.Backend, UpOptions{Inputs: inputs, BuildOptions: t.Options, RedirectTo: redirectTo}); err != nil {
if runContext, err := Up(ctx, t.Backend, UpOptions{Inputs: inputs, BuildOptions: t.Options, RedirectTo: redirectTo, Watch: !t.Quiet}); err != nil {
return err
} else if err := Down(ctx, runContext.Run.RunName, t.Backend, DownOptions{Namespace: t.Options.Target.Namespace, Verbose: t.Options.Verbose()}); err != nil {
return err
}

Expand Down Expand Up @@ -105,8 +111,8 @@ func (t Tester) validate(inputs []string, expecteds []string, redirectTo string)
}

found := 0
for idx, expected := range expecteds {
expectedFileName := filepath.Base(inputs[idx])
for _, expected := range expecteds {
expectedFileName := filepath.Base(expected)

// TODO O(N^2)
for _, actual := range actuals {
Expand All @@ -128,7 +134,7 @@ func (t Tester) validate(inputs []string, expecteds []string, redirectTo string)
if ok, err := t.equal(matchesWithGunzip, expectedBytes, actualBytes); err != nil {
return err
} else if !ok {
return fmt.Errorf("actual!=expected for %s", filepath.Base(inputs[idx]))
return fmt.Errorf("actual!=expected for %s", expectedFileName)
}
}
}
Expand Down
Loading

0 comments on commit 868449b

Please sign in to comment.