Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: extend build-from-source to support env.yaml, image, command #581

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 65 additions & 19 deletions pkg/fe/builder/overlay/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,21 @@ import (
"io/fs"
"os"
"path/filepath"
"regexp"
"slices"
"strings"

"gopkg.in/yaml.v3"

"lunchpail.io/pkg/ir/hlir"
)

// copySourceIntoTemplate formulates an HLIR for the application source found
// under `sourcePath` and writes it out to `templatePath` by delegating to
// addHLIRFromSource. It returns whatever version string and error that call
// produces. When opts.Verbose() is true, a progress line is printed to stderr
// first.
func copySourceIntoTemplate(appname, sourcePath, templatePath string, opts Options) (appVersion string, err error) {
	if opts.Verbose() {
		destination := appdir(templatePath)
		fmt.Fprintln(os.Stderr, "Copying application source into", destination)
	}

	return addHLIRFromSource(appname, sourcePath, templatePath, opts)
}

func addHLIRFromSource(appname, sourcePath, templatePath string, opts Options) (string, error) {
appVersion, app, err := applicationFromSource(appname, sourcePath, templatePath, opts)
if err != nil {
return "", err
Expand All @@ -38,45 +37,92 @@ func addHLIRFromSource(appname, sourcePath, templatePath string, opts Options) (
return appVersion, nil
}

// applicationFromSource formulates an HLIR worker application named `appname`
// from the files found by walking `sourcePath`.
//
// NOTE(review): this span is a rendered diff hunk with the +/- markers and
// indentation stripped, so pre-change and post-change lines are interleaved
// (for example, two filepath.WalkDir calls and duplicated `case` arms appear
// below). It is not compilable as shown; read the comments with that in mind.
//
// From the lines visible here:
//   - directories, a fixed list of data-file extensions, and paths ending in
//     '~' are skipped;
//   - files whose path starts with <sourcePath>/src are appended to spec.Code,
//     and main.sh / main.py there only record *fallback* command/image values;
//   - outside src/, well-known file names populate the version, python
//     requirements, MinMemory, Image, Command and Env fields of the spec;
//   - after the walk, the fallbacks are applied only if Command/Image are
//     still empty, and a python version found in the command replaces the
//     "latest" version of the python Needs entry.
//
// Returns the named results appVersion, app and err via a naked return.
func applicationFromSource(appname, sourcePath, templatePath string, opts Options) (appVersion string, app hlir.Application, err error) {
app = hlir.NewWorkerApplication(appname)
spec := &app.Spec

filepath.WalkDir(sourcePath, func(path string, d fs.DirEntry, err error) error {
// Fallback command/image inferred from src/main.sh or src/main.py; they are
// only used after the walk if no explicit `command`/`image` file set the spec fields.
maybeImage := ""
maybeCommand := ""

// While walking the directory structure, these are the noteworthy subdirectories
srcPrefix := filepath.Join(sourcePath, "src")

// NOTE(review): the callback's `err` parameter is never inspected before
// d.IsDir() is called — confirm `d` cannot be nil when WalkDir reports an
// error for the root path.
err = filepath.WalkDir(sourcePath, func(path string, d fs.DirEntry, err error) error {
switch {
case d.IsDir():
// skip directories
case filepath.Ext(path) == ".html" || filepath.Ext(path) == ".gz" || filepath.Ext(path) == ".zip" || filepath.Ext(path) == ".parquet":
// skip data files
// skip directories, except to remember which "mode" we are in
case filepath.Ext(path) == ".pdf" || filepath.Ext(path) == ".html" || filepath.Ext(path) == ".gz" || filepath.Ext(path) == ".zip" || filepath.Ext(path) == ".parquet":
// skip data files; TODO add support for .ignore
case path[len(path)-1] == '~':
// skip emacs temporary files
default:
b, err := os.ReadFile(path)
if err != nil {
return err
}

// NOTE(review): this is a plain string-prefix test, so a sibling such as
// <sourcePath>/src-old/x would also match — confirm that is acceptable.
if strings.HasPrefix(path, srcPrefix) {
// Handle src/ artifacts
spec.Code = append(spec.Code, hlir.Code{Name: d.Name(), Source: string(b)})

switch d.Name() {
case "main.sh":
maybeCommand = "./main.sh"
maybeImage = "docker.io/alpine:3"
case "main.py":
maybeCommand = "python3 main.py"
maybeImage = "docker.io/python:3.12"
}
return nil
}

// Handle non-src artifacts
switch d.Name() {
case "version", "version.txt":
if appVersion, err = handleVersionFile(path); err != nil {
return err
}
case "requirements.txt":
spec.Needs = append(spec.Needs, hlir.Needs{Name: "python", Version: "latest", Requirements: string(b)})
case "memory", "memory.txt":
spec.MinMemory = string(b)
// NOTE(review): the `image` and `command` file contents are stored verbatim
// (no trimming), so a trailing newline in the file ends up in the field —
// confirm downstream consumers tolerate that.
case "image":
spec.Image = string(b)
case "command":
spec.Command = string(b)
case "env.yaml":
err := yaml.Unmarshal(b, &spec.Env)
if err != nil {
// NOTE(review): formatted with %v, so the underlying YAML error is not
// wrapped for errors.Is/errors.As.
return fmt.Errorf("Error parsing env.yaml: %v", err)
}
default:
spec.Code = append(spec.Code, hlir.Code{Name: d.Name(), Source: string(b)})
}

switch d.Name() {
case "main.sh":
spec.Command = "./main.sh"
spec.Image = "docker.io/alpine:3"
case "main.py":
spec.Command = "python3 main.py"
spec.Image = "docker.io/python:3.12"
if opts.Verbose() {
fmt.Fprintln(os.Stderr, "Skipping application artifact", strings.Replace(path, sourcePath, "", 1))
}
}
}

return nil
})

// Apply the src/-derived fallbacks only when nothing explicit was provided.
if spec.Command == "" && maybeCommand != "" {
spec.Command = maybeCommand
}
if spec.Image == "" && maybeImage != "" {
spec.Image = maybeImage
}

// If requirements.txt registered a python need with version "latest" and the
// command starts with "python3", try to pull a concrete version out of the command.
pyNeedsIdx := slices.IndexFunc(spec.Needs, func(n hlir.Needs) bool { return n.Name == "python" && n.Version == "latest" })
if pyNeedsIdx >= 0 && strings.HasPrefix(spec.Command, "python3") {
// NOTE(review): the '.' in this pattern is unescaped and therefore matches
// any single character, not only a literal dot; the regexp is also compiled
// on every call.
version := regexp.MustCompile("\\d.\\d+").FindString(spec.Command)
if version != "" {
if opts.Verbose() {
fmt.Fprintln(os.Stderr, "Using Python version", version)
}
spec.Needs[pyNeedsIdx].Version = version
}
}

return
}
10 changes: 7 additions & 3 deletions pkg/runtime/needs/install_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,19 @@ func installPython(ctx context.Context, version string, verbose bool) (string, e
return "", err
}

return "", brewInstall(ctx, "python3", version, verbose) //Todo: versions other than latest
python := "python@" + version
if version == "" || version == "latest" {
python = "python3"
}
return "", brewInstall(ctx, python, version, verbose) //Todo: versions other than latest
}

func brewInstall(ctx context.Context, pkg string, version string, verbose bool) error {
var cmd *exec.Cmd
if verbose {
fmt.Fprintf(os.Stdout, "Installing %s release of %s \n", version, pkg)
fmt.Fprintf(os.Stderr, "Installing %s release of %s \n", version, pkg)
cmd = exec.CommandContext(ctx, "brew", "install", "--verbose", "--debug", pkg)
cmd.Stdout = os.Stdout
cmd.Stdout = os.Stderr // Stderr so as not to collide with `lunchpail needs` stdout
} else {
cmd = exec.Command("brew", "install", pkg)
}
Expand Down
61 changes: 28 additions & 33 deletions pkg/runtime/needs/install_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package needs

import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
Expand Down Expand Up @@ -30,7 +31,7 @@ func installMinio(ctx context.Context, version string, verbose bool) (string, er
cmd := exec.CommandContext(ctx, "wget", "https://dl.min.io/server/minio/release/linux-amd64/minio")
cmd.Dir = dir
if verbose {
cmd.Stdout = os.Stdout
cmd.Stdout = os.Stderr
}
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
Expand All @@ -45,47 +46,41 @@ func installMinio(ctx context.Context, version string, verbose bool) (string, er
}

// installPython (Linux variant) installs a `python<version>` interpreter when
// one is not already on PATH.
//
// NOTE(review): this span is a rendered diff hunk with the +/- markers and
// indentation stripped. Lines from a block-commented wget/tar implementation
// are interleaved with the lines of the implementation that replaces it, and
// the block-comment delimiters are still present, so the text below is not
// compilable as shown.
//
// From the lines visible here, the replacement logic appears to be:
//   - an empty or "latest" version is treated as "3";
//   - exec.LookPath("python"+version) decides whether anything needs installing;
//   - if `apt` is found, a shell command line (deadsnakes PPA, apt install of
//     python<version>, -venv and -distutils, then get-pip.py) is run through
//     /bin/sh with both stdout and stderr sent to os.Stderr;
//   - if no command line could be built, an "Unable to install" error is returned.
//
// NOTE(review): `version` is interpolated into the /bin/sh command line without
// escaping — confirm it only ever comes from trusted input.
// NOTE(review): when `sudo` is not on PATH the prefix is the empty string —
// presumably this assumes the process is already running as root; verify.
// The string result is always "" on every return path visible here.
func installPython(ctx context.Context, version string, verbose bool) (string, error) {
/*
if verbose {
fmt.Fprintf(os.Stdout, "Installing %s release of python \n", version)
}
if version == "" || version == "latest" {
version = "3"
}

dir, err := bindir()
if err != nil {
return err
}
if verbose {
fmt.Fprintf(os.Stderr, "Checking for existence of python%s\n", version)
}

if err := os.MkdirAll(dir, 0755); err != nil {
return err
}
if _, err := exec.LookPath("python" + version); err != nil {
fmt.Fprintf(os.Stderr, "Installing python%s\n", version)

//Todo: versions other than latest
cmd := exec.Command("wget", "https://www.python.org/ftp/python/3.12.7/Python-3.12.7.tgz")
cmd.Dir = dir
if verbose {
cmd.Stdout = os.Stdout
}
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return err
}
var cmdline string
sudo := "sudo"
if _, err := exec.LookPath("sudo"); err != nil {
sudo = ""
}
if _, err := exec.LookPath("apt"); err == nil {
cmdline = fmt.Sprintf("%s add-apt-repository -y ppa:deadsnakes/ppa && %s apt update && %s apt install -y python%s python%s-venv python%s-distutils && curl -sS https://bootstrap.pypa.io/get-pip.py | python%s && which python%s", sudo, sudo, sudo, version, version, version, version, version)
}

cmd = exec.Command("tar", "xf", "Python-3.12.7.tgz")
cmd.Dir = dir
if cmdline != "" {
if verbose {
cmd.Stdout = os.Stdout
fmt.Fprintf(os.Stderr, "Installing python%s via command line %s\n", version, cmdline)
}
cmd := exec.CommandContext(ctx, "/bin/sh", "-c", cmdline)
cmd.Stdout = os.Stderr // Stderr so as not to collide with `lunchpail needs` stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return err
return "", err
}

if err := setenv(dir); err != nil { //setting $PATH
return err
}

os.Chmod(filepath.Join(dir, "python"), 0755)
*/
fmt.Fprintf(os.Stderr, "Successfully installed python%s\n", version)
} else {
return "", fmt.Errorf("Unable to install required python version %s", version)
}
}

return "", nil
}
Expand Down
20 changes: 10 additions & 10 deletions pkg/runtime/needs/install_requirements.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ import (
"syscall"
)

func requirementsInstall(ctx context.Context, requirements string, verbose bool) (string, error) {
var cmd *exec.Cmd
func requirementsInstall(ctx context.Context, version, requirements string, verbose bool) (string, error) {
var verboseFlag string
var reqmtsByte []byte
var reqmtsFile *os.File
Expand Down Expand Up @@ -84,17 +83,18 @@ func requirementsInstall(ctx context.Context, requirements string, verbose bool)
quiet = ""
}

cmds := fmt.Sprintf(`python3 -m venv %s
if version == "" || version == "latest" {
version = "3"
}

cmdline := fmt.Sprintf(`python%s -m venv %s
source %s/bin/activate
if ! which pip3; then python3 -m pip install pip %s; fi
pip3 install %s %s -r %s %s 1>&2`, venvPath, venvPath, verboseFlag, nocache, quiet, reqmtsFile.Name(), verboseFlag)
if ! which pip%s; then python%s -m pip install pip %s; fi
pip%s install %s %s -r %s %s 1>&2`, version, venvPath, venvPath, version, version, verboseFlag, version, nocache, quiet, reqmtsFile.Name(), verboseFlag)

cmd = exec.CommandContext(ctx, "/bin/bash", "-c", cmds)
cmd := exec.CommandContext(ctx, "/bin/bash", "-c", cmdline)
cmd.Dir = filepath.Dir(venvPath)
if verbose {
// Stderr so as not to collide with lunchpail pipeline stdout
cmd.Stdout = os.Stderr
}
cmd.Stdout = os.Stderr // Stderr so as not to collide with `lunchpail needs` stdout
cmd.Stderr = os.Stderr

if err := cmd.Run(); err != nil {
Expand Down
11 changes: 7 additions & 4 deletions pkg/runtime/needs/python.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,21 @@ import (
"os/exec"
)

// InstallPython looks for a `python<version>` executable on PATH, calls
// installPython when the lookup fails with exec.ErrNotFound, and, when
// `requirements` is non-empty, hands off to requirementsInstall and returns
// its result. With no requirements it returns "" and a nil error.
//
// NOTE(review): this span is a rendered diff hunk with the +/- markers and
// indentation stripped, so both the pre-change and post-change signature,
// LookPath and requirementsInstall lines appear below.
func InstallPython(ctx context.Context, version string, requirements string, opts Options) (string, error) {
if _, err := exec.LookPath("python3"); err != nil {
func InstallPython(ctx context.Context, version, requirements string, opts Options) (string, error) {
// An unspecified or "latest" version is looked up as the generic "python3".
if version == "" || version == "latest" {
version = "3"
}

if _, err := exec.LookPath("python" + version); err != nil {
if errors.Is(err, exec.ErrNotFound) {
if _, err := installPython(ctx, version, opts.Verbose); err != nil {
return "", err
}
}
// NOTE(review): this returns the original LookPath error even when the
// installPython call above succeeded, so the requirements step below is
// never reached on a fresh install — confirm whether a successful install
// should fall through instead.
return "", err
}
if requirements != "" {
//returns bin path where installed
return requirementsInstall(ctx, requirements, opts.Verbose)
return requirementsInstall(ctx, version, requirements, opts.Verbose)
}
return "", nil
}
22 changes: 0 additions & 22 deletions tests/tests/python-language-pdf2parquet/pail/app.yaml

This file was deleted.

1 change: 1 addition & 0 deletions tests/tests/python-language-pdf2parquet/pail/command
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python3.11 main.py

This file was deleted.

1 change: 1 addition & 0 deletions tests/tests/python-language-pdf2parquet/pail/env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
USE_NNPACK: '0' # otherwise torch fails with "Could not initialize NNPACK! Reason: Unsupported hardware" on ARM (lack of AVX instructions)
1 change: 1 addition & 0 deletions tests/tests/python-language-pdf2parquet/pail/image
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker.io/python:3.11
7 changes: 7 additions & 0 deletions tests/tests/python-language-pdf2parquet/pail/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
docling-core==2.3.0
docling-ibm-models==2.0.3
deepsearch-glm==0.26.1
docling==2.3.1
filetype >=1.2.0, <2.0.0

pyarrow<18
3 changes: 3 additions & 0 deletions tests/tests/python-language-pdf2parquet/settings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@ api=workqueue
expected=("Done with nrows=1 nsuccess=1 nfail=0 nskip=0" "Done with nrows=2 nsuccess=2 nfail=0 nskip=0")
NUM_DESIRED_OUTPUTS=0

# the default is --yaml. we don't want that
source_from=" "

up_args='"$TEST_PATH"/pail/test-data/input/redp5110-ch1.pdf "$TEST_PATH"/pail/test-data/input/archive1.zip'
2 changes: 1 addition & 1 deletion tests/tests/test7/settings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ api=workqueue
# task.1,task.3,task.5 <-- 3 tasks per iter

expected=("Processing 6 task.1.txt" "Processing 6 task.3.txt" "Processing 6 task.5.txt" "Processing 6 task.2.txt" "Processing 6 task.4.txt" "Processing 6 task.6.txt")
NUM_DESIRED_OUTPUTS=6
NUM_DESIRED_OUTPUTS=10

inputapp='$testapp sweep 1 10 1 --interval 1'
File renamed without changes.