Skip to content

Commit

Permalink
feat: extend build-from-source to support env.yaml, image, command
Browse files Browse the repository at this point in the history
Also:
- fixes the pip requirements install so that it uses the desired version of Python
- ports the python-language-pdf2parquet test away from app.yaml (this gives us test coverage for the above)

Signed-off-by: Nick Mitchell <nickm@us.ibm.com>
  • Loading branch information
starpit committed Nov 27, 2024
1 parent a4f471a commit 57027b9
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 47 deletions.
58 changes: 46 additions & 12 deletions pkg/fe/builder/overlay/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,21 @@ import (
"io/fs"
"os"
"path/filepath"
"regexp"
"slices"
"strings"

"gopkg.in/yaml.v3"

"lunchpail.io/pkg/ir/hlir"
)

// copySourceIntoTemplate formulates an HLIR for the application source in the
// given `sourcePath` and writes it out to the `templatePath`.
//
// When opts.Verbose() is true, a progress message naming
// appdir(templatePath) is printed to stderr first. The work itself is handed
// to addHLIRFromSource, and its results (the application version and any
// error) are returned unchanged through the named results.
func copySourceIntoTemplate(appname, sourcePath, templatePath string, opts Options) (appVersion string, err error) {
if opts.Verbose() {
fmt.Fprintln(os.Stderr, "Copying application source into", appdir(templatePath))
}

// Assign into the named results so the bare return below hands them back as-is.
appVersion, err = addHLIRFromSource(appname, sourcePath, templatePath, opts)
return
}

func addHLIRFromSource(appname, sourcePath, templatePath string, opts Options) (string, error) {
appVersion, app, err := applicationFromSource(appname, sourcePath, templatePath, opts)
if err != nil {
return "", err
Expand All @@ -38,16 +37,22 @@ func addHLIRFromSource(appname, sourcePath, templatePath string, opts Options) (
return appVersion, nil
}

// Formulate an HLIR for the source in the given `sourcePath`
func applicationFromSource(appname, sourcePath, templatePath string, opts Options) (appVersion string, app hlir.Application, err error) {
app = hlir.NewWorkerApplication(appname)
spec := &app.Spec

filepath.WalkDir(sourcePath, func(path string, d fs.DirEntry, err error) error {
maybeImage := ""
maybeCommand := ""

err = filepath.WalkDir(sourcePath, func(path string, d fs.DirEntry, err error) error {
switch {
case d.IsDir():
// skip directories
case filepath.Ext(path) == ".html" || filepath.Ext(path) == ".gz" || filepath.Ext(path) == ".zip" || filepath.Ext(path) == ".parquet":
// skip data files
case filepath.Ext(path) == ".pdf" || filepath.Ext(path) == ".html" || filepath.Ext(path) == ".gz" || filepath.Ext(path) == ".zip" || filepath.Ext(path) == ".parquet":
// skip data files; TODO add support for .ignore
case path[len(path)-1] == '~':
// skip emacs temporary files
default:
b, err := os.ReadFile(path)
if err != nil {
Expand All @@ -61,22 +66,51 @@ func applicationFromSource(appname, sourcePath, templatePath string, opts Option
}
case "requirements.txt":
spec.Needs = append(spec.Needs, hlir.Needs{Name: "python", Version: "latest", Requirements: string(b)})
case "memory", "memory.txt":
spec.MinMemory = string(b)
case "image":
spec.Image = string(b)
case "command":
spec.Command = string(b)
case "env.yaml":
err := yaml.Unmarshal(b, &spec.Env)
if err != nil {
return fmt.Errorf("Error parsing env.yaml: %v", err)
}
default:
spec.Code = append(spec.Code, hlir.Code{Name: d.Name(), Source: string(b)})
}

switch d.Name() {
case "main.sh":
spec.Command = "./main.sh"
spec.Image = "docker.io/alpine:3"
maybeCommand = "./main.sh"
maybeImage = "docker.io/alpine:3"
case "main.py":
spec.Command = "python3 main.py"
spec.Image = "docker.io/python:3.12"
maybeCommand = "python3 main.py"
maybeImage = "docker.io/python:3.12"
}
}

return nil
})

if spec.Command == "" && maybeCommand != "" {
spec.Command = maybeCommand
}
if spec.Image == "" && maybeImage != "" {
spec.Image = maybeImage
}

pyNeedsIdx := slices.IndexFunc(spec.Needs, func(n hlir.Needs) bool { return n.Name == "python" && n.Version == "latest" })
if pyNeedsIdx >= 0 && strings.HasPrefix(spec.Command, "python3") {
version := regexp.MustCompile("\\d.\\d+").FindString(spec.Command)
if version != "" {
if opts.Verbose() {
fmt.Fprintln(os.Stderr, "Using Python version", version)
}
spec.Needs[pyNeedsIdx].Version = version
}
}

return
}
10 changes: 7 additions & 3 deletions pkg/runtime/needs/install_requirements.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"syscall"
)

func requirementsInstall(ctx context.Context, requirements string, verbose bool) (string, error) {
func requirementsInstall(ctx context.Context, version, requirements string, verbose bool) (string, error) {
var cmd *exec.Cmd
var verboseFlag string
var reqmtsByte []byte
Expand Down Expand Up @@ -84,10 +84,14 @@ func requirementsInstall(ctx context.Context, requirements string, verbose bool)
quiet = ""
}

if version == "" || version == "latest" {
version = "3"
}

cmds := fmt.Sprintf(`python3 -m venv %s
source %s/bin/activate
if ! which pip3; then python3 -m pip install pip %s; fi
pip3 install %s %s -r %s %s 1>&2`, venvPath, venvPath, verboseFlag, nocache, quiet, reqmtsFile.Name(), verboseFlag)
if ! which pip%s; then python%s -m pip install pip %s; fi
pip%s install %s %s -r %s %s 1>&2`, venvPath, venvPath, version, version, verboseFlag, version, nocache, quiet, reqmtsFile.Name(), verboseFlag)

cmd = exec.CommandContext(ctx, "/bin/bash", "-c", cmds)
cmd.Dir = filepath.Dir(venvPath)
Expand Down
4 changes: 2 additions & 2 deletions pkg/runtime/needs/python.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"os/exec"
)

func InstallPython(ctx context.Context, version string, requirements string, opts Options) (string, error) {
func InstallPython(ctx context.Context, version, requirements string, opts Options) (string, error) {
if _, err := exec.LookPath("python3"); err != nil {
if errors.Is(err, exec.ErrNotFound) {
if _, err := installPython(ctx, version, opts.Verbose); err != nil {
Expand All @@ -17,7 +17,7 @@ func InstallPython(ctx context.Context, version string, requirements string, opt
}
if requirements != "" {
//returns bin path where installed
return requirementsInstall(ctx, requirements, opts.Verbose)
return requirementsInstall(ctx, version, requirements, opts.Verbose)
}
return "", nil
}
22 changes: 0 additions & 22 deletions tests/tests/python-language-pdf2parquet/pail/app.yaml

This file was deleted.

1 change: 1 addition & 0 deletions tests/tests/python-language-pdf2parquet/pail/command
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
python3.11 main.py

This file was deleted.

1 change: 1 addition & 0 deletions tests/tests/python-language-pdf2parquet/pail/env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
USE_NNPACK: '0' # otherwise torch fails with "Could not initialize NNPACK! Reason: Unsupported hardware" on ARM (lack of AVX instructions)
1 change: 1 addition & 0 deletions tests/tests/python-language-pdf2parquet/pail/image
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
docker.io/python:3.12
7 changes: 7 additions & 0 deletions tests/tests/python-language-pdf2parquet/pail/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
docling-core==2.3.0
docling-ibm-models==2.0.3
deepsearch-glm==0.26.1
docling==2.3.1
filetype >=1.2.0, <2.0.0

pyarrow<18
3 changes: 3 additions & 0 deletions tests/tests/python-language-pdf2parquet/settings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@ api=workqueue
expected=("Done with nrows=1 nsuccess=1 nfail=0 nskip=0" "Done with nrows=2 nsuccess=2 nfail=0 nskip=0")
NUM_DESIRED_OUTPUTS=0

# The default is --yaml; we don't want that, so override it with a blank value.
source_from=" "

up_args='"$TEST_PATH"/pail/test-data/input/redp5110-ch1.pdf "$TEST_PATH"/pail/test-data/input/archive1.zip'

0 comments on commit 57027b9

Please sign in to comment.