Skip to content

Commit

Permalink
more fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
jmagoon committed Jan 24, 2025
1 parent dfdd4e4 commit 1cbed14
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 123 deletions.
6 changes: 3 additions & 3 deletions anvil/cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ Description:
- docker: Containerizes the execution using Docker
By default, Katana is immutable and will copy your pipeline to a history folder and
execute in that folder. By default, only output files will be preserved per run. You can
preserve full history via the -history flag.
execute in that folder. You can preserve full history via the -history flag.
Examples:
# Run a pipeline using system dependencies
Expand Down Expand Up @@ -58,7 +57,7 @@ func main() {

// Define flags
flag.StringVar(&opts.mode, "mode", "default", "Execution mode: default, uv, or docker")
flag.StringVar(&opts.mode, "history", "false", "Preserve full history of runs")
flag.BoolVar(&opts.history, "history", false, "Preserve full history of runs")
flag.BoolVar(&opts.verbose, "verbose", false, "Enable verbose logging")
flag.BoolVar(&opts.noCache, "no-cache", false, "Disable caching (applies to uv and docker modes)")
flag.StringVar(&opts.workingDir, "work-dir", "", "Custom working directory (default: /tmp/katana)")
Expand Down Expand Up @@ -108,6 +107,7 @@ func main() {
// Set up execution options
execOpts := katana.Options{
Mode: opts.mode,
History: opts.history,
Args: argsKeyVal,
PipelinePath: pipelinePath,
Verbose: opts.verbose,
Expand Down
2 changes: 0 additions & 2 deletions anvil/katana/katanaExec.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,6 @@ func (t *Task) Execute(args Dict, executionDir string, opts Options) (Dict, erro
for _, v := range data {
outputs[key] = v
}

os.Remove(filepath.Join(TMPPATH, value) + ".txt")
}

return outputs, nil
Expand Down
266 changes: 148 additions & 118 deletions anvil/katana/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,141 +121,171 @@ func copyFile(sourcePath, targetPath string) error {
return os.Chmod(targetPath, sourceInfo.Mode())
}


type fileInfo struct {
exists bool
hash string
exists bool
hash string
}

// getDirectoryContents returns a map of files to their content hashes
func getDirectoryContents(dir string) (map[string]fileInfo, error) {
contents := make(map[string]fileInfo)
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

relPath, err := filepath.Rel(dir, path)
if err != nil {
return err
}

// Skip the root directory itself
if relPath == "." {
return nil
}

// For directories, just mark that they exist
if info.IsDir() {
contents[relPath] = fileInfo{exists: true}
return nil
}

// For files, compute hash
hash, err := getFileHash(path)
if err != nil {
return err
}

contents[relPath] = fileInfo{exists: true, hash: hash}
return nil
})
return contents, err
contents := make(map[string]fileInfo)
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

relPath, err := filepath.Rel(dir, path)
if err != nil {
return err
}

// Skip the root directory itself
if relPath == "." {
return nil
}

// For directories, just mark that they exist
if info.IsDir() {
contents[relPath] = fileInfo{exists: true}
return nil
}

// For files, compute hash
hash, err := getFileHash(path)
if err != nil {
return err
}

contents[relPath] = fileInfo{exists: true, hash: hash}
return nil
})
return contents, err
}

// getFileHash returns a hash of the file contents
func getFileHash(path string) (string, error) {
data, err := os.ReadFile(path)
if err != nil {
return "", err
}
hash := sha256.Sum256(data)
return fmt.Sprintf("%x", hash), nil
data, err := os.ReadFile(path)
if err != nil {
return "", err
}
hash := sha256.Sum256(data)
return fmt.Sprintf("%x", hash), nil
}

// shouldKeepFile determines if a file should be kept based on original and current state
func shouldKeepFile(path string, originalFiles, currentFiles map[string]fileInfo) bool {
orig, wasOriginal := originalFiles[path]
curr, stillExists := currentFiles[path]
orig, wasOriginal := originalFiles[path]
curr, stillExists := currentFiles[path]

// Keep if it's new
if !wasOriginal && stillExists {
return true
}

// Keep if it's new
if !wasOriginal && stillExists {
return true
}
// Keep if it changed
if wasOriginal && stillExists && orig.hash != curr.hash {
return true
}

// Keep if it changed
if wasOriginal && stillExists && orig.hash != curr.hash {
return true
}
return false
}

return false
// patternsToClean defines paths that should always be removed
var patternsToClean = []string{
"__pycache__",
"*.pyc",
".pytest_cache",
"*.pyo",
// Add any other patterns here
}

// cleanup removes unchanged files while preserving new and modified files
// cleanup removes unchanged files and specified patterns while preserving new and modified files
func cleanup(executionDir string, originalFiles map[string]fileInfo) error {
// Get current state
currentFiles, err := getDirectoryContents(executionDir)
if err != nil {
return fmt.Errorf("failed to get current directory contents: %w", err)
}

// Build list of files to remove
var toRemove []string
for path, origInfo := range originalFiles {
fullPath := filepath.Join(executionDir, path)

// Skip directories for now
if origInfo.hash == "" {
continue
}

// Remove only if file is unchanged
if !shouldKeepFile(path, originalFiles, currentFiles) {
toRemove = append(toRemove, fullPath)
}
}

// Sort paths by length in reverse order to handle nested files first
sort.Slice(toRemove, func(i, j int) bool {
return len(toRemove[i]) > len(toRemove[j])
})

// Remove files
for _, path := range toRemove {
if err := os.Remove(path); err != nil {
return fmt.Errorf("failed to remove %s: %w", path, err)
}
}

// Clean up empty directories
err = filepath.Walk(executionDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

if !info.IsDir() {
return nil
}

// Skip the root directory
if path == executionDir {
return nil
}

// Check if directory is empty
entries, err := os.ReadDir(path)
if err != nil {
return err
}

if len(entries) == 0 {
if err := os.Remove(path); err != nil {
return fmt.Errorf("failed to remove empty directory %s: %w", path, err)
}
}

return nil
})

return err
// Get current state
currentFiles, err := getDirectoryContents(executionDir)
if err != nil {
return fmt.Errorf("failed to get current directory contents: %w", err)
}

var toRemove []string
err = filepath.Walk(executionDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

relPath, err := filepath.Rel(executionDir, path)
if err != nil {
return err
}

// Skip root directory
if relPath == "." {
return nil
}

// Check if path matches any patterns to clean
for _, pattern := range patternsToClean {
matched, err := filepath.Match(pattern, info.Name())
if err != nil {
return err
}
if matched || info.Name() == pattern {
if info.IsDir() {
toRemove = append(toRemove, path)
return filepath.SkipDir
}
toRemove = append(toRemove, path)
return nil
}
}

// If not a pattern to clean, check if it's an unchanged original file
if !info.IsDir() && !shouldKeepFile(relPath, originalFiles, currentFiles) {
toRemove = append(toRemove, path)
}

return nil
})

if err != nil {
return fmt.Errorf("failed to walk directory: %w", err)
}

// Sort paths by length in reverse order to handle nested paths first
sort.Slice(toRemove, func(i, j int) bool {
return len(toRemove[i]) > len(toRemove[j])
})

// Remove files and directories
for _, path := range toRemove {
if err := os.RemoveAll(path); err != nil {
return fmt.Errorf("failed to remove %s: %w", path, err)
}
}

// Clean up any remaining empty directories
err = filepath.Walk(executionDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

if !info.IsDir() || path == executionDir {
return nil
}

entries, err := os.ReadDir(path)
if err != nil {
return err
}

if len(entries) == 0 {
if err := os.Remove(path); err != nil {
return fmt.Errorf("failed to remove empty directory %s: %w", path, err)
}
}

return nil
})

return err
}

0 comments on commit 1cbed14

Please sign in to comment.