diff --git a/.gitignore b/.gitignore index daf913b1b3..fd433aa533 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ _testmain.go *.exe *.test *.prof +/s2/cmd/_sfx/sfx-exe diff --git a/.goreleaser.yml b/.goreleaser.yml index 03a99033ff..c3c984bfbd 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -2,6 +2,8 @@ # Make sure to check the documentation at http://goreleaser.com before: hooks: + - ./gen.sh + builds: - id: "s2c" @@ -51,6 +53,30 @@ builds: - mips64le goarm: - 7 + - + id: "s2sx" + binary: s2sx + main: ./s2/cmd/_s2sx/main.go + env: + - CGO_ENABLED=0 + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm + - arm64 + - ppc64 + - ppc64le + - mips64 + - mips64le + goarm: + - 7 archives: - @@ -69,6 +95,7 @@ archives: - goos: windows format: zip files: + - unpack/* - s2/LICENSE - s2/README.md checksum: diff --git a/.travis.yml b/.travis.yml index 5e4ce96ab5..dead4ad0c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,7 +52,7 @@ deploy: script: curl -sL https://git.io/goreleaser | VERSION=v0.157.0 bash || true on: tags: true - condition: $TRAVIS_OS_NAME = linux + condition: $TRAVIS_OS_NAME = linux AND $TRAVIS_CPU_ARCH = amd64 go: 1.16.x branches: only: diff --git a/gen.sh b/gen.sh new file mode 100755 index 0000000000..aff942205f --- /dev/null +++ b/gen.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +cd s2/cmd/_s2sx/ || exit 1 +go generate . diff --git a/s2/README.md b/s2/README.md index 601bd397a4..a65a109ee2 100644 --- a/s2/README.md +++ b/s2/README.md @@ -112,12 +112,20 @@ For big skips the decompressor is able to skip blocks without decompressing them ## Single Blocks Similar to Snappy S2 offers single block compression. -Blocks do not offer the same flexibility and safety as streams, but may be preferable for very small payloads, less than 100K. +Blocks do not offer the same flexibility and safety as streams, +but may be preferable for very small payloads, less than 100K. 
-Using a simple `dst := s2.Encode(nil, src)` will compress `src` and return the compressed result. It is possible to provide a destination buffer. If the buffer has a capacity of `s2.MaxEncodedLen(len(src))` it will be used. If not a new will be allocated. Alternatively `EncodeBetter` can also be used for better, but slightly slower compression. +Using a simple `dst := s2.Encode(nil, src)` will compress `src` and return the compressed result. +It is possible to provide a destination buffer. +If the buffer has a capacity of `s2.MaxEncodedLen(len(src))` it will be used. +If not a new will be allocated. -Similarly to decompress a block you can use `dst, err := s2.Decode(nil, src)`. Again an optional destination buffer can be supplied. -The `s2.DecodedLen(src)` can be used to get the minimum capacity needed. If that is not satisfied a new buffer will be allocated. +Alternatively `EncodeBetter`/`EncodeBest` can also be used for better, but slightly slower compression. + +Similarly to decompress a block you can use `dst, err := s2.Decode(nil, src)`. +Again an optional destination buffer can be supplied. +The `s2.DecodedLen(src)` can be used to get the minimum capacity needed. +If that is not satisfied a new buffer will be allocated. Block function always operate on a single goroutine since it should only be used for small payloads. @@ -151,23 +159,28 @@ Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/ Options: -bench int - Run benchmark n times. No output will be written + Run benchmark n times. No output will be written -blocksize string - Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M") - -c Write all output to stdout. Multiple input files will be concatenated + Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB (default "4M") + -c Write all output to stdout. 
Multiple input files will be concatenated -cpu int - Compress using this amount of threads (default CPU_THREADS]) + Compress using this amount of threads (default 32) -faster - Compress faster, but with a minor compression loss + Compress faster, but with a minor compression loss -help - Display help + Display help -pad string - Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1") - -q Don't write any output to terminal, except errors + Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc (default "1") + -q Don't write any output to terminal, except errors -rm - Delete source file(s) after successful compression + Delete source file(s) after successful compression -safe - Do not overwrite output files + Do not overwrite output files + -slower + Compress more, but a lot slower + -verify + Verify written files + ``` ## s2d @@ -184,17 +197,73 @@ Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/ Options: -bench int - Run benchmark n times. No output will be written - -c Write all output to stdout. Multiple input files will be concatenated + Run benchmark n times. No output will be written + -c Write all output to stdout. Multiple input files will be concatenated -help - Display help - -q Don't write any output to terminal, except errors + Display help + -q Don't write any output to terminal, except errors -rm - Delete source file(s) after successful decompression + Delete source file(s) after successful decompression -safe - Do not overwrite output files + Do not overwrite output files + -verify + Verify files, but do not write output +``` + +## s2sx: self-extracting archives + +s2sx allows creating self-extracting archives with no dependencies. + +By default, executables are created for the same platforms as the host os, +but this can be overridden with `-os` parameter. + +Extracted files have 0666 permissions, except when untar option used. 
``` +Usage: s2sx [options] file1 file2 + +Compresses all files supplied as input separately. +If files have '.s2' extension they are assumed to be compressed already. +Output files are written as 'filename.s2sfx' and with '.exe' for windows targets. +By default output files will be overwritten. + +Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt +Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt + +Options: + -arch string + Destination architecture (default "amd64") + -c Write all output to stdout. Multiple input files will be concatenated + -cpu int + Compress using this amount of threads (default 32) + -help + Display help + -os string + Destination operating system (default "windows") + -q Don't write any output to terminal, except errors + -rm + Delete source file(s) after successful compression + -safe + Do not overwrite output files + -untar + Untar on destination + +Available platforms are: + + * darwin-amd64 + * linux-amd64 + * windows-amd64 +``` + +### Self-extracting TAR files + +If you wrap a TAR file you can specify `-untar` to make it untar on the destination host. + +Files are extracted to the current folder with the path specified in the tar file. + +Note that tar files are not validated before they are wrapped. + +For security reasons, entries whose path would place them outside the extraction folder (for example via `..`) are rejected.
# Performance diff --git a/s2/cmd/_s2sx/_unpack/main.go b/s2/cmd/_s2sx/_unpack/main.go new file mode 100644 index 0000000000..1bb36bbf7c --- /dev/null +++ b/s2/cmd/_s2sx/_unpack/main.go @@ -0,0 +1,296 @@ +package main + +import ( + "archive/tar" + "debug/elf" + "debug/macho" + "debug/pe" + "errors" + "fmt" + "io" + "os" + "path" + "path/filepath" + "strings" + + "github.com/klauspost/compress/s2" +) + +const ( + opUnpack = iota + 1 + opUnTar +) + +func main() { + me, err := os.Executable() + exitErr(err) + f, err := os.Open(me) + exitErr(err) + defer f.Close() + stat, err := f.Stat() + exitErr(err) + rd, err := newReader(f, stat.Size()) + exitErr(err) + var tmp [1]byte + _, err = io.ReadFull(rd, tmp[:]) + exitErr(err) + dec := s2.NewReader(rd) + switch tmp[0] { + case opUnpack: + outname := me + "-extracted" + if idx := strings.Index(me, ".s2sfx"); idx > 0 { + // Trim from '.s2sfx' + outname = me[:idx] + } + fmt.Printf("Extracting to %q...", outname) + out, err := os.Create(outname) + exitErr(err) + _, err = io.Copy(out, dec) + exitErr(err) + + case opUnTar: + dir, err := os.Getwd() + if err != nil { + dir = filepath.Dir(me) + } + fmt.Printf("Extracting TAR file to %s...\n", dir) + exitErr(untar(dir, dec)) + default: + exitErr(fmt.Errorf("unknown operation: %d", tmp[0])) + } + fmt.Println("\nDone.") +} + +func exitErr(err error) { + if err != nil { + fmt.Fprintln(os.Stderr, "\nERROR:", err.Error()) + os.Exit(2) + } +} + +func newReader(rda io.ReaderAt, size int64) (io.Reader, error) { + handlers := []func(io.ReaderAt, int64) (io.Reader, error){ + exeReaderMacho, + exeReaderElf, + exeReaderPe, + } + + for _, handler := range handlers { + zfile, err := handler(rda, size) + if err == nil { + return zfile, nil + } + } + return nil, errors.New("no archive data found") +} + +// zipExeReaderPe treats the file as a Portable Executable binary +func exeReaderPe(rda io.ReaderAt, size int64) (io.Reader, error) { + file, err := pe.NewFile(rda) + if err != nil { + return nil, 
// exeReaderPe treats the file as a Portable Executable binary and returns a
// reader over the bytes that follow the last section. size is the total file
// size. An error is returned when the file is not a PE binary or when nothing
// follows the sections.
func exeReaderPe(rda io.ReaderAt, size int64) (io.Reader, error) {
	file, err := pe.NewFile(rda)
	if err != nil {
		return nil, err
	}

	var max int64
	for _, sec := range file.Sections {
		// Offset and Size are 32 bit; widen before adding so the sum cannot wrap.
		end := int64(sec.Offset) + int64(sec.Size)
		if end > max {
			max = end
		}
	}

	// <= also rejects section tables that point past the end of the file.
	if size <= max {
		return nil, errors.New("data not found")
	}
	return io.NewSectionReader(rda, max, size-max), nil
}

// exeReaderElf treats the file as an ELF binary and returns a reader over the
// bytes that follow the last section that occupies file space. size is the
// total file size.
func exeReaderElf(rda io.ReaderAt, size int64) (io.Reader, error) {
	file, err := elf.NewFile(rda)
	if err != nil {
		return nil, err
	}

	var max int64
	for _, sect := range file.Sections {
		// SHT_NOBITS sections take no space in the file.
		if sect.Type == elf.SHT_NOBITS {
			continue
		}

		// Move to end of file pointer
		end := int64(sect.Offset + sect.Size)
		if end > max {
			max = end
		}
	}

	if size <= max {
		return nil, errors.New("data not found")
	}
	return io.NewSectionReader(rda, max, size-max), nil
}

// exeReaderMacho treats the file as a Mach-O binary and returns a reader over
// the bytes that follow the last segment. size is the total file size.
func exeReaderMacho(rda io.ReaderAt, size int64) (io.Reader, error) {
	file, err := macho.NewFile(rda)
	if err != nil {
		return nil, err
	}

	var max int64
	for _, load := range file.Loads {
		seg, ok := load.(*macho.Segment)
		if !ok {
			continue
		}
		// Move to end of file pointer
		end := int64(seg.Offset + seg.Filesz)
		if end > max {
			max = end
		}
	}

	// Nothing appended after the last segment.
	if size <= max {
		return nil, errors.New("data not found")
	}
	return io.NewSectionReader(rda, max, size-max), nil
}

// untar reads a tar stream from r and recreates its entries below dst,
// printing every path it creates. Entry names, and hard link sources, that
// resolve outside dst are rejected with an error. Entry types that are not
// handled below are skipped.
func untar(dst string, r io.Reader) error {
	tr := tar.NewReader(r)

	for {
		header, err := tr.Next()

		switch {
		// no more entries
		case err == io.EOF:
			return nil
		case err != nil:
			return err
		case header == nil:
			continue
		}

		if err := checkPath(dst, header.Name); err != nil {
			return err
		}
		// Tar entry names are slash separated; normalise them as such before
		// converting to the host path form.
		target := filepath.Join(dst, filepath.FromSlash(path.Clean(header.Name)))

		switch header.Typeflag {

		// create the directory unless it already exists
		case tar.TypeDir:
			fmt.Println(target)
			if _, err := os.Stat(target); err != nil {
				if err := os.MkdirAll(target, 0755); err != nil {
					return err
				}
			}

		// everything file-like is written out as a regular file
		case tar.TypeReg, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse:
			fmt.Println(target)
			if err := extractFile(target, os.FileMode(header.Mode), tr); err != nil {
				return err
			}

		case tar.TypeSymlink:
			fmt.Println(target)
			if err := writeNewSymbolicLink(target, header.Linkname); err != nil {
				return err
			}

		case tar.TypeLink:
			fmt.Println(target)
			// The link source is joined onto dst as well, so it must be held
			// to the same containment rule as the entry name.
			if err := checkPath(dst, header.Linkname); err != nil {
				return err
			}
			if err := writeNewHardLink(target, filepath.Join(dst, header.Linkname)); err != nil {
				return err
			}
		}
	}
}

// extractFile writes the content of src to target using the given mode,
// creating missing parent directories on demand. An existing file is
// truncated so no stale bytes remain after a shorter replacement.
func extractFile(target string, mode os.FileMode, src io.Reader) error {
	const flags = os.O_CREATE | os.O_RDWR | os.O_TRUNC

	f, err := os.OpenFile(target, flags, mode)
	if os.IsNotExist(err) {
		if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
			return err
		}
		f, err = os.OpenFile(target, flags, mode)
	}
	if err != nil {
		return err
	}

	if _, err := io.Copy(f, src); err != nil {
		f.Close()
		return err
	}
	// A failing Close can mean data was not written.
	return f.Close()
}

// Thanks to https://github.com/mholt/archiver for the following:

// checkPath returns an error when filename, joined onto dst, resolves to a
// location outside dst. The comparison is made on whole path elements, so a
// sibling such as "/a/bc" is not accepted for dst "/a/b".
func checkPath(dst, filename string) error {
	dest := filepath.Join(dst, filename)
	//prevent path traversal attacks
	rel, err := filepath.Rel(filepath.Clean(dst), dest)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("illegal file path: %s", filename)
	}
	return nil
}

// prepareLinkPath creates the parent directory of fpath and removes whatever
// currently exists at fpath so a new link can be created there.
func prepareLinkPath(fpath string) error {
	if err := os.MkdirAll(filepath.Dir(fpath), 0755); err != nil {
		return fmt.Errorf("%s: making directory for file: %v", fpath, err)
	}

	// Lstat so an existing symlink itself is detected, not what it points at.
	if _, err := os.Lstat(fpath); err == nil {
		if err := os.Remove(fpath); err != nil {
			return fmt.Errorf("%s: failed to unlink: %+v", fpath, err)
		}
	}
	return nil
}

// writeNewSymbolicLink creates a symbolic link at fpath pointing to target,
// replacing anything that already exists at fpath.
func writeNewSymbolicLink(fpath string, target string) error {
	if err := prepareLinkPath(fpath); err != nil {
		return err
	}
	if err := os.Symlink(target, fpath); err != nil {
		return fmt.Errorf("%s: making symbolic link for: %v", fpath, err)
	}
	return nil
}

// writeNewHardLink creates a hard link at fpath to the existing file target,
// replacing anything that already exists at fpath.
func writeNewHardLink(fpath string, target string) error {
	if err := prepareLinkPath(fpath); err != nil {
		return err
	}
	if err := os.Link(target, fpath); err != nil {
		return fmt.Errorf("%s: making hard link for: %v", fpath, err)
	}
	return nil
}
-o ./sfx-exe/$GOOS-$GOARCH ./_unpack/main.go diff --git a/s2/cmd/_s2sx/go.mod b/s2/cmd/_s2sx/go.mod new file mode 100644 index 0000000000..8344921606 --- /dev/null +++ b/s2/cmd/_s2sx/go.mod @@ -0,0 +1,7 @@ +module github.com/klauspost/compress/s2/cmd/_s2sx + +go 1.16 + +require github.com/klauspost/compress v1.11.9 + +replace github.com/klauspost/compress v1.11.9 => ../../.. diff --git a/s2/cmd/_s2sx/go.sum b/s2/cmd/_s2sx/go.sum new file mode 100644 index 0000000000..e69de29bb2 diff --git a/s2/cmd/_s2sx/main.go b/s2/cmd/_s2sx/main.go new file mode 100644 index 0000000000..eca3d9f579 --- /dev/null +++ b/s2/cmd/_s2sx/main.go @@ -0,0 +1,206 @@ +package main + +import ( + "bufio" + "embed" + "errors" + "flag" + "fmt" + "io" + "os" + "path" + "path/filepath" + "runtime" + "strings" + "sync" + "time" + + "github.com/klauspost/compress/s2" + + "github.com/klauspost/compress/s2/cmd/internal/readahead" +) + +const ( + opUnpack = iota + 1 + opUnTar +) + +var ( + goos = flag.String("os", runtime.GOOS, "Destination operating system") + goarch = flag.String("arch", runtime.GOARCH, "Destination architecture") + cpu = flag.Int("cpu", runtime.GOMAXPROCS(0), "Compress using this amount of threads") + safe = flag.Bool("safe", false, "Do not overwrite output files") + stdout = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated") + remove = flag.Bool("rm", false, "Delete source file(s) after successful compression") + quiet = flag.Bool("q", false, "Don't write any output to terminal, except errors") + untar = flag.Bool("untar", false, "Untar on destination") + help = flag.Bool("help", false, "Display help") + + version = "(dev)" + date = "(unknown)" +) + +//go:embed sfx-exe +var embeddedFiles embed.FS + +func main() { + flag.Parse() + args := flag.Args() + if len(args) == 0 || *help { + _, _ = fmt.Fprintf(os.Stderr, "s2sx v%v, built at %v.\n\n", version, date) + _, _ = fmt.Fprintf(os.Stderr, "Copyright (c) 2011 The Snappy-Go Authors. 
All rights reserved.\n"+ + "Copyright (c) 2021 Klaus Post. All rights reserved.\n\n") + _, _ = fmt.Fprintln(os.Stderr, `Usage: s2sx [options] file1 file2 + +Compresses all files supplied as input separately. +If files have '.s2' extension they are assumed to be compressed already. +Output files are written as 'filename.s2sfx' and with '.exe' for windows targets. +By default output files will be overwritten. + +Wildcards are accepted: testdir/*.txt will compress all files in testdir ending with .txt +Directories can be wildcards as well. testdir/*/*.txt will match testdir/subdir/b.txt + +Options:`) + flag.PrintDefaults() + dir, err := embeddedFiles.ReadDir("sfx-exe") + exitErr(err) + _, _ = fmt.Fprintf(os.Stderr, "\nAvailable platforms are:\n\n") + for _, d := range dir { + _, _ = fmt.Fprintf(os.Stderr, " * %s\n", d.Name()) + } + + os.Exit(0) + } + + opts := []s2.WriterOption{s2.WriterBestCompression(), s2.WriterConcurrency(*cpu), s2.WriterBlockSize(4 << 20)} + wr := s2.NewWriter(nil, opts...) + var files []string + for _, pattern := range args { + found, err := filepath.Glob(pattern) + exitErr(err) + if len(found) == 0 { + exitErr(fmt.Errorf("unable to find file %v", pattern)) + } + files = append(files, found...) + } + wantPlat := *goos + "-" + *goarch + exec, err := embeddedFiles.ReadFile(path.Join("sfx-exe", wantPlat)) + if os.IsNotExist(err) { + dir, err := embeddedFiles.ReadDir("sfx-exe") + exitErr(err) + _, _ = fmt.Fprintf(os.Stderr, "os-arch %v not available. 
Available sfx platforms are:\n\n", wantPlat) + for _, d := range dir { + _, _ = fmt.Fprintf(os.Stderr, "* %s\n", d.Name()) + } + _, _ = fmt.Fprintf(os.Stderr, "\nUse -os and -arch to specify the destination platform.") + os.Exit(1) + } + mode := byte(opUnpack) + if *untar { + mode = opUnTar + } + if *stdout { + // Write exec once to stdout + _, err = os.Stdout.Write(exec) + exitErr(err) + _, err = os.Stdout.Write([]byte{mode}) + exitErr(err) + } + for _, filename := range files { + func() { + var closeOnce sync.Once + isCompressed := strings.HasSuffix(filename, ".s2") + filename = strings.TrimPrefix(filename, ".s2") + dstFilename := fmt.Sprintf("%s%s", filename, ".s2sfx") + if *goos == "windows" { + dstFilename += ".exe" + } + if !*quiet { + if !isCompressed { + fmt.Print("Compressing ", filename, " -> ", dstFilename, " for ", wantPlat) + } else { + fmt.Print("Creating sfx archive ", filename, " -> ", dstFilename, " for ", wantPlat) + } + } + // Input file. + file, err := os.Open(filename) + exitErr(err) + defer closeOnce.Do(func() { file.Close() }) + src, err := readahead.NewReaderSize(file, *cpu+1, 1<<20) + exitErr(err) + defer src.Close() + var out io.Writer + switch { + case *stdout: + out = os.Stdout + default: + if *safe { + _, err := os.Stat(dstFilename) + if !os.IsNotExist(err) { + exitErr(errors.New("destination file exists")) + } + } + dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY, 0777) + exitErr(err) + defer dstFile.Close() + bw := bufio.NewWriterSize(dstFile, 4<<20*2) + defer bw.Flush() + out = bw + _, err = out.Write(exec) + exitErr(err) + _, err = out.Write([]byte{mode}) + } + exitErr(err) + wc := wCounter{out: out} + start := time.Now() + var input int64 + if !isCompressed { + wr.Reset(&wc) + defer wr.Close() + input, err = wr.ReadFrom(src) + exitErr(err) + err = wr.Close() + exitErr(err) + } else { + input, err = io.Copy(&wc, src) + exitErr(err) + } + if !*quiet { + elapsed := time.Since(start) + mbpersec := (float64(input) / 
(1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) + pct := float64(wc.n) * 100 / float64(input) + fmt.Printf(" %d -> %d [%.02f%%]; %.01fMB/s\n", input, wc.n, pct, mbpersec) + } + if *remove { + closeOnce.Do(func() { + file.Close() + if !*quiet { + fmt.Println("Removing", filename) + } + err := os.Remove(filename) + exitErr(err) + }) + } + }() + } + +} + +func exitErr(err error) { + if err != nil { + fmt.Fprintln(os.Stderr, "\nERROR:", err.Error()) + os.Exit(2) + } +} + +type wCounter struct { + n int + out io.Writer +} + +func (w *wCounter) Write(p []byte) (n int, err error) { + n, err = w.out.Write(p) + w.n += n + return n, err + +}