diff --git a/s2/cmd/internal/filepathx/LICENSE b/s2/cmd/internal/filepathx/LICENSE new file mode 100644 index 0000000000..29019f0f31 --- /dev/null +++ b/s2/cmd/internal/filepathx/LICENSE @@ -0,0 +1,7 @@ +Copyright 2016 The filepathx Authors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/s2/cmd/internal/filepathx/README.md b/s2/cmd/internal/filepathx/README.md new file mode 100644 index 0000000000..0e89666c97 --- /dev/null +++ b/s2/cmd/internal/filepathx/README.md @@ -0,0 +1,73 @@ +# filepathx + +> A small `filepath` extension library that supports double star globbing. 
+ +## Documentation + +GoDoc: <https://pkg.go.dev/github.com/yargevad/filepathx> + +## Install + +```bash +go get github.com/yargevad/filepathx +``` + +## Usage Example + +You can use `a/**/*.*` to match everything under the `a` directory +that contains a dot, like so: + +```go +package main + +import ( + "fmt" + "os" + + "github.com/yargevad/filepathx" +) + +func main() { + if 2 != len(os.Args) { + fmt.Println(len(os.Args), os.Args) + fmt.Fprintf(os.Stderr, "Usage: go build example/find/*.go; ./find <pattern>\n") + os.Exit(1) + return + } + pattern := os.Args[1] + + matches, err := filepathx.Glob(pattern) + if err != nil { + panic(err) + } + + for _, match := range matches { + fmt.Printf("MATCH: [%v]\n", match) + } +} +``` + +Given this directory structure: + +```bash +find a +``` + +```txt +a +a/b +a/b/c.d +a/b/c.d/e.f +``` + +This will be the output: + +```bash +go build example/find/*.go +./find 'a/**/*.*' +``` + +```txt +MATCH: [a/b/c.d] +MATCH: [a/b/c.d/e.f] +``` diff --git a/s2/cmd/internal/filepathx/filepathx.go b/s2/cmd/internal/filepathx/filepathx.go new file mode 100644 index 0000000000..0129314826 --- /dev/null +++ b/s2/cmd/internal/filepathx/filepathx.go @@ -0,0 +1,62 @@ +// Package filepathx adds double-star globbing support to the Glob function from the core path/filepath package. +// You might recognize "**" recursive globs from things like your .gitignore file, and zsh. +// The "**" glob represents a recursive wildcard matching zero-or-more directory levels deep. +package filepathx + +import ( + "os" + "path/filepath" + "strings" +) + +// Globs represents one filepath glob, with its elements joined by "**". +type Globs []string + +// Glob adds double-star support to the core path/filepath Glob function. +// It's useful when your globs might have double-stars, but you're not sure. 
+func Glob(pattern string) ([]string, error) { + if !strings.Contains(pattern, "**") { + // no double-star present: defer entirely to the core package + return filepath.Glob(pattern) + } + return Globs(strings.Split(pattern, "**")).Expand() +} + +// Expand finds matches for the provided Globs: each element is globbed relative to every match found so far, and every hit is walked recursively so the following element can match at any depth. A nil or empty Globs yields no matches. +func (globs Globs) Expand() ([]string, error) { + var matches = []string{""} // seed: the first element is globbed relative to the empty prefix + for _, glob := range globs { + var hits []string + var hitMap = map[string]bool{} + for _, match := range matches { + paths, err := filepath.Glob(match + glob) + if err != nil { + return nil, err + } + for _, path := range paths { + err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + // record each visited path once, in walk order, as a prefix for the next element + if _, ok := hitMap[path]; !ok { + hits = append(hits, path) + hitMap[path] = true + } + return nil + }) + if err != nil { + return nil, err + } + } + } + matches = hits + } + + // with no elements the loop never ran: drop the "" seed so nil and empty (non-nil) input both yield no matches + if len(globs) == 0 && len(matches) > 0 && matches[0] == "" { + matches = matches[1:] + } + + return matches, nil +} diff --git a/s2/cmd/internal/filepathx/filepathx_test.go b/s2/cmd/internal/filepathx/filepathx_test.go new file mode 100644 index 0000000000..096baafe76 --- /dev/null +++ b/s2/cmd/internal/filepathx/filepathx_test.go @@ -0,0 +1,105 @@ +package filepathx + +import ( + "os" + "strings" + "testing" +) + +func TestGlob_ZeroDoubleStars_oneMatch(t *testing.T) { + // test passthru to vanilla path/filepath + path := "./a/b/c.d/e.f" + err := os.MkdirAll(path, 0755) + if err != nil { + t.Fatalf("os.MkdirAll: %s", err) + } + matches, err := Glob("./*/*/*.d") + if err != nil { + t.Fatalf("Glob: %s", err) + } + if len(matches) != 1 { + t.Fatalf("got %d matches, expected 1", len(matches)) + } + expected := strings.Join([]string{"a", "b", "c.d"}, string(os.PathSeparator)) + if matches[0] != expected { + t.Fatalf("matched [%s], expected [%s]", matches[0], expected) + } +} + +func 
TestGlob_OneDoubleStar_oneMatch(t *testing.T) { + // a single double-star: only the deepest path element, e.f, ends in ".f" + path := "./a/b/c.d/e.f" + err := os.MkdirAll(path, 0755) + if err != nil { + t.Fatalf("os.MkdirAll: %s", err) + } + matches, err := Glob("./**/*.f") + if err != nil { + t.Fatalf("Glob: %s", err) + } + if len(matches) != 1 { + t.Fatalf("got %d matches, expected 1", len(matches)) + } + expected := strings.Join([]string{"a", "b", "c.d", "e.f"}, string(os.PathSeparator)) + if matches[0] != expected { + t.Fatalf("matched [%s], expected [%s]", matches[0], expected) + } +} + +func TestGlob_OneDoubleStar_twoMatches(t *testing.T) { + // a single double-star below ./a: both c.d and e.f contain a dot and are expected in exactly this order + path := "./a/b/c.d/e.f" + err := os.MkdirAll(path, 0755) + if err != nil { + t.Fatalf("os.MkdirAll: %s", err) + } + matches, err := Glob("./a/**/*.*") + if err != nil { + t.Fatalf("Glob: %s", err) + } + if len(matches) != 2 { + t.Fatalf("got %d matches, expected 2", len(matches)) + } + expected := []string{ + strings.Join([]string{"a", "b", "c.d"}, string(os.PathSeparator)), + strings.Join([]string{"a", "b", "c.d", "e.f"}, string(os.PathSeparator)), + } + + for i, match := range matches { + if match != expected[i] { + t.Fatalf("matched [%s], expected [%s]", match, expected[i]) + } + } +} + +func TestGlob_TwoDoubleStars_oneMatch(t *testing.T) { + // two double-stars with the literal path element "b" between them + path := "./a/b/c.d/e.f" + err := os.MkdirAll(path, 0755) + if err != nil { + t.Fatalf("os.MkdirAll: %s", err) + } + matches, err := Glob("./**/b/**/*.f") + if err != nil { + t.Fatalf("Glob: %s", err) + } + if len(matches) != 1 { + t.Fatalf("got %d matches, expected 1", len(matches)) + } + expected := strings.Join([]string{"a", "b", "c.d", "e.f"}, string(os.PathSeparator)) + + if matches[0] != expected { + t.Fatalf("matched [%s], expected [%s]", matches[0], expected) + } +} + +func TestExpand_DirectCall_emptySlice(t *testing.T) { + var empty []string + matches, err := Globs(empty).Expand() + if err != nil { + t.Fatalf("Glob: %s", err) + } + if len(matches) != 0 { + 
t.Fatalf("got %d matches, expected 0", len(matches)) + } +} diff --git a/s2/cmd/s2c/main.go b/s2/cmd/s2c/main.go index c8a6b88744..6875139c7b 100644 --- a/s2/cmd/s2c/main.go +++ b/s2/cmd/s2c/main.go @@ -12,7 +12,6 @@ import ( "net/http" "os" "os/signal" - "path/filepath" "runtime" "runtime/debug" "runtime/pprof" @@ -24,6 +23,7 @@ import ( "unicode" "github.com/klauspost/compress/s2" + "github.com/klauspost/compress/s2/cmd/internal/filepathx" "github.com/klauspost/compress/s2/cmd/internal/readahead" ) @@ -33,6 +33,7 @@ var ( snappy = flag.Bool("snappy", false, "Generate Snappy compatible output stream") cpu = flag.Int("cpu", runtime.GOMAXPROCS(0), "Compress using this amount of threads") blockSize = flag.String("blocksize", "4M", "Max block size. Examples: 64K, 256K, 1M, 4M. Must be power of two and <= 4MB") + block = flag.Bool("block", false, "Compress as a single block. Will load content into memory.") safe = flag.Bool("safe", false, "Do not overwrite output files") padding = flag.String("pad", "1", "Pad size to a multiple of this value, Examples: 500, 64K, 256K, 1M, 4M, etc") stdout = flag.Bool("c", false, "Write all output to stdout. Multiple input files will be concatenated") @@ -128,7 +129,7 @@ Options:`) files = append(files, pattern) continue } - found, err := filepath.Glob(pattern) + found, err := filepathx.Glob(pattern) exitErr(err) if len(found) == 0 { exitErr(fmt.Errorf("unable to find file %v", pattern)) @@ -169,6 +170,88 @@ Options:`) debug.SetGCPercent(10) dec := s2.NewReader(nil) for _, filename := range files { + if *block { + func() { + if !*quiet { + fmt.Print("Reading ", filename, "...") + } + // Input file. 
+ file, size, _ := openFile(filename) + b := make([]byte, size) + _, err = io.ReadFull(file, b) + exitErr(err) + file.Close() + for i := 0; i < *bench; i++ { + if !*quiet { + fmt.Print("\nCompressing...") + } + start := time.Now() + var compressed []byte + switch { + case *faster: + if *snappy { + compressed = s2.EncodeSnappy(nil, b) + break + } + compressed = s2.Encode(nil, b) + case *slower: + if *snappy { + compressed = s2.EncodeSnappyBest(nil, b) + break + } + compressed = s2.EncodeBest(nil, b) + default: + if *snappy { + compressed = s2.EncodeSnappyBetter(nil, b) + break + } + compressed = s2.EncodeBetter(nil, b) + } + exitErr(err) + err = wr.Close() + exitErr(err) + if !*quiet { + input := len(b) + elapsed := time.Since(start) + mbpersec := (float64(input) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) + pct := float64(len(compressed)) * 100 / float64(input) + ms := elapsed.Round(time.Millisecond) + fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", input, len(compressed), pct, ms, mbpersec) + } + if *verify { + if !*quiet { + fmt.Print("\nDecompressing.") + } + decomp := make([]byte, 0, len(b)) + start := time.Now() + decomp, err = s2.Decode(decomp, compressed) + exitErr(err) + if len(decomp) != len(b) { + exitErr(fmt.Errorf("unexpected size, want %d, got %d", len(b), len(decomp))) + } + if !*quiet { + input := len(b) + elapsed := time.Since(start) + mbpersec := (float64(input) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) + pct := float64(input) * 100 / float64(len(compressed)) + ms := elapsed.Round(time.Millisecond) + fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(compressed), len(decomp), pct, ms, mbpersec) + } + if !bytes.Equal(decomp, b) { + exitErr(fmt.Errorf("decompresed data mismatch")) + } + if !*quiet { + fmt.Print("... 
Verified ok.") + } + } + } + if !*quiet { + fmt.Println("") + } + wr.Close() + }() + continue + } func() { if !*quiet { fmt.Print("Reading ", filename, "...") @@ -230,7 +313,9 @@ Options:`) dec.Reset(nil) } } - fmt.Println("") + if !*quiet { + fmt.Println("") + } wr.Close() }() } @@ -240,10 +325,99 @@ Options:`) if *snappy { ext = ".snappy" } + if *block { + ext += ".block" + } if *out != "" && len(files) > 1 { exitErr(errors.New("-out parameter can only be used with one input")) } for _, filename := range files { + if *block { + func() { + var closeOnce sync.Once + dstFilename := cleanFileName(fmt.Sprintf("%s%s", filename, ext)) + if *out != "" { + dstFilename = *out + } + if !*quiet { + fmt.Print("Compressing ", filename, " -> ", dstFilename) + } + // Input file. + file, _, mode := openFile(filename) + exitErr(err) + defer closeOnce.Do(func() { file.Close() }) + inBytes, err := ioutil.ReadAll(file) + exitErr(err) + + var out io.Writer + switch { + case *stdout: + out = os.Stdout + default: + if *safe { + _, err := os.Stat(dstFilename) + if !os.IsNotExist(err) { + exitErr(errors.New("destination file exists")) + } + } + dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode) + exitErr(err) + defer dstFile.Close() + out = dstFile + } + start := time.Now() + var compressed []byte + switch { + case *faster: + if *snappy { + compressed = s2.EncodeSnappy(nil, inBytes) + break + } + compressed = s2.Encode(nil, inBytes) + case *slower: + if *snappy { + compressed = s2.EncodeSnappyBest(nil, inBytes) + break + } + compressed = s2.EncodeBest(nil, inBytes) + default: + if *snappy { + compressed = s2.EncodeSnappyBetter(nil, inBytes) + break + } + compressed = s2.EncodeBetter(nil, inBytes) + } + _, err = out.Write(compressed) + exitErr(err) + if !*quiet { + elapsed := time.Since(start) + mbpersec := (float64(len(inBytes)) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) + pct := float64(len(compressed)) * 100 / float64(len(inBytes)) + 
fmt.Printf(" %d -> %d [%.02f%%]; %.01fMB/s\n", len(inBytes), len(compressed), pct, mbpersec) + } + if *verify { + got, err := s2.Decode(make([]byte, 0, len(inBytes)), compressed) + exitErr(err) + if !bytes.Equal(got, inBytes) { + exitErr(fmt.Errorf("decoded content mismatch")) + } + if !*quiet { + fmt.Print("... Verified ok.") + } + } + if *remove { + closeOnce.Do(func() { + file.Close() + if !*quiet { + fmt.Println("Removing", filename) + } + err := os.Remove(filename) + exitErr(err) + }) + } + }() + continue + } func() { var closeOnce sync.Once dstFilename := cleanFileName(fmt.Sprintf("%s%s", filename, ext)) diff --git a/s2/cmd/s2d/main.go b/s2/cmd/s2d/main.go index 439f387a6a..5f973c5100 100644 --- a/s2/cmd/s2d/main.go +++ b/s2/cmd/s2d/main.go @@ -10,13 +10,13 @@ import ( "io/ioutil" "net/http" "os" - "path/filepath" "runtime/debug" "strings" "sync" "time" "github.com/klauspost/compress/s2" + "github.com/klauspost/compress/s2/cmd/internal/filepathx" "github.com/klauspost/compress/s2/cmd/internal/readahead" ) @@ -29,6 +29,7 @@ var ( bench = flag.Int("bench", 0, "Run benchmark n times. No output will be written") help = flag.Bool("help", false, "Display help") out = flag.String("o", "", "Write output to another file. Single input file only") + block = flag.Bool("block", false, "Decompress as a single block. 
Will load content into memory.") version = "(dev)" date = "(unknown)" @@ -96,7 +97,7 @@ Options:`) continue } - found, err := filepath.Glob(pattern) + found, err := filepathx.Glob(pattern) exitErr(err) if len(found) == 0 { exitErr(fmt.Errorf("unable to find file %v", pattern)) @@ -109,9 +110,15 @@ Options:`) if *bench > 0 { debug.SetGCPercent(10) for _, filename := range files { + block := *block + dstFilename := cleanFileName(filename) + if strings.HasSuffix(filename, ".block") { + dstFilename = strings.TrimSuffix(dstFilename, ".block") + block = true + } switch { - case strings.HasSuffix(filename, ".s2"): - case strings.HasSuffix(filename, ".snappy"): + case strings.HasSuffix(dstFilename, ".s2"): + case strings.HasSuffix(dstFilename, ".snappy"): default: if !isHTTP(filename) { fmt.Println("Skipping", filename) @@ -134,10 +141,17 @@ Options:`) if !*quiet { fmt.Print("\nDecompressing...") } - r.Reset(bytes.NewBuffer(b)) start := time.Now() - output, err := io.Copy(ioutil.Discard, r) - exitErr(err) + var output int64 + if block { + dec, err := s2.Decode(nil, b) + exitErr(err) + output = int64(len(dec)) + } else { + r.Reset(bytes.NewBuffer(b)) + output, err = io.Copy(ioutil.Discard, r) + exitErr(err) + } if !*quiet { elapsed := time.Since(start) ms := elapsed.Round(time.Millisecond) @@ -146,7 +160,9 @@ Options:`) fmt.Printf(" %d -> %d [%.02f%%]; %v, %.01fMB/s", len(b), output, pct, ms, mbPerSec) } } - fmt.Println("") + if !*quiet { + fmt.Println("") + } }() } os.Exit(0) @@ -158,12 +174,17 @@ Options:`) for _, filename := range files { dstFilename := cleanFileName(filename) + block := *block + if strings.HasSuffix(dstFilename, ".block") { + dstFilename = strings.TrimSuffix(dstFilename, ".block") + block = true + } switch { case *out != "": dstFilename = *out - case strings.HasSuffix(filename, ".s2"): + case strings.HasSuffix(dstFilename, ".s2"): dstFilename = strings.TrimSuffix(dstFilename, ".s2") - case strings.HasSuffix(filename, ".snappy"): + case 
strings.HasSuffix(dstFilename, ".snappy"): dstFilename = strings.TrimSuffix(dstFilename, ".snappy") default: if !isHTTP(filename) { @@ -184,9 +205,15 @@ Options:`) file, _, mode := openFile(filename) defer closeOnce.Do(func() { file.Close() }) rc := rCounter{in: file} - src, err := readahead.NewReaderSize(&rc, 2, 4<<20) - exitErr(err) - defer src.Close() + var src io.Reader + if !block { + ra, err := readahead.NewReaderSize(&rc, 2, 4<<20) + exitErr(err) + defer ra.Close() + src = ra + } else { + src = &rc + } if *safe { _, err := os.Stat(dstFilename) if !os.IsNotExist(err) { @@ -203,13 +230,26 @@ Options:`) dstFile, err := os.OpenFile(dstFilename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode) exitErr(err) defer dstFile.Close() - bw := bufio.NewWriterSize(dstFile, 4<<20) - defer bw.Flush() - out = bw + out = dstFile + if !block { + bw := bufio.NewWriterSize(dstFile, 4<<20) + defer bw.Flush() + out = bw + } } - r.Reset(src) + var decoded io.Reader start := time.Now() - output, err := io.Copy(out, r) + if block { + all, err := ioutil.ReadAll(src) + exitErr(err) + b, err := s2.Decode(nil, all) + exitErr(err) + decoded = bytes.NewReader(b) + } else { + r.Reset(src) + decoded = r + } + output, err := io.Copy(out, decoded) exitErr(err) if !*quiet { elapsed := time.Since(start)