diff --git a/go.mod b/go.mod index 86a5d86441..1a4d7bce13 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/BurntSushi/toml v0.3.1 github.com/Microsoft/go-winio v0.4.15 github.com/Microsoft/hcsshim v0.8.9 + github.com/cyphar/filepath-securejoin v0.2.2 github.com/docker/go-units v0.4.0 github.com/hashicorp/go-multierror v1.1.0 github.com/klauspost/compress v1.11.3 diff --git a/pkg/zstdchunked/compression.go b/pkg/zstdchunked/compression.go new file mode 100644 index 0000000000..ce32b5702a --- /dev/null +++ b/pkg/zstdchunked/compression.go @@ -0,0 +1,465 @@ +package zstdchunked + +import ( + "bytes" + "crypto/sha256" + "encoding/base64" + "encoding/binary" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "time" + + "github.com/klauspost/compress/zstd" + "github.com/sirupsen/logrus" + "github.com/vbatts/tar-split/archive/tar" +) + +type FileMetadata struct { + Type string `json:"type"` + Name string `json:"name"` + Linkname string `json:"linkName,omitempty"` + Mode int64 `json:"mode,omitempty"` + Size int64 `json:"size"` + UID int `json:"uid"` + GID int `json:"gid"` + ModTime time.Time `json:"modTime"` + AccessTime time.Time `json:"accessTime"` + ChangeTime time.Time `json:"changeTime"` + Devmajor int64 `json:"devMajor"` + Devminor int64 `json:"devMinor"` + Xattrs map[string]string `json:"xattrs,omitempty"` + Checksum string `json:"checksum,omitempty"` + StartOffset int64 `json:"startOffset,omitempty"` + EndOffset int64 `json:"endOffset,omitempty"` +} + +func getType(t byte) (string, error) { + switch t { + case tar.TypeReg, tar.TypeRegA: + return "REG", nil + case tar.TypeLink: + return "LINK", nil + case tar.TypeChar: + return "CHAR", nil + case tar.TypeBlock: + return "BLOCK", nil + case tar.TypeDir: + return "DIR", nil + case tar.TypeFifo: + return "FIFO", nil + case tar.TypeSymlink: + return "SYMLINK", nil + } + return "", fmt.Errorf("unknown tarball type: %v", t) +} + +var typesToTar = map[string]byte{ + "REG": tar.TypeReg, + "LINK": tar.TypeLink, + "CHAR": tar.TypeChar, + "BLOCK": tar.TypeBlock, + "DIR": tar.TypeDir, + "FIFO": tar.TypeFifo, + "SYMLINK": tar.TypeSymlink, +} + +func TypeToTarType(t string) (byte, error) { + r, found := typesToTar[t] + if !found { + return 0, fmt.Errorf("unknown type: %v", t) + } + return r, nil +} + +// sizeCounter is an io.Writer which only counts the total size of its input. +type sizeCounter struct{ size int64 } + +func (c *sizeCounter) Write(p []byte) (int, error) { + c.size += int64(len(p)) + return len(p), nil +} + +const ( + manifestChecksumKey = "io.containers.zstd-chunked.manifest-checksum" + manifestInfoKey = "io.containers.zstd-chunked.manifest-position" + footerSizeSupported = 32 +) + +var ( + // when the zstd decoder encounters a skippable frame + 1 byte for the size, it + // will ignore it. + // https://tools.ietf.org/html/rfc8478#section-3.1.2 + skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18} + + zstdChunkedFrameMagic = []byte{0xAB, 0xCD, 0xDE, 0xFA} +) + +func isZstdSkippableFrameMagic(data []byte) bool { + if len(data) < 4 { + return false + } + return bytes.Equal(skippableFrameMagic, data[:4]) +} + +func isZstdChunkedFrameMagic(data []byte) bool { + if len(data) < 4 { + return false + } + return bytes.Equal(zstdChunkedFrameMagic, data[:4]) +} + +// ReadZstdChunkedManifest reads the zstd:chunked manifest from the seekable stream blobStream. The blob total size must +// be specified. +// This function uses the io.containers.zstd-chunked. annotations when specified. 
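+// When the annotations are missing, the manifest position is instead recovered
+// from the trailing zstd skippable frame: the last bytes of the blob are
+// fetched and the manifest offset, compressed length and uncompressed length
+// are decoded from the footer as little-endian unsigned integers (the layout
+// is the one produced by writeZstdChunkedManifest below).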
+func ReadZstdChunkedManifest(blobStream ImageSourceSeekable, blobSize int64, annotations map[string]string) ([]byte, error) { + footerSize := int64(footerSizeSupported + 4) // also include the zstd skippable frame header in the request + if blobSize <= footerSize { + return nil, fmt.Errorf("blob too small") + } + + var offset, length, lengthUncompressed uint64 + + if offsetMetadata := annotations[manifestInfoKey]; offsetMetadata != "" { + if _, err := fmt.Sscanf(offsetMetadata, "%d:%d:%d", &offset, &length, &lengthUncompressed); err != nil { + return nil, err + } + } else { + chunk := ImageSourceChunk{ + Offset: uint64(blobSize - footerSize), + Length: uint64(footerSize), + } + reader, _, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk}) + if err != nil { + return nil, err + } + footer := make([]byte, footerSize) + if _, err := io.ReadFull(reader, footer); err != nil && err != io.EOF { + return nil, err + } + + frameLen := binary.LittleEndian.Uint32(footer[4:8]) + offset = binary.LittleEndian.Uint64(footer[8:16]) + length = binary.LittleEndian.Uint64(footer[16:24]) + lengthUncompressed = binary.LittleEndian.Uint64(footer[24:32]) + + if !isZstdChunkedFrameMagic(footer[32:36]) { + return nil, fmt.Errorf("unknown zstd chunked frame magic") + } else if footerSizeSupported < 32 { + return nil, fmt.Errorf("unknown chunked zstd footer") + } else if frameLen == footerSizeSupported && !isZstdSkippableFrameMagic(footer) { + return nil, fmt.Errorf("invalid zstd skippable frame magic number") + } + } + + // set a reasonable limit + if length > (1<<20)*50 { + return nil, fmt.Errorf("manifest too big") + } + if lengthUncompressed > (1<<20)*50 { + return nil, fmt.Errorf("manifest too big") + } + + chunk := ImageSourceChunk{ + Offset: offset, + Length: length, + } + + reader, _, err := blobStream.GetBlobAt([]ImageSourceChunk{chunk}) + if err != nil { + return nil, err + } + + manifest := make([]byte, length) + if _, err := io.ReadFull(reader, manifest); err != nil && err != io.EOF { + return nil, err + } + + manifestChecksum := sha256.New() + if _, err := manifestChecksum.Write(manifest); err != nil { + return nil, err + } + + if manifestChecksumAnnotation := annotations[manifestChecksumKey]; manifestChecksumAnnotation == "" { + logrus.Warningf("Manifest checksum annotation not found. 
Skipping manifest validation") + } else { + if fmt.Sprintf("%x", manifestChecksum.Sum(nil)) != manifestChecksumAnnotation { + return nil, fmt.Errorf("invalid manifest checksum") + } + } + + decoder, err := zstd.NewReader(nil) + if err != nil { + return nil, err + } + defer decoder.Close() + + b := make([]byte, 0, lengthUncompressed) + if decoded, err := decoder.DecodeAll(manifest, b); err == nil { + return decoded, nil + } + + return manifest, nil +} + +func appendZstdSkippableFrame(dest io.Writer, data []byte) error { + if _, err := dest.Write(skippableFrameMagic); err != nil { + return err + } + + var size []byte = make([]byte, 4) + binary.LittleEndian.PutUint32(size, uint32(len(data))) + if _, err := dest.Write(size); err != nil { + return err + } + if _, err := dest.Write(data); err != nil { + return err + } + return nil +} + +func writeZstdChunkedManifest(dest io.Writer, outMetadata map[string]string, offset *sizeCounter, metadata []FileMetadata, level int) error { + // 8 is the size of the zstd skippable frame header + the frame size + manifestOffset := uint64(offset.size) + 8 + + // Generate the manifest + manifest, err := json.Marshal(metadata) + if err != nil { + return err + } + + var compressedBuffer bytes.Buffer + zstdWriter, err := zstdWriterWithLevel(&compressedBuffer, level) + if err != nil { + return err + } + if _, err := zstdWriter.Write(manifest); err != nil { + zstdWriter.Close() + return err + } + if err := zstdWriter.Close(); err != nil { + return err + } + + compressedManifest := compressedBuffer.Bytes() + + manifestChecksum := sha256.New() + if _, err := manifestChecksum.Write(compressedManifest); err != nil { + return err + } + + outMetadata[manifestChecksumKey] = fmt.Sprintf("%x", manifestChecksum.Sum(nil)) + outMetadata[manifestInfoKey] = fmt.Sprintf("%d:%d:%d", manifestOffset, len(compressedManifest), len(manifest)) + if err := appendZstdSkippableFrame(dest, compressedManifest); err != nil { + return err + } + + // Store the offset to the manifest and its size in LE order + var manifestDataLE []byte = make([]byte, footerSizeSupported) + binary.LittleEndian.PutUint64(manifestDataLE, manifestOffset) + binary.LittleEndian.PutUint64(manifestDataLE[8:], uint64(len(compressedManifest))) + binary.LittleEndian.PutUint64(manifestDataLE[16:], uint64(len(manifest))) + binary.LittleEndian.PutUint64(manifestDataLE[24:], uint64(len(manifest))) + + return appendZstdSkippableFrame(dest, manifestDataLE) +} + +func writeZstdChunkedStream(destFile io.Writer, outMetadata map[string]string, reader io.Reader, level int) error { + // total written so far. 
Used to retrieve partial offsets in the file + sizeCounter := &sizeCounter{} + + dest := io.MultiWriter(destFile, sizeCounter) + + tr := tar.NewReader(reader) + tr.RawAccounting = true + + buf := make([]byte, 4096) + + zstdWriter, err := zstdWriterWithLevel(dest, level) + if err != nil { + return err + } + defer func() { + if zstdWriter != nil { + zstdWriter.Close() + zstdWriter.Flush() + } + }() + + restartCompression := func() (int64, error) { + var offset int64 + if zstdWriter != nil { + if err := zstdWriter.Close(); err != nil { + return 0, err + } + if err := zstdWriter.Flush(); err != nil { + return 0, err + } + offset = sizeCounter.size + zstdWriter.Reset(dest) + } + return offset, nil + } + + var metadata []FileMetadata + for { + hdr, err := tr.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + if _, err := zstdWriter.Write(tr.RawBytes()); err != nil { + return err + } + + payloadChecksum := sha256.New() + payloadDest := io.MultiWriter(payloadChecksum, zstdWriter) + + // Now handle the payload, if any + var startOffset, endOffset int64 + var checksum []byte + for { + read, errRead := tr.Read(buf) + if errRead != nil && errRead != io.EOF { + return err + } + + // restart the compression only if there is + // a payload. + if read > 0 { + if startOffset == 0 { + startOffset, err = restartCompression() + if err != nil { + return err + } + } + _, err := payloadDest.Write(buf[:read]) + if err != nil { + return err + } + } + if errRead == io.EOF { + if startOffset > 0 { + endOffset, err = restartCompression() + if err != nil { + return err + } + checksum = payloadChecksum.Sum(nil) + } + break + } + } + + typ, err := getType(hdr.Typeflag) + if err != nil { + return err + } + xattrs := make(map[string]string) + for k, v := range hdr.Xattrs { + xattrs[k] = base64.StdEncoding.EncodeToString([]byte(v)) + } + m := FileMetadata{ + Type: typ, + Name: hdr.Name, + Linkname: hdr.Linkname, + Mode: hdr.Mode, + Size: hdr.Size, + UID: hdr.Uid, + GID: hdr.Gid, + ModTime: hdr.ModTime, + AccessTime: hdr.AccessTime, + ChangeTime: hdr.ChangeTime, + Devmajor: hdr.Devmajor, + Devminor: hdr.Devminor, + Xattrs: xattrs, + Checksum: fmt.Sprintf("%x", checksum), + StartOffset: startOffset, + EndOffset: endOffset, + } + metadata = append(metadata, m) + } + + if err := zstdWriter.Flush(); err != nil { + return err + } + if err := zstdWriter.Close(); err != nil { + return err + } + zstdWriter = nil + + return writeZstdChunkedManifest(dest, outMetadata, sizeCounter, metadata, level) +} + +type zstdChunkedWriter struct { + tarSplitOut *io.PipeWriter + tarSplitErr chan error +} + +func (w zstdChunkedWriter) Close() error { + err := <-w.tarSplitErr + if err != nil { + w.tarSplitOut.Close() + return err + } + return w.tarSplitOut.Close() +} + +func (w zstdChunkedWriter) Write(p []byte) (int, error) { + select { + case err := <-w.tarSplitErr: + w.tarSplitOut.Close() + return 0, err + default: + return w.tarSplitOut.Write(p) + } +} + +// zstdChunkedWriterWithLevel writes a zstd compressed tarball where each file is +// compressed separately so it can be addressed separately. Idea based on CRFS: +// https://github.com/google/crfs +// The difference with CRFS is that the zstd compression is used instead of gzip. +// The reason for it is that zstd supports embedding metadata ignored by the decoder +// as part of the compressed stream. +// A manifest json file with all the metadata is appended at the end of the tarball +// stream, using zstd skippable frames. 
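+// Skippable frames are defined by RFC 8478 (section 3.1.2) to be ignored by
+// conforming zstd decoders, so the resulting blob is still a valid zstd stream
+// that decompresses to the original tarball.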
+// The final file will look like: +// [FILE_1][FILE_2]..[FILE_N][SKIPPABLE FRAME 1][SKIPPABLE FRAME 2] +// Where: +// [FILE_N]: [ZSTD HEADER][TAR HEADER][PAYLOAD FILE_N][ZSTD FOOTER] +// [SKIPPABLE FRAME 1]: [ZSTD SKIPPABLE FRAME, SIZE=MANIFEST LENGTH][MANIFEST] +// [SKIPPABLE FRAME 2]: [ZSTD SKIPPABLE FRAME, SIZE=16][MANIFEST_OFFSET][MANIFEST_LENGTH][MANIFEST_LENGTH_UNCOMPRESSED][CHUNKED_ZSTD_MAGIC_NUMBER] +// MANIFEST_OFFSET, MANIFEST_LENGTH, MANIFEST_LENGTH_UNCOMPRESSED and CHUNKED_ZSTD_MAGIC_NUMBER are 64 bits unsigned in little endian format. +func zstdChunkedWriterWithLevel(out io.Writer, metadata map[string]string, level int) (io.WriteCloser, error) { + ch := make(chan error, 1) + r, w := io.Pipe() + + go func() { + defer close(ch) + defer r.Close() + defer io.Copy(ioutil.Discard, r) + ch <- writeZstdChunkedStream(out, metadata, r, level) + }() + + return zstdChunkedWriter{ + tarSplitOut: w, + tarSplitErr: ch, + }, nil +} + +func zstdWriterWithLevel(dest io.Writer, level int) (*zstd.Encoder, error) { + el := zstd.EncoderLevelFromZstd(level) + return zstd.NewWriter(dest, zstd.WithEncoderLevel(el)) +} + +// Compressor is a CompressorFunc for the zstd compression algorithm. +func Compressor(r io.Writer, metadata map[string]string, level *int) (io.WriteCloser, error) { + if level == nil { + l := 3 + level = &l + } + return zstdChunkedWriterWithLevel(r, metadata, *level) +} diff --git a/pkg/zstdchunked/storage.go b/pkg/zstdchunked/storage.go new file mode 100644 index 0000000000..896d620f3a --- /dev/null +++ b/pkg/zstdchunked/storage.go @@ -0,0 +1,952 @@ +package zstdchunked + +import ( + archivetar "archive/tar" + "context" + "crypto/sha256" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "mime" + "mime/multipart" + "os" + "path/filepath" + "sort" + "strings" + "sync" + "syscall" + "time" + "unsafe" + + storage "github.com/containers/storage" + graphdriver "github.com/containers/storage/drivers" + driversCopy "github.com/containers/storage/drivers/copy" + "github.com/containers/storage/pkg/archive" + "github.com/containers/storage/pkg/idtools" + securejoin "github.com/cyphar/filepath-securejoin" + "github.com/klauspost/compress/zstd" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "github.com/vbatts/tar-split/archive/tar" + "golang.org/x/sys/unix" +) + +const ( + maxNumberMissingChunks = 1024 + newFileFlags = (unix.O_CREAT | unix.O_TRUNC | unix.O_WRONLY | unix.O_EXCL) + containersOverrideXattr = "user.containers.override_stat" +) + +// ImageSourceChunk is a portion of a blob. +type ImageSourceChunk struct { + Offset uint64 + Length uint64 +} + +// ImageSourceSeekable is an image source that permits to fetch chunks of the entire blob. +type ImageSourceSeekable interface { + // GetBlobAt returns a stream for the specified blob. 
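+	// The stream contains the requested chunks in order. When more than one
+	// chunk is requested, the implementation may answer with a multipart/*
+	// body; the returned string is the content type, which then carries the
+	// boundary parameter consumed by storeMissingFiles. An ErrBadRequest
+	// error signals that the request cannot be served as issued.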
+ GetBlobAt([]ImageSourceChunk) (io.ReadCloser, string, error) +} + +type ChunkedZstdDiffer struct { + stream ImageSourceSeekable + manifest []byte + ctx context.Context + layersMetadata map[string][]FileMetadata + layersTarget map[string]string +} + +// ErrBadRequest is returned when the request is not valid +type ErrBadRequest struct { +} + +func (e ErrBadRequest) Error() string { + return fmt.Sprintf("http bad request") +} + +func timeToTimespec(time time.Time) (ts syscall.Timespec) { + if time.IsZero() { + // Return UTIME_OMIT special value + ts.Sec = 0 + ts.Nsec = ((1 << 30) - 2) + return + } + return syscall.NsecToTimespec(time.UnixNano()) +} + +func copyFileContent(src, destFile, root string, dirfd int, missingDirsMode, mode os.FileMode) (*os.File, int64, error) { + st, err := os.Stat(src) + if err != nil { + return nil, -1, err + } + + copyWithFileRange, copyWithFileClone := true, true + + // If the destination file already exists, we shouldn't blow it away + dstFile, err := openFileUnderRoot(destFile, root, dirfd, newFileFlags, mode) + if err != nil { + return nil, -1, err + } + + err = driversCopy.CopyRegularToFile(src, dstFile, st, ©WithFileRange, ©WithFileClone) + if err != nil { + dstFile.Close() + return nil, -1, err + } + return dstFile, st.Size(), err +} + +func prepareOtherLayersCache(layersMetadata map[string][]FileMetadata) map[string]map[string]*FileMetadata { + maps := make(map[string]map[string]*FileMetadata) + + for layerID, v := range layersMetadata { + r := make(map[string]*FileMetadata) + for i := range v { + r[v[i].Checksum] = &v[i] + } + maps[layerID] = r + } + return maps +} + +func getLayersCache(store storage.Store) (map[string][]FileMetadata, map[string]string, error) { + allLayers, err := store.Layers() + if err != nil { + return nil, nil, err + } + + layersMetadata := make(map[string][]FileMetadata) + layersTarget := make(map[string]string) + for _, r := range allLayers { + if r.Metadata == "" { + continue + } + var metadataLayer []FileMetadata + if err := json.Unmarshal([]byte(r.Metadata), &metadataLayer); err != nil { + continue + } + layersMetadata[r.ID] = metadataLayer + target, err := store.DifferTarget(r.ID) + if err != nil { + return nil, nil, err + } + layersTarget[r.ID] = target + } + + return layersMetadata, layersTarget, nil +} + +func MakeZstdChunkedDiffer(ctx context.Context, store storage.Store, blobSize int64, annotations map[string]string, iss ImageSourceSeekable) (*ChunkedZstdDiffer, error) { + manifest, err := ReadZstdChunkedManifest(iss, blobSize, annotations) + if err != nil { + return nil, err + } + layersMetadata, layersTarget, err := getLayersCache(store) + if err != nil { + return nil, err + } + + return &ChunkedZstdDiffer{ + stream: iss, + manifest: manifest, + ctx: ctx, + layersMetadata: layersMetadata, + layersTarget: layersTarget, + }, nil +} + +func findFileInOtherLayers(file FileMetadata, root string, dirfd int, layersMetadata map[string]map[string]*FileMetadata, layersTarget map[string]string, missingDirsMode, mode os.FileMode) (*os.File, int64, error) { + // this is ugly, needs to be indexed + for layerID, checksums := range layersMetadata { + m, found := checksums[file.Checksum] + if !found { + continue + } + + source, ok := layersTarget[layerID] + if !ok { + continue + } + + sourceFile, err := securejoin.SecureJoin(source, m.Name) + if err != nil { + continue + } + + dstFile, written, err := copyFileContent(sourceFile, file.Name, root, dirfd, missingDirsMode, mode) + if err != nil { + continue + } + return dstFile, 
written, nil + } + return nil, 0, nil +} + +// findFileOnTheHost checks whether the requested file already exist on the host and copies the file content from there if possible. +// It is currently implemented to look only at the file with the same path. Ideally it can detect the same content also at different +// paths. +func findFileOnTheHost(file FileMetadata, root string, dirfd int, missingDirsMode, mode os.FileMode) (*os.File, int64, error) { + hostContentChecksum := sha256.New() + + sourceFile := filepath.Join("/", file.Name) + + st, err := os.Stat(sourceFile) + if err != nil || !st.Mode().IsRegular() { + return nil, 0, nil + } + + if st.Size() != file.Size { + return nil, 0, nil + } + + fd, err := unix.Open(sourceFile, unix.O_RDONLY|unix.O_NONBLOCK, 0) + if err != nil { + return nil, 0, nil + } + + f := os.NewFile(uintptr(fd), "fd") + defer f.Close() + + if _, err := io.Copy(hostContentChecksum, f); err != nil { + return nil, 0, nil + } + + checksum := fmt.Sprintf("%x", hostContentChecksum.Sum(nil)) + if checksum != file.Checksum { + return nil, 0, nil + } + + dstFile, written, err := copyFileContent(fmt.Sprintf("/proc/self/fd/%d", fd), file.Name, root, dirfd, missingDirsMode, mode) + if err != nil { + return nil, 0, nil + } + return dstFile, written, nil +} + +func maybeDoIDRemap(manifest []FileMetadata, options *archive.TarOptions) error { + if options.ChownOpts == nil && len(options.UIDMaps) == 0 || len(options.GIDMaps) == 0 { + return nil + } + + idMappings := idtools.NewIDMappingsFromMaps(options.UIDMaps, options.GIDMaps) + + for i := range manifest { + if options.ChownOpts != nil { + manifest[i].UID = options.ChownOpts.UID + manifest[i].GID = options.ChownOpts.GID + } else { + pair := idtools.IDPair{ + UID: manifest[i].UID, + GID: manifest[i].GID, + } + var err error + manifest[i].UID, manifest[i].GID, err = idMappings.ToContainer(pair) + if err != nil { + return err + } + } + } + return nil +} + +type bufferedNetworkReaderBuffer struct { + data []byte + len int + consumed int + err error +} + +type bufferedNetworkReader struct { + stream io.Reader + emptyBuffer chan *bufferedNetworkReaderBuffer + readyBuffer chan *bufferedNetworkReaderBuffer + terminate chan bool + current *bufferedNetworkReaderBuffer + mutex sync.Mutex + gotEOF bool +} + +// handleBufferedNetworkReader runs in a goroutine +func handleBufferedNetworkReader(br *bufferedNetworkReader) { + defer close(br.readyBuffer) + for { + select { + case b := <-br.emptyBuffer: + b.len, b.err = br.stream.Read(b.data) + br.readyBuffer <- b + if b.err != nil { + return + } + case <-br.terminate: + return + } + } +} + +func (n *bufferedNetworkReader) Close() { + close(n.terminate) + close(n.emptyBuffer) +} + +func (n *bufferedNetworkReader) read(p []byte) (int, error) { + if n.current != nil { + copied := copy(p, n.current.data[n.current.consumed:n.current.len]) + n.current.consumed += copied + if n.current.consumed == n.current.len { + n.emptyBuffer <- n.current + n.current = nil + } + if copied > 0 { + return copied, nil + } + } + if n.gotEOF { + return 0, io.EOF + } + + var b *bufferedNetworkReaderBuffer + + select { + case b = <-n.readyBuffer: + if b.err != nil { + if b.err != io.EOF { + return b.len, b.err + } + n.gotEOF = true + } + b.consumed = 0 + n.current = b + return n.read(p) + case <-n.terminate: + return 0, io.EOF + } +} + +func (n *bufferedNetworkReader) Read(p []byte) (int, error) { + n.mutex.Lock() + defer n.mutex.Unlock() + + return n.read(p) +} + +func makeBufferedNetworkReader(stream io.Reader, nBuffers, 
bufferSize uint) *bufferedNetworkReader { + br := bufferedNetworkReader{ + stream: stream, + emptyBuffer: make(chan *bufferedNetworkReaderBuffer, nBuffers), + readyBuffer: make(chan *bufferedNetworkReaderBuffer, nBuffers), + terminate: make(chan bool), + } + + go func() { + handleBufferedNetworkReader(&br) + }() + + for i := uint(0); i < nBuffers; i++ { + b := bufferedNetworkReaderBuffer{ + data: make([]byte, bufferSize), + } + br.emptyBuffer <- &b + } + + return &br +} + +type missingFile struct { + File *FileMetadata + Gap int64 +} + +func (m missingFile) Length() int64 { + return m.File.EndOffset - m.File.StartOffset +} + +type missingChunk struct { + RawChunk ImageSourceChunk + Files []missingFile +} + +func setFileAttrs(file *os.File, mode os.FileMode, metadata *FileMetadata, options *archive.TarOptions) error { + if file == nil || file.Fd() < 0 { + return errors.Errorf("invalid file") + } + fd := int(file.Fd()) + + t, err := TypeToTarType(metadata.Type) + if err != nil { + return err + } + if t == tar.TypeSymlink { + return nil + } + + if err := unix.Fchown(fd, metadata.UID, metadata.GID); err != nil { + if !options.IgnoreChownErrors { + return err + } + } + + for k, v := range metadata.Xattrs { + data, err := base64.StdEncoding.DecodeString(v) + if err != nil { + return err + } + if err := unix.Fsetxattr(fd, k, data, 0); err != nil { + return err + } + } + + ts := []syscall.Timespec{timeToTimespec(metadata.AccessTime), timeToTimespec(metadata.ModTime)} + if _, _, err := unix.Syscall6(unix.SYS_UTIMENSAT, uintptr(fd), 0, uintptr(unsafe.Pointer(&ts[0])), 0, 0, 0); err != 0 && err != unix.ENOSYS { + return err + } + + // Files must be already created with the correct mode, but we must make sure the setuid/setgid + // is set. + if t == tar.TypeDir || mode&(os.ModeSetuid|os.ModeSetgid) != 0 { + if err := unix.Fchmod(fd, uint32(mode)); err != nil { + return err + } + } + return nil +} + +func openFileUnderRoot(name, root string, dirfd int, flags uint64, mode os.FileMode) (*os.File, error) { + how := unix.OpenHow{ + Flags: flags, + Mode: uint64(mode & 07777), + Resolve: unix.RESOLVE_IN_ROOT, + } + + fd, err := unix.Openat2(dirfd, name, &how) + if err == nil { + return os.NewFile(uintptr(fd), name), nil + } + + path, err := securejoin.SecureJoin(root, name) + if err != nil { + return nil, err + } + + return os.OpenFile(path, int(flags), mode) +} + +func createFileFromZstdStream(dest string, dirfd int, reader io.Reader, missingDirsMode, mode os.FileMode, metadata *FileMetadata, options *archive.TarOptions) (err error) { + file, err := openFileUnderRoot(metadata.Name, dest, dirfd, newFileFlags, mode) + if err != nil { + return err + } + defer func() { + err2 := file.Close() + if err == nil { + err = err2 + } + }() + + z, err := zstd.NewReader(reader) + if err != nil { + return err + } + defer z.Close() + + checksum := sha256.New() + _, err = z.WriteTo(io.MultiWriter(file, checksum)) + if err != nil { + return err + } + if metadata.Checksum != fmt.Sprintf("%x", checksum.Sum(nil)) { + return fmt.Errorf("checksum mismatch for %q", dest) + } + return setFileAttrs(file, mode, metadata, options) +} + +func storeMissingFiles(stream io.Reader, contentType string, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { + mediaType, params, err := mime.ParseMediaType(contentType) + if err != nil { + return err + } + if !strings.HasPrefix(mediaType, "multipart/") { + if len(missingChunks) != 1 { + return errors.Errorf("invalid response, no 
multipart for multiple chunks requested") + } + + for _, mf := range missingChunks[0].Files { + if mf.Gap > 0 { + limitReader := io.LimitReader(stream, mf.Gap) + _, err := io.Copy(ioutil.Discard, limitReader) + if err != nil { + return err + } + continue + } + limitReader := io.LimitReader(stream, mf.Length()) + + if err := createFileFromZstdStream(dest, dirfd, limitReader, missingDirsMode, os.FileMode(mf.File.Mode), mf.File, options); err != nil { + return err + } + } + return nil + } + + // multipart response + boundary, found := params["boundary"] + if !found { + return errors.Errorf("boundary param not found") + } + + buffered := makeBufferedNetworkReader(stream, 64, 16384) + defer buffered.Close() + mr := multipart.NewReader(buffered, boundary) + + for mc := 0; ; mc++ { + p, err := mr.NextPart() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if mc == len(missingChunks) { + return errors.Errorf("too many chunks returned") + } + + for _, mf := range missingChunks[mc].Files { + if mf.Gap > 0 { + limitReader := io.LimitReader(p, mf.Gap) + _, err := io.Copy(ioutil.Discard, limitReader) + if err != nil { + return err + } + continue + } + + limitReader := io.LimitReader(p, mf.Length()) + + if err := createFileFromZstdStream(dest, dirfd, limitReader, missingDirsMode, os.FileMode(mf.File.Mode), mf.File, options); err != nil { + p.Close() + return err + } + } + p.Close() + } + return nil +} + +func mergeMissingChunks(missingChunks []missingChunk, target int) []missingChunk { + if len(missingChunks) <= target { + return missingChunks + } + + getGap := func(missingChunks []missingChunk, i int) int { + prev := missingChunks[i-1].RawChunk.Offset + missingChunks[i-1].RawChunk.Length + return int(missingChunks[i].RawChunk.Offset - prev) + } + + // this implementation doesn't account for duplicates, so it could merge + // more than necessary to reach the specified target. Since target itself + // is a heuristic value, it doesn't matter. + var gaps []int + for i := 1; i < len(missingChunks); i++ { + gaps = append(gaps, getGap(missingChunks, i)) + } + sort.Ints(gaps) + + toShrink := len(missingChunks) - target + targetValue := gaps[toShrink-1] + + newMissingChunks := missingChunks[0:1] + for i := 1; i < len(missingChunks); i++ { + gap := getGap(missingChunks, i) + if gap > targetValue { + newMissingChunks = append(newMissingChunks, missingChunks[i]) + } else { + prev := &newMissingChunks[len(newMissingChunks)-1] + gapFile := missingFile{ + Gap: int64(gap), + } + prev.RawChunk.Length += uint64(gap) + missingChunks[i].RawChunk.Length + prev.Files = append(append(prev.Files, gapFile), missingChunks[i].Files...) + } + } + + return newMissingChunks +} + +func retrieveMissingFiles(input *ChunkedZstdDiffer, dest string, dirfd int, missingChunks []missingChunk, missingDirsMode os.FileMode, options *archive.TarOptions) error { + var chunksToRequest []ImageSourceChunk + for _, c := range missingChunks { + chunksToRequest = append(chunksToRequest, c.RawChunk) + } + + // There are some missing files. Prepare a multirange request for the missing chunks. + var stream io.ReadCloser + var contentType string + var err error + + for stream == nil { + stream, contentType, err = input.stream.GetBlobAt(chunksToRequest) + if err != nil { + if _, ok := err.(ErrBadRequest); ok { + requested := len(missingChunks) + // If the server cannot handle at least 64 chunks in a single request, just give up. 
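+				// Otherwise retry with roughly half as many ranges: merging
+				// adjacent chunks grows each request's average size, and any
+				// bytes pulled in between are later discarded as "gap" files.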
+ if requested < 64 { + return err + } + + // Merge more chunks to request + missingChunks = mergeMissingChunks(missingChunks, requested/2) + continue + } + return err + } + } + + if err := storeMissingFiles(stream, contentType, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { + stream.Close() + return err + } + if err := stream.Close(); err != nil { + return err + } + + return nil +} + +func safeMkdir(target string, dirfd int, mode os.FileMode, metadata *FileMetadata, options *archive.TarOptions) error { + parent := filepath.Dir(metadata.Name) + base := filepath.Base(metadata.Name) + + parentFd := dirfd + if parent != "." { + parentFile, err := openFileUnderRoot(parent, target, dirfd, unix.O_DIRECTORY|unix.O_RDONLY, 0) + if err != nil { + return err + } + defer parentFile.Close() + parentFd = int(parentFile.Fd()) + } + + if err := unix.Mkdirat(parentFd, base, uint32(mode)); err != nil { + return err + } + + file, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer file.Close() + + return setFileAttrs(file, mode, metadata, options) +} + +func safeLink(target string, dirfd int, mode os.FileMode, metadata *FileMetadata, options *archive.TarOptions) error { + sourceFile, err := openFileUnderRoot(metadata.Linkname, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer sourceFile.Close() + + destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) + destDirFd := dirfd + if destDir != "." { + f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + destDirFd = int(f.Fd()) + } + + err = unix.Linkat(int(sourceFile.Fd()), "", destDirFd, destBase, unix.AT_EMPTY_PATH) + if err != nil { + return err + } + + newFile, err := openFileUnderRoot(metadata.Name, target, dirfd, unix.O_WRONLY, 0) + if err != nil { + return err + } + defer newFile.Close() + + return setFileAttrs(newFile, mode, metadata, options) +} + +func safeSymlink(target string, dirfd int, mode os.FileMode, metadata *FileMetadata, options *archive.TarOptions) error { + destDir, destBase := filepath.Dir(metadata.Name), filepath.Base(metadata.Name) + destDirFd := dirfd + if destDir != "." 
{ + f, err := openFileUnderRoot(destDir, target, dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer f.Close() + destDirFd = int(f.Fd()) + } + + return unix.Symlinkat(metadata.Linkname, destDirFd, destBase) +} + +type whiteoutHandler struct { + Dirfd int + Root string +} + +func (d whiteoutHandler) Setxattr(path, name string, value []byte) error { + file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer file.Close() + + return unix.Fsetxattr(int(file.Fd()), name, value, 0) +} + +func (d whiteoutHandler) Mknod(path string, mode uint32, dev int) error { + dir := filepath.Dir(path) + base := filepath.Base(path) + + dirfd := d.Dirfd + if dir != "" { + dir, err := openFileUnderRoot(dir, d.Root, d.Dirfd, unix.O_RDONLY, 0) + if err != nil { + return err + } + defer dir.Close() + + dirfd = int(dir.Fd()) + } + + return unix.Mknodat(dirfd, base, mode, dev) +} + +func checkChownErr(err error, name string, uid, gid int) error { + if e, ok := err.(*os.PathError); ok && e.Err == syscall.EINVAL { + return errors.Wrapf(err, "potentially insufficient UIDs or GIDs available in user namespace (requested %d:%d for %s): Check /etc/subuid and /etc/subgid", uid, gid, name) + } + return err +} + +func (d whiteoutHandler) Chown(path string, uid, gid int) error { + file, err := openFileUnderRoot(path, d.Root, d.Dirfd, unix.O_PATH, 0) + if err != nil { + return err + } + defer file.Close() + + if err := unix.Fchownat(int(file.Fd()), "", uid, gid, unix.AT_EMPTY_PATH); err != nil { + var stat unix.Stat_t + if unix.Fstat(int(file.Fd()), &stat) == nil { + if stat.Uid == uint32(uid) && stat.Gid == uint32(gid) { + return nil + } + } + return checkChownErr(err, path, uid, gid) + } + return nil +} + +type hardLinkToCreate struct { + dest string + dirfd int + mode os.FileMode + metadata *FileMetadata +} + +func (d *ChunkedZstdDiffer) Diff(dest string, options *archive.TarOptions) (graphdriver.DriverWithDifferOutput, error) { + output := graphdriver.DriverWithDifferOutput{ + Differ: d, + Metadata: string(d.manifest), + } + + // Generate the manifest + var manifest []FileMetadata + if err := json.Unmarshal(d.manifest, &manifest); err != nil { + return output, err + } + + whiteoutConverter := archive.GetWhiteoutConverter(options.WhiteoutFormat, options.WhiteoutData) + + var missingChunks []missingChunk + + if err := maybeDoIDRemap(manifest, options); err != nil { + return output, err + } + + if options.ForceMask != nil { + uid, gid, mode, err := archive.GetFileOwner(dest) + if err == nil { + value := fmt.Sprintf("%d:%d:0%o", uid, gid, mode) + if err := unix.Setxattr(dest, containersOverrideXattr, []byte(value), 0); err != nil { + return output, err + } + } + } + + dirfd, err := unix.Open(dest, unix.O_RDONLY, 0) + if err != nil { + return output, err + } + defer unix.Close(dirfd) + + otherLayersCache := prepareOtherLayersCache(d.layersMetadata) + + missingDirsMode := os.FileMode(0700) + if options.ForceMask != nil { + missingDirsMode = *options.ForceMask + } + + // hardlinks can point to missing files. 
So create them after all files + // are retrieved + var hardLinks []hardLinkToCreate + + missingChunksSize, totalChunksSize := int64(0), int64(0) + for i, r := range manifest { + if options.ForceMask != nil { + value := fmt.Sprintf("%d:%d:0%o", r.UID, r.GID, r.Mode&07777) + r.Xattrs[containersOverrideXattr] = base64.StdEncoding.EncodeToString([]byte(value)) + r.Mode = int64(*options.ForceMask) + } + + mode := os.FileMode(r.Mode) + + r.Name = filepath.Clean(r.Name) + r.Linkname = filepath.Clean(r.Linkname) + + t, err := TypeToTarType(r.Type) + if err != nil { + return output, err + } + if whiteoutConverter != nil { + hdr := archivetar.Header{ + Typeflag: t, + Name: r.Name, + Linkname: r.Linkname, + Size: r.Size, + Mode: r.Mode, + Uid: r.UID, + Gid: r.GID, + } + handler := whiteoutHandler{ + Dirfd: dirfd, + Root: dest, + } + writeFile, err := whiteoutConverter.ConvertReadWithHandler(&hdr, r.Name, &handler) + if err != nil { + return output, err + } + if !writeFile { + continue + } + } + switch t { + case tar.TypeReg: + // Create directly empty files. + if r.Size == 0 { + // Used to have a scope for cleanup. + createEmptyFile := func() error { + file, err := openFileUnderRoot(r.Name, dest, dirfd, newFileFlags, mode) + if err != nil { + return err + } + defer file.Close() + if err := setFileAttrs(file, mode, &r, options); err != nil { + return err + } + return nil + } + if err := createEmptyFile(); err != nil { + return output, err + } + continue + } + case tar.TypeDir: + if err := safeMkdir(dest, dirfd, mode, &r, options); err != nil { + return output, err + } + continue + case tar.TypeLink: + dest := dest + dirfd := dirfd + mode := mode + r := r + hardLinks = append(hardLinks, hardLinkToCreate{ + dest: dest, + dirfd: dirfd, + mode: mode, + metadata: &r, + }) + continue + case tar.TypeSymlink: + if err := safeSymlink(dest, dirfd, mode, &r, options); err != nil { + return output, err + } + continue + } + + totalChunksSize += r.Size + + dstFile, _, err := findFileInOtherLayers(r, dest, dirfd, otherLayersCache, d.layersTarget, missingDirsMode, mode) + if err != nil { + return output, err + } + if dstFile != nil { + if err := setFileAttrs(dstFile, mode, &r, options); err != nil { + dstFile.Close() + return output, err + } + dstFile.Close() + continue + } + + dstFile, _, err = findFileOnTheHost(r, dest, dirfd, missingDirsMode, mode) + if err != nil { + return output, err + } + if dstFile != nil { + if err := setFileAttrs(dstFile, mode, &r, options); err != nil { + dstFile.Close() + return output, err + } + dstFile.Close() + continue + } + + missingChunksSize += r.Size + if t == tar.TypeReg { + rawChunk := ImageSourceChunk{ + Offset: uint64(r.StartOffset), + Length: uint64(r.EndOffset - r.StartOffset), + } + file := missingFile{ + File: &manifest[i], + } + missingChunks = append(missingChunks, missingChunk{ + RawChunk: rawChunk, + Files: []missingFile{ + file, + }, + }) + } + } + // There are some missing files. Prepare a multirange request for the missing chunks. 
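+	// Merge adjacent ranges first: servers may refuse very long multirange
+	// requests, so the chunk list is capped at maxNumberMissingChunks and the
+	// bytes between merged chunks are fetched and thrown away.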
+ if len(missingChunks) > 0 { + missingChunks = mergeMissingChunks(missingChunks, maxNumberMissingChunks) + if err := retrieveMissingFiles(d, dest, dirfd, missingChunks, missingDirsMode, options); err != nil { + return output, err + } + } + + for _, m := range hardLinks { + if err := safeLink(m.dest, m.dirfd, m.mode, m.metadata, options); err != nil { + return output, err + } + } + + if totalChunksSize > 0 { + logrus.Debugf("Missing %d bytes out of %d (%.2f %%)", missingChunksSize, totalChunksSize, float32(missingChunksSize*100.0)/float32(totalChunksSize)) + } + return output, nil +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/.travis.yml b/vendor/github.com/cyphar/filepath-securejoin/.travis.yml new file mode 100644 index 0000000000..3938f38349 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/.travis.yml @@ -0,0 +1,19 @@ +# Copyright (C) 2017 SUSE LLC. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +language: go +go: + - 1.7.x + - 1.8.x + - tip + +os: + - linux + - osx + +script: + - go test -cover -v ./... + +notifications: + email: false diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/vendor/github.com/cyphar/filepath-securejoin/LICENSE new file mode 100644 index 0000000000..bec842f294 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE @@ -0,0 +1,28 @@ +Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +Copyright (C) 2017 SUSE LLC. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md new file mode 100644 index 0000000000..49b2baa9f3 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/README.md @@ -0,0 +1,65 @@ +## `filepath-securejoin` ## + +[![Build Status](https://travis-ci.org/cyphar/filepath-securejoin.svg?branch=master)](https://travis-ci.org/cyphar/filepath-securejoin) + +An implementation of `SecureJoin`, a [candidate for inclusion in the Go +standard library][go#20126]. 
The purpose of this function is to be a "secure" +alternative to `filepath.Join`, and in particular it provides certain +guarantees that are not provided by `filepath.Join`. + +This is the function prototype: + +```go +func SecureJoin(root, unsafePath string) (string, error) +``` + +This library **guarantees** the following: + +* If no error is set, the resulting string **must** be a child path of + `SecureJoin` and will not contain any symlink path components (they will all + be expanded). + +* When expanding symlinks, all symlink path components **must** be resolved + relative to the provided root. In particular, this can be considered a + userspace implementation of how `chroot(2)` operates on file paths. Note that + these symlinks will **not** be expanded lexically (`filepath.Clean` is not + called on the input before processing). + +* Non-existant path components are unaffected by `SecureJoin` (similar to + `filepath.EvalSymlinks`'s semantics). + +* The returned path will always be `filepath.Clean`ed and thus not contain any + `..` components. + +A (trivial) implementation of this function on GNU/Linux systems could be done +with the following (note that this requires root privileges and is far more +opaque than the implementation in this library, and also requires that +`readlink` is inside the `root` path): + +```go +package securejoin + +import ( + "os/exec" + "path/filepath" +) + +func SecureJoin(root, unsafePath string) (string, error) { + unsafePath = string(filepath.Separator) + unsafePath + cmd := exec.Command("chroot", root, + "readlink", "--canonicalize-missing", "--no-newline", unsafePath) + output, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + expanded := string(output) + return filepath.Join(root, expanded), nil +} +``` + +[go#20126]: https://github.com/golang/go/issues/20126 + +### License ### + +The license of this project is the same as Go, which is a BSD 3-clause license +available in the `LICENSE` file. diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION new file mode 100644 index 0000000000..ee1372d33a --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -0,0 +1 @@ +0.2.2 diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go new file mode 100644 index 0000000000..c4ca3d7130 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -0,0 +1,134 @@ +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package securejoin is an implementation of the hopefully-soon-to-be-included +// SecureJoin helper that is meant to be part of the "path/filepath" package. +// The purpose of this project is to provide a PoC implementation to make the +// SecureJoin proposal (https://github.com/golang/go/issues/20126) more +// tangible. +package securejoin + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/pkg/errors" +) + +// ErrSymlinkLoop is returned by SecureJoinVFS when too many symlinks have been +// evaluated in attempting to securely join the two given paths. +var ErrSymlinkLoop = errors.Wrap(syscall.ELOOP, "secure join") + +// IsNotExist tells you if err is an error that implies that either the path +// accessed does not exist (or path components don't exist). 
This is +// effectively a more broad version of os.IsNotExist. +func IsNotExist(err error) bool { + // If it's a bone-fide ENOENT just bail. + if os.IsNotExist(errors.Cause(err)) { + return true + } + + // Check that it's not actually an ENOTDIR, which in some cases is a more + // convoluted case of ENOENT (usually involving weird paths). + var errno error + switch err := errors.Cause(err).(type) { + case *os.PathError: + errno = err.Err + case *os.LinkError: + errno = err.Err + case *os.SyscallError: + errno = err.Err + } + return errno == syscall.ENOTDIR || errno == syscall.ENOENT +} + +// SecureJoinVFS joins the two given path components (similar to Join) except +// that the returned path is guaranteed to be scoped inside the provided root +// path (when evaluated). Any symbolic links in the path are evaluated with the +// given root treated as the root of the filesystem, similar to a chroot. The +// filesystem state is evaluated through the given VFS interface (if nil, the +// standard os.* family of functions are used). +// +// Note that the guarantees provided by this function only apply if the path +// components in the returned string are not modified (in other words are not +// replaced with symlinks on the filesystem) after this function has returned. +// Such a symlink race is necessarily out-of-scope of SecureJoin. +func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { + // Use the os.* VFS implementation if none was specified. + if vfs == nil { + vfs = osVFS{} + } + + var path bytes.Buffer + n := 0 + for unsafePath != "" { + if n > 255 { + return "", ErrSymlinkLoop + } + + // Next path component, p. + i := strings.IndexRune(unsafePath, filepath.Separator) + var p string + if i == -1 { + p, unsafePath = unsafePath, "" + } else { + p, unsafePath = unsafePath[:i], unsafePath[i+1:] + } + + // Create a cleaned path, using the lexical semantics of /../a, to + // create a "scoped" path component which can safely be joined to fullP + // for evaluation. At this point, path.String() doesn't contain any + // symlink components. + cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p) + if cleanP == string(filepath.Separator) { + path.Reset() + continue + } + fullP := filepath.Clean(root + cleanP) + + // Figure out whether the path is a symlink. + fi, err := vfs.Lstat(fullP) + if err != nil && !IsNotExist(err) { + return "", err + } + // Treat non-existent path components the same as non-symlinks (we + // can't do any better here). + if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 { + path.WriteString(p) + path.WriteRune(filepath.Separator) + continue + } + + // Only increment when we actually dereference a link. + n++ + + // It's a symlink, expand it by prepending it to the yet-unparsed path. + dest, err := vfs.Readlink(fullP) + if err != nil { + return "", err + } + // Absolute symlinks reset any work we've already done. + if filepath.IsAbs(dest) { + path.Reset() + } + unsafePath = dest + string(filepath.Separator) + unsafePath + } + + // We have to clean path.String() here because it may contain '..' + // components that are entirely lexical, but would be misleading otherwise. + // And finally do a final clean to ensure that root is also lexically + // clean. + fullP := filepath.Clean(string(filepath.Separator) + path.String()) + return filepath.Clean(root + fullP), nil +} + +// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library +// of functions as the VFS. If in doubt, use this function over SecureJoinVFS. 
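+//
+// A purely illustrative example (paths are made up):
+//
+//	// "unsafe/path" may contain symlinks pointing anywhere; the result is
+//	// still guaranteed to be lexically inside /var/lib/storage/layer1.
+//	path, err := securejoin.SecureJoin("/var/lib/storage/layer1", "unsafe/path")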
+func SecureJoin(root, unsafePath string) (string, error) { + return SecureJoinVFS(root, unsafePath, nil) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/vendor.conf b/vendor/github.com/cyphar/filepath-securejoin/vendor.conf new file mode 100644 index 0000000000..66bb574b95 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vendor.conf @@ -0,0 +1 @@ +github.com/pkg/errors v0.8.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go new file mode 100644 index 0000000000..a82a5eae11 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go @@ -0,0 +1,41 @@ +// Copyright (C) 2017 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package securejoin + +import "os" + +// In future this should be moved into a separate package, because now there +// are several projects (umoci and go-mtree) that are using this sort of +// interface. + +// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is +// equivalent to using the standard os.* family of functions. This is mainly +// used for the purposes of mock testing, but also can be used to otherwise use +// SecureJoin with VFS-like system. +type VFS interface { + // Lstat returns a FileInfo describing the named file. If the file is a + // symbolic link, the returned FileInfo describes the symbolic link. Lstat + // makes no attempt to follow the link. These semantics are identical to + // os.Lstat. + Lstat(name string) (os.FileInfo, error) + + // Readlink returns the destination of the named symbolic link. These + // semantics are identical to os.Readlink. + Readlink(name string) (string, error) +} + +// osVFS is the "nil" VFS, in that it just passes everything through to the os +// module. +type osVFS struct{} + +// Lstat returns a FileInfo describing the named file. If the file is a +// symbolic link, the returned FileInfo describes the symbolic link. Lstat +// makes no attempt to follow the link. These semantics are identical to +// os.Lstat. +func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } + +// Readlink returns the destination of the named symbolic link. These +// semantics are identical to os.Readlink. +func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } diff --git a/vendor/modules.txt b/vendor/modules.txt index 46d8f63a33..ba25d49176 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -29,6 +29,9 @@ github.com/Microsoft/hcsshim/internal/wclayer github.com/Microsoft/hcsshim/osversion # github.com/containerd/cgroups v0.0.0-20190919134610-bf292b21730f github.com/containerd/cgroups/stats/v1 +# github.com/cyphar/filepath-securejoin v0.2.2 +## explicit +github.com/cyphar/filepath-securejoin # github.com/davecgh/go-spew v1.1.1 github.com/davecgh/go-spew/spew # github.com/docker/go-units v0.4.0