Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve POSIX metadata #24

Merged
merged 8 commits into from
Oct 23, 2024
47 changes: 28 additions & 19 deletions cmd/s3tar/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import (
"encoding/csv"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"strconv"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/aws/middleware"
"github.com/aws/aws-sdk-go-v2/aws/retry"
Expand All @@ -16,10 +21,6 @@ import (
"github.com/aws/aws-sdk-go-v2/service/s3/types"
s3tar "github.com/awslabs/amazon-s3-tar-tool"
"github.com/urfave/cli/v2"
"log"
"os"
"path/filepath"
"strconv"
)

var (
Expand Down Expand Up @@ -69,6 +70,7 @@ func run(args []string) error {
var tagSetInput string
var kmsKeyID string
var sseAlgo string
var preservePosixMetadata bool

var tagSet types.Tagging
var err error
Expand Down Expand Up @@ -250,6 +252,11 @@ func run(args []string) error {
Usage: "aws:kms or AES256",
Destination: &sseAlgo,
},
&cli.BoolFlag{
Name: "preserve-posix-metadata",
Usage: "Preserve POSIX permisions, uid and gid if present in S3 object metadata. See https://docs.aws.amazon.com/fsx/latest/LustreGuide/posix-metadata-support.html",
Destination: &preservePosixMetadata,
},
},
Action: func(cCtx *cli.Context) error {
logLevel := parseLogLevel(cCtx.Count("verbose"))
Expand Down Expand Up @@ -307,16 +314,17 @@ func run(args []string) error {
}

s3opts := &s3tar.S3TarS3Options{
SrcManifest: manifestPath,
SkipManifestHeader: skipManifestHeader,
Threads: threads,
DeleteSource: false,
Region: region,
EndpointUrl: endpointUrl,
ConcatInMemory: concatInMemory,
UrlDecode: urlDecode,
UserMaxPartSize: userPartMaxSize,
ObjectTags: tagSet,
SrcManifest: manifestPath,
SkipManifestHeader: skipManifestHeader,
Threads: threads,
DeleteSource: false,
Region: region,
EndpointUrl: endpointUrl,
ConcatInMemory: concatInMemory,
UrlDecode: urlDecode,
UserMaxPartSize: userPartMaxSize,
ObjectTags: tagSet,
PreservePOSIXMetadata: preservePosixMetadata,
}
s3opts.DstBucket, s3opts.DstKey = s3tar.ExtractBucketAndPath(archiveFile)
s3opts.DstPrefix = filepath.Dir(s3opts.DstKey)
Expand Down Expand Up @@ -380,11 +388,12 @@ func run(args []string) error {
fmt.Printf("appending '/' to destination path\n")
}
s3opts := &s3tar.S3TarS3Options{
Threads: threads,
DeleteSource: false,
Region: region,
EndpointUrl: endpointUrl,
ExternalToc: externalToc,
Threads: threads,
DeleteSource: false,
Region: region,
EndpointUrl: endpointUrl,
ExternalToc: externalToc,
PreservePOSIXMetadata: preservePosixMetadata,
}
s3opts.SrcBucket, s3opts.SrcKey = s3tar.ExtractBucketAndPath(archiveFile)
s3opts.SrcPrefix = filepath.Dir(s3opts.SrcKey)
Expand Down
75 changes: 70 additions & 5 deletions extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ import (
"encoding/csv"
"errors"
"fmt"
"github.com/aws/aws-sdk-go-v2/service/s3"
"golang.org/x/sync/errgroup"
"io"
"path/filepath"
"strconv"
"strings"

"github.com/aws/aws-sdk-go-v2/service/s3"
"golang.org/x/sync/errgroup"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
)
Expand Down Expand Up @@ -87,11 +89,42 @@ func List(ctx context.Context, svc *s3.Client, bucket, key string, opts *S3TarS3
}

func extractRange(ctx context.Context, svc *s3.Client, bucket, key, dstBucket, dstKey string, start, size int64, opts *S3TarS3Options) error {
var Metadata map[string]string
if opts.PreservePOSIXMetadata {
hdr, headerSize, err := extractTarHeaderEnding(ctx, svc, bucket, key, start)
if err != nil {
Warnf(ctx, "unable to extract tar header for %s, cannot set permissions", dstKey)
hdr = nil
}
if hdr != nil {
var mtime string = strconv.FormatInt(hdr.ModTime.UnixMilli(), 10)
var hasATime = hdr.Format == tar.FormatGNU || hdr.Format == tar.FormatPAX
var atime string
if hasATime {
atime = strconv.FormatInt(hdr.AccessTime.UnixMilli(), 10)
} else {
atime = mtime
}
Metadata = map[string]string{
"file-permissions": fmt.Sprintf("%#o", hdr.Mode),
"file-owner": strconv.Itoa(hdr.Uid),
"file-group": strconv.Itoa(hdr.Gid),
"file-atime": atime,
"file-mtime": mtime,
}
Debugf(ctx, "got posix metadata permissions: %s uid: %s gid: %s name: %s from header size %d, ending %d, format %s",
Metadata["file-permissions"], Metadata["file-owner"], Metadata["file-group"], hdr.Name,
headerSize, start, hdr.Format,
)
}

}

output, err := svc.CreateMultipartUpload(ctx, &s3.CreateMultipartUploadInput{
Bucket: aws.String(dstBucket),
Key: aws.String(dstKey),
ACL: types.ObjectCannedACLBucketOwnerFullControl,
Bucket: aws.String(dstBucket),
Key: aws.String(dstKey),
ACL: types.ObjectCannedACLBucketOwnerFullControl,
Metadata: Metadata,
})
if err != nil {
return err
Expand Down Expand Up @@ -207,6 +240,38 @@ retry:
return hdr, headerSize, err
}

// extractTarHeaderEnding reads and parses the tar header whose last byte sits
// immediately before offset end in the object bucket/key.
//
// The header length is not known in advance, so candidate sizes are tried from
// largest to smallest: the final 512 bytes of a PAX (POSIX.1-2001) header
// resemble a USTAR (POSIX.1-1988) header, so the longer PAX size must be
// attempted first. A candidate is skipped when fewer than that many bytes
// precede end.
//
// Returns the parsed header and the size of the candidate that parsed
// successfully. A failure to fetch the byte range is returned as-is. If no
// candidate parses, a non-nil error wrapping the last parse failure is
// returned, so the caller decides whether missing metadata is fatal.
func extractTarHeaderEnding(ctx context.Context, svc *s3.Client, bucket, key string, end int64) (*tar.Header, int64, error) {
	candidates := []int64{paxTarHeaderSize, gnuTarHeaderSize}

	var parseErr error
	for _, headerSize := range candidates {
		if end < headerSize {
			// Not enough bytes before end to hold a header of this size;
			// requesting it would produce a negative range start.
			continue
		}

		// NOTE(review): if getObjectRange returns an io.ReadCloser the body
		// should be closed after parsing — confirm its return type.
		output, err := getObjectRange(ctx, svc, bucket, key, end-headerSize, end-1)
		if err != nil {
			return nil, 0, err
		}

		hdr, err := tar.NewReader(output).Next()
		if err == nil {
			return hdr, headerSize, nil
		}
		parseErr = err
	}

	if parseErr == nil {
		parseErr = errors.New("not enough bytes before offset to contain a header")
	}
	return nil, 0, fmt.Errorf("unable to parse header ending %d from TAR: %w", end, parseErr)
}

func extractCSVToc(ctx context.Context, svc *s3.Client, bucket, key, externalToc string) (TOC, error) {
var m TOC

Expand Down
103 changes: 101 additions & 2 deletions headers.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,48 @@ import (
"log"
"path/filepath"
"sort"
"strconv"
"strings"
"sync/atomic"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
)

func buildHeader(o, prev *S3Obj, addZeros bool) S3Obj {
// buildHeader builds a tar header for the given S3 object.
//
// Parameters:
// - o: The S3 object for which the tar header needs to be built.
// - prev: The previous S3 object.
// - addZeros: A flag indicating whether to add zeros.
// - head: The head object containing S3 metadata, used to set file permissions, owner, group, and access/modification times. May be nil, in which case the header keeps its defaults.
//
// Returns:
// - S3Obj: The S3 object with the built tar header.
//
// Example:
//
// o := &S3Obj{
// Key: aws.String("example.txt"),
// Size: aws.Int64(1024),
// LastModified: aws.Time(time.Now()),
// }
// prev := &S3Obj{
// Size: aws.Int64(512),
// }
// addZeros := true
// head := &s3.HeadObjectOutput{
// Metadata: map[string]string{
// "file-permissions": "0644",
// "file-owner": "1000",
// "file-group": "1000",
// },
// }
// result := buildHeader(o, prev, addZeros, head)
// fmt.Println(result)
func buildHeader(o, prev *S3Obj, addZeros bool, head *s3.HeadObjectOutput) S3Obj {

name := *o.Key
var buff bytes.Buffer
Expand All @@ -33,6 +67,8 @@ func buildHeader(o, prev *S3Obj, addZeros bool) S3Obj {
AccessTime: time.Now(),
Format: tarFormat,
}
setHeaderPermissions(hdr, head)

if addZeros {
buff.Write(pad)
}
Expand Down Expand Up @@ -62,6 +98,65 @@ func buildHeader(o, prev *S3Obj, addZeros bool) S3Obj {
}
}

// setHeaderPermissions copies POSIX attributes found in the S3 object metadata
// of head onto the tar header hdr. Each key is optional and handled
// independently:
//
//   - "file-permissions": parsed as an octal string and stored in hdr.Mode.
//   - "file-owner": parsed as a base-10 integer and stored in hdr.Uid.
//   - "file-group": parsed as a base-10 integer and stored in hdr.Gid.
//   - "file-atime": converted with s3metadataToTime and stored in hdr.AccessTime.
//   - "file-mtime": converted with s3metadataToTime and stored in both
//     hdr.ModTime and hdr.ChangeTime.
//
// If head is nil or carries no metadata, hdr is left untouched. A value that
// is present but cannot be parsed terminates the process via log.Fatal.
func setHeaderPermissions(hdr *tar.Header, head *s3.HeadObjectOutput) {
	if head == nil || len(head.Metadata) == 0 {
		return
	}
	meta := head.Metadata

	if modeStr, ok := meta["file-permissions"]; ok {
		mode, err := strconv.ParseInt(modeStr, 8, 64)
		if err != nil {
			log.Fatal(err)
		}
		hdr.Mode = mode
	}

	// uid/gid are parsed at the platform's int width rather than as signed
	// 32-bit values: POSIX IDs are commonly unsigned 32-bit, so IDs above
	// 2147483647 must not be rejected where int is 64 bits wide.
	if ownerStr, ok := meta["file-owner"]; ok {
		uid, err := strconv.Atoi(ownerStr)
		if err != nil {
			log.Fatal(err)
		}
		hdr.Uid = uid
	}
	if groupStr, ok := meta["file-group"]; ok {
		gid, err := strconv.Atoi(groupStr)
		if err != nil {
			log.Fatal(err)
		}
		hdr.Gid = gid
	}

	if atimeStr, ok := meta["file-atime"]; ok {
		hdr.AccessTime = s3metadataToTime(atimeStr)
	}
	if mtimeStr, ok := meta["file-mtime"]; ok {
		modified := s3metadataToTime(mtimeStr)
		hdr.ModTime = modified
		hdr.ChangeTime = modified
	}
}

// s3metadataToTime converts a timestamp string taken from S3 object metadata
// into a time.Time.
//
// Two encodings are accepted:
//   - a base-10 integer followed by the suffix "ns", interpreted as
//     nanoseconds since the Unix epoch;
//   - a bare base-10 integer, interpreted as milliseconds since the Unix epoch.
//
// A value that does not parse as an int64 terminates the process via log.Fatal.
func s3metadataToTime(timeStr string) time.Time {
	if strings.HasSuffix(timeStr, "ns") {
		nanos, err := strconv.ParseInt(strings.TrimSuffix(timeStr, "ns"), 10, 64)
		if err != nil {
			log.Fatal(err)
		}
		return time.Unix(0, nanos)
	}

	millis, err := strconv.ParseInt(timeStr, 10, 64)
	if err != nil {
		log.Fatal(err)
	}
	// time.UnixMilli splits the value into seconds and nanoseconds itself, so
	// large millisecond counts do not overflow the way millis*1e6 in int64 would.
	return time.UnixMilli(millis)
}

func buildHeaders(objectList []*S3Obj, frontPad bool) []*S3Obj {
headers := []*S3Obj{}
for i := 0; i < len(objectList); i++ {
Expand All @@ -77,7 +172,11 @@ func buildHeaders(objectList []*S3Obj, frontPad bool) []*S3Obj {
if !frontPad {
addZero = false
}
newObject := buildHeader(o, prev, addZero)
/* buildHeaders is only called from processHeaders which builds the manifest using createCSVTOC.
* inspection of createCSVTOC shows that file permissions, uid and gid are not used in the manifest
* therefore we do not need to pass in the head object output
*/
newObject := buildHeader(o, prev, addZero, nil)
newObject.PartNum = i
newObject.Key = aws.String(filename + ".hdr")
headers = append(headers, &newObject)
Expand Down
6 changes: 4 additions & 2 deletions manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ import (
"context"
"encoding/csv"
"fmt"
"github.com/aws/aws-sdk-go-v2/service/s3"
"io"
"log"
"os"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/service/s3"

"github.com/aws/aws-sdk-go-v2/aws"
)

Expand All @@ -31,7 +32,8 @@ func buildToc(ctx context.Context, objectList []*S3Obj) (*S3Obj, *S3Obj, error)
tocObj := NewS3Obj()
tocObj.Key = aws.String("toc.csv")
tocObj.AddData(toc.Bytes())
tocHeader := buildHeader(tocObj, nil, false)
// passing nil as we don't need to set permissions/owner/group for toc.csv
tocHeader := buildHeader(tocObj, nil, false, nil)
tocHeader.Bucket = objectList[0].Bucket
tocObj.Bucket = objectList[0].Bucket

Expand Down
Loading