Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change default copy behaviour to preserve directory structure #133

Merged
merged 12 commits into from
Mar 30, 2020
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ This is a major release with many breaking changes.
- `-ds`, `-dw`, `-us` and `-uw` global flags are no longer available. Multipart
concurrency and part size flags are now part of the `cp/mv` command. New
replacement flags are `--concurrency | -c` and `--part-size | -p`. ([#110](https://github.com/peak/s5cmd/pull/110))

- Dropped `-parents` flag from copy command. Copy behaviour has changed to preserve the directory hierarchy as a default.
Optional `-flatten` flag is added to flatten directory structure.
#### Features

- Added `mb` command to make buckets. ([#25](https://github.com/peak/s5cmd/issues/25))
Expand Down
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ parallel. `s5cmd` will create the destination directory if it is missing.

`file1.gz file2.gz file3.gz`

ℹ️ `s5cmd` flattens the source directory structure by default. If you want to keep
the source directory structure, use the `--parents` flag.
ℹ️ `s5cmd` preserves the source directory structure by default. If you want to flatten
the source directory structure, use the `--flatten` flag.

s5cmd cp --parents 's3://bucket/logs/2020/03/*' logs/
s5cmd cp 's3://bucket/logs/2020/03/*' logs/

The above command will match the following objects:

Expand All @@ -111,7 +111,7 @@ logs/19/originals/file3.gz

#### Upload multiple files to S3

s5cmd cp --parents directory/ s3://bucket/
s5cmd cp directory/ s3://bucket/

Will upload all files in the given directory to S3 while keeping the folder hierarchy
of the source.
Expand All @@ -138,7 +138,7 @@ they'll be deleted in a single request.

`s5cmd` supports copying objects on the server side as well.

s5cmd cp --parents 's3://bucket/logs/2020/*' s3://bucket/logs/backup/
s5cmd cp 's3://bucket/logs/2020/*' s3://bucket/logs/backup/

Will copy all the matching objects to the given S3 prefix, respecting the source
folder hierarchy.
Expand Down Expand Up @@ -169,7 +169,7 @@ or
`commands.txt` content could look like:

```
cp --parents s3://bucket/2020/03/* logs/2020/03/
cp s3://bucket/2020/03/* logs/2020/03/

# line comments are supported
rm s3://bucket/2020/03/19/file2.gz
Expand Down
85 changes: 26 additions & 59 deletions command/cp.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ var copyCommandFlags = []cli.Flag{
Usage: "only overwrite destination if source modtime is newer",
},
&cli.BoolFlag{
Name: "parents",
Usage: "create same directory structure of source, starting from the first wildcard",
Name: "flatten",
Aliases: []string{"f"},
Usage: "flatten directory structure of source, starting from the first wildcard",
},
&cli.StringFlag{
Name: "storage-class",
Expand Down Expand Up @@ -118,7 +119,7 @@ var CopyCommand = &cli.Command{
noClobber: c.Bool("no-clobber"),
ifSizeDiffer: c.Bool("if-size-differ"),
ifSourceNewer: c.Bool("if-source-newer"),
parents: c.Bool("parents"),
flatten: c.Bool("flatten"),
storageClass: storage.LookupClass(c.String("storage-class")),
concurrency: c.Int("concurrency"),
partSize: c.Int64("part-size") * megabytes,
Expand All @@ -140,7 +141,7 @@ type Copy struct {
noClobber bool
ifSizeDiffer bool
ifSourceNewer bool
parents bool
flatten bool
storageClass storage.StorageClass

// s3 options
Expand Down Expand Up @@ -183,6 +184,11 @@ func (c Copy) Run(ctx context.Context) error {
}()

isBatch := srcurl.HasGlob()
if !isBatch && !srcurl.IsRemote() {
obj, _ := client.Stat(ctx, srcurl)
isBatch = obj != nil && obj.Type.IsDir()
}

for object := range objch {
if object.Type.IsDir() || errorpkg.IsCancelation(object.Err) {
continue
Expand Down Expand Up @@ -223,7 +229,7 @@ func (c Copy) prepareCopyTask(
isBatch bool,
) func() error {
return func() error {
dsturl = prepareCopyDestination(srcurl, dsturl, c.parents, isBatch)
dsturl = prepareRemoteDestination(srcurl, dsturl, c.flatten, isBatch)
err := c.doCopy(ctx, srcurl, dsturl)
if err != nil {
return &errorpkg.Error{
Expand All @@ -244,7 +250,7 @@ func (c Copy) prepareDownloadTask(
isBatch bool,
) func() error {
return func() error {
dsturl, err := prepareDownloadDestination(ctx, srcurl, dsturl, c.parents, isBatch)
dsturl, err := prepareLocalDestination(ctx, srcurl, dsturl, c.flatten, isBatch)
if err != nil {
return err
}
Expand All @@ -269,7 +275,7 @@ func (c Copy) prepareUploadTask(
isBatch bool,
) func() error {
return func() error {
dsturl = prepareUploadDestination(srcurl, dsturl, c.parents, isBatch)
dsturl = prepareRemoteDestination(srcurl, dsturl, c.flatten, isBatch)
err := c.doUpload(ctx, srcurl, dsturl)
if err != nil {
return &errorpkg.Error{
Expand Down Expand Up @@ -494,48 +500,36 @@ func (c Copy) shouldOverride(ctx context.Context, srcurl *url.URL, dsturl *url.U
return stickyErr
}

// prepareCopyDestination will return a new destination URL for local->local
// and remote->remote copy operations.
func prepareCopyDestination(
// prepareRemoteDestination will return a new destination URL for
// remote->remote and local->remote copy operations.
func prepareRemoteDestination(
srcurl *url.URL,
dsturl *url.URL,
parents bool,
flatten bool,
isBatch bool,
) *url.URL {
objname := srcurl.Base()
if parents {
if isBatch && !flatten {
objname = srcurl.Relative()
}

// For remote->remote copy operations, treat <dst> as prefix if it has "/"
// suffix.
if dsturl.IsRemote() {
if dsturl.IsPrefix() || dsturl.IsBucket() {
dsturl = dsturl.Join(objname)
}
return dsturl
}

// Absolute <src> path is given. Use given <dst> and local copy operation
// will create missing directories if <dst> has one.
if !isBatch {
return dsturl
if dsturl.IsPrefix() || dsturl.IsBucket() {
dsturl = dsturl.Join(objname)
}

return dsturl.Join(objname)
return dsturl
}

// prepareDownloadDestination will return a new destination URL for
// remote->local and remote->remote copy operations.
func prepareDownloadDestination(
// remote->local copy operations.
func prepareLocalDestination(
ctx context.Context,
srcurl *url.URL,
dsturl *url.URL,
parents bool,
flatten bool,
isBatch bool,
) (*url.URL, error) {
objname := srcurl.Base()
if parents {
if isBatch && !flatten {
objname = srcurl.Relative()
}

Expand All @@ -555,7 +549,7 @@ func prepareDownloadDestination(
return nil, err
}

if parents {
if isBatch && !flatten {
dsturl = dsturl.Join(objname)
if err := os.MkdirAll(dsturl.Dir(), os.ModePerm); err != nil {
return nil, err
Expand All @@ -578,27 +572,6 @@ func prepareDownloadDestination(
return dsturl, nil
}

// prepareUploadDestination will return a new destination URL for local->remote
// operations.
func prepareUploadDestination(
	srcurl *url.URL,
	dsturl *url.URL,
	parents bool,
	isBatch bool,
) *url.URL {
	// A single-file upload aimed at an explicit object path (neither a bucket
	// nor a prefix) keeps the user's destination object name as-is.
	explicitTarget := !isBatch && !dsturl.IsBucket() && !dsturl.IsPrefix()
	if explicitTarget {
		return dsturl
	}

	// Otherwise derive the object name from the source: either just the base
	// name, or the wildcard-relative path when directory structure is kept.
	name := srcurl.Base()
	if parents {
		name = srcurl.Relative()
	}
	return dsturl.Join(name)
}

// getObject checks if the object from given url exists. If no object is
// found, error and returning object would be nil.
func getObject(ctx context.Context, url *url.URL) (*storage.Object, error) {
Expand Down Expand Up @@ -644,12 +617,6 @@ func Validate(c *cli.Context) error {
return fmt.Errorf("target %q can not contain glob characters", dst)
}

// --parents is used in conjunction with a wildcard source to deduce
// relative source paths.
if !srcurl.HasGlob() && c.Bool("parents") {
return fmt.Errorf("source argument must contain wildcard if --parents flag is provided")
}

// we don't operate on S3 prefixes for copy and delete operations.
if srcurl.IsBucket() || srcurl.IsPrefix() {
return fmt.Errorf("source argument must contain wildcard character")
Expand Down
2 changes: 1 addition & 1 deletion command/mv.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ var MoveCommand = &cli.Command{
noClobber: c.Bool("no-clobber"),
ifSizeDiffer: c.Bool("if-size-differ"),
ifSourceNewer: c.Bool("if-source-newer"),
parents: c.Bool("parents"),
flatten: c.Bool("flatten"),
storageClass: storage.LookupClass(c.String("storage-class")),
}

Expand Down
Loading