Skip to content

Commit

Permalink
Fix recursive symlinks when uploading to sky storage (#882)
Browse files Browse the repository at this point in the history
* Fix symlinks

* Add tests

* fix symlink

* address comments

* lint

* fix
  • Loading branch information
romilbhardwaj authored and suquark committed Jun 7, 2022
1 parent 36647ea commit ade89e7
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 7 deletions.
6 changes: 3 additions & 3 deletions docs/source/reference/storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ and storage mounting:

.. note::
Symbolic links are handled differently in :code:`file_mounts` depending on whether Sky Storage is used.
For mounts backed by Sky Storage, referenced data for all symbolic links is copied to remote.
For mounts not using Sky Storage (e.g., those using rsync) the symbolic links are directly copied.
Their targets must be separately mounted or else the symlinks may break.
For mounts backed by Sky Storage, symbolic links are not copied to remote.
For mounts not using Sky Storage (e.g., those using rsync) the symbolic links are directly copied, not their target data.
The targets must be separately mounted or else the symlinks may break.

Creating a shared file system
-----------------------------
Expand Down
5 changes: 5 additions & 0 deletions examples/using_file_mounts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,8 @@ run: |
# sky.egg-info/ should not exists on remote due to the .gitingore
cd ~/sky_workdir
! ls sky.egg-info/
# Symlinks should be copied, but they will be broken
# Assumes symlink named circle-link in /tmp/workdir (created by test_smoke.py)
ls /tmp/workdir/circle-link
! readlink -f /tmp/workdir/circle-link
7 changes: 4 additions & 3 deletions sky/cloud_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,17 @@ def make_sync_dir_command(self, source: str, destination: str) -> str:
# To increase parallelism, modify max_concurrent_requests in your
# aws config file (Default path: ~/.aws/config).
download_via_awscli = f'mkdir -p {destination} && \
aws s3 sync {source} {destination}'
aws s3 sync --no-follow-symlinks ' \
f'{source} {destination}'

all_commands = list(self._GET_AWSCLI)
all_commands.append(download_via_awscli)
return ' && '.join(all_commands)

def make_sync_file_command(self, source: str, destination: str) -> str:
"""Downloads a file using AWS CLI."""
download_via_awscli = f'mkdir -p {destination} && \
aws s3 cp {source} {destination}'
download_via_awscli = (f'mkdir -p {destination} &&'
f'aws s3 cp {source} {destination}')

all_commands = list(self._GET_AWSCLI)
all_commands.append(download_via_awscli)
Expand Down
3 changes: 2 additions & 1 deletion sky/data/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,8 @@ def sync_local_dir(self) -> None:
file (Default path: ~/.aws/config).
"""
source = os.path.abspath(os.path.expanduser(self.source))
sync_command = f'aws s3 sync {source} s3://{self.name}/'
sync_command = ('aws s3 sync --no-follow-symlinks '
f'{source} s3://{self.name}/')
with backend_utils.safe_console_status(
f'[bold cyan]Syncing '
f'[green]{self.source} to s3://{self.name}/'):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def test_file_mounts():
'touch ~/tmpfile',
'mkdir -p ~/tmp-workdir',
'touch ~/tmp-workdir/foo',
'ln -f -s ~/tmp-workdir/ ~/tmp-workdir/circle-link',
f'sky launch -y -c {name} examples/using_file_mounts.yaml',
f'sky logs {name} 1 --status', # Ensure the job succeeded.
],
Expand Down Expand Up @@ -543,6 +544,8 @@ def tmp_mount(self, tmp_path):
tmp_dir.mkdir()
tmp_file = tmp_dir / 'tmp-file'
tmp_file.write_text('test')
circle_link = tmp_dir / 'circle-link'
circle_link.symlink_to(tmp_dir, target_is_directory=True)
yield str(tmp_dir)

@pytest.fixture
Expand Down Expand Up @@ -625,6 +628,14 @@ def test_upload_to_existing_bucket(self, tmp_awscli_bucket, tmp_mount):
'File not found in bucket - output was : {}'.format(out.decode
('utf-8'))

# Check symlinks - symlinks don't get copied by sky storage
assert (pathlib.Path(tmp_mount) / 'circle-link').is_symlink(), (
'circle-link was not found in the upload source - '
'are the test fixtures correct?')
assert 'circle-link' not in out.decode('utf-8'), (
'Symlink found in bucket - ls output was : {}'.format(
out.decode('utf-8')))

# Run sky storage ls to check if storage object exists in the output.
# It should not exist because the bucket was created externally.
out = subprocess.check_output(['sky', 'storage', 'ls'])
Expand Down

0 comments on commit ade89e7

Please sign in to comment.