Skip to content

Commit

Permalink
feat!: Add hashAlg file extension to files uploaded to CAS (#167)
Browse files — browse the repository at this point in the history
Signed-off-by: Caden Marofke <marofke@amazon.com>
  • Loading branch information
marofke committed Feb 29, 2024
1 parent 41b5964 commit 398da18
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 34 deletions.
6 changes: 2 additions & 4 deletions src/deadline/job_attachments/asset_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,9 @@ def _upload_output_manifest_to_s3(
manifest_name_prefix = hash_data(
f"{file_system_location_name or ''}{root_path}".encode(), hash_alg
)
# TODO: Remove hash algorithm file extension after sufficient time after the next release
manifest_path = _join_s3_paths(
full_output_prefix,
f"{manifest_name_prefix}_output.{output_manifest.hashAlg.value}",
f"{manifest_name_prefix}_output",
)
metadata = {"Metadata": {"asset-root": root_path}}
if file_system_location_name:
Expand Down Expand Up @@ -225,8 +224,7 @@ def _get_output_files(
):
file_size = file_path.lstat().st_size
file_hash = hash_file(str(file_path), self.hash_alg)
# TODO: replace with uncommented line below after sufficient time after the next release
s3_key = file_hash # f"{file_hash}.{self.hash_alg.value}"
s3_key = f"{file_hash}.{self.hash_alg.value}"

if s3_settings.full_cas_prefix():
s3_key = _join_s3_paths(s3_settings.full_cas_prefix(), s3_key)
Expand Down
3 changes: 1 addition & 2 deletions src/deadline/job_attachments/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,7 @@ def upload_object_to_cas(
Returns a tuple (whether it has been uploaded, the file size).
"""
local_path = source_root.joinpath(file.path)
# TODO: replace with uncommented line below after sufficient time after the next release
s3_upload_key = f"{file.hash}" # .{hash_algorithm.value}"
s3_upload_key = f"{file.hash}.{hash_algorithm.value}"
if s3_cas_prefix:
s3_upload_key = _join_s3_paths(s3_cas_prefix, s3_upload_key)
is_uploaded = False
Expand Down
12 changes: 6 additions & 6 deletions test/integ/deadline_job_attachments/test_job_attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def upload_input_files_assets_not_in_cas(job_attachment_test: JobAttachmentTest)

# THEN
scene_ma_s3_path = (
f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}"
f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}.xxh128"
)

object_summary_iterator = job_attachment_test.bucket.objects.filter(
Expand Down Expand Up @@ -211,7 +211,7 @@ def upload_input_files_one_asset_in_cas(
]

scene_ma_s3_path = (
f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}"
f"{job_attachment_settings.full_cas_prefix()}/{job_attachment_test.SCENE_MA_HASH}.xxh128"
)

# This file has already been uploaded
Expand Down Expand Up @@ -245,8 +245,8 @@ def upload_input_files_one_asset_in_cas(
brick_png_hash = hash_file(str(job_attachment_test.BRICK_PNG_PATH), HashAlgorithm.XXH128)
cloth_png_hash = hash_file(str(job_attachment_test.CLOTH_PNG_PATH), HashAlgorithm.XXH128)

brick_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{brick_png_hash}"
cloth_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{cloth_png_hash}"
brick_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{brick_png_hash}.xxh128"
cloth_png_s3_path = f"{job_attachment_settings.full_cas_prefix()}/{cloth_png_hash}.xxh128"

object_summary_iterator = job_attachment_test.bucket.objects.filter(
Prefix=f"{job_attachment_settings.full_cas_prefix()}/",
Expand Down Expand Up @@ -831,11 +831,11 @@ def sync_outputs(
object_key_set = set(obj.key for obj in object_summary_iterator)

assert (
f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_to_be_synced_step0_task0), HashAlgorithm.XXH128)}"
f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_to_be_synced_step0_task0), HashAlgorithm.XXH128)}.xxh128"
in object_key_set
)
assert (
f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_not_to_be_synced), HashAlgorithm.XXH128)}"
f"{job_attachment_settings.full_cas_prefix()}/{hash_file(str(file_not_to_be_synced), HashAlgorithm.XXH128)}.xxh128"
not in object_key_set
)

Expand Down
14 changes: 7 additions & 7 deletions test/unit/deadline_job_attachments/test_asset_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,11 +528,11 @@ def test_sync_outputs(
s3 = boto3.Session(region_name="us-west-2").resource("s3") # pylint: disable=invalid-name
bucket = s3.Bucket(s3_settings.s3BucketName)
bucket.put_object(
Key=f"{expected_cas_prefix}hash1",
Key=f"{expected_cas_prefix}hash1.xxh128",
Body="a",
)
expected_metadata = s3.meta.client.head_object(
Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1"
Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1.xxh128"
)

# WHEN
Expand Down Expand Up @@ -585,21 +585,21 @@ def test_sync_outputs(

# THEN
actual_metadata = s3.meta.client.head_object(
Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1"
Bucket=s3_settings.s3BucketName, Key=f"{expected_cas_prefix}hash1.xxh128"
)
assert actual_metadata["LastModified"] == expected_metadata["LastModified"]
assert_expected_files_on_s3(
bucket,
expected_files={
f"{expected_cas_prefix}hash1",
f"{expected_cas_prefix}hash2",
f"{expected_output_prefix}hash3_output.xxh128",
f"{expected_cas_prefix}hash1.xxh128",
f"{expected_cas_prefix}hash2.xxh128",
f"{expected_output_prefix}hash3_output",
},
)

assert_canonical_manifest(
bucket,
f"{expected_output_prefix}hash3_output.xxh128",
f"{expected_output_prefix}hash3_output",
expected_manifest='{"hashAlg":"xxh128","manifestVersion":"2023-03-03",'
f'"paths":[{{"hash":"hash2","mtime":{expected_sub_file_mtime},"path":"{expected_sub_file_rel_path}",'
f'"size":{expected_processed_bytes}}},'
Expand Down
30 changes: 15 additions & 15 deletions test/unit/deadline_job_attachments/test_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,10 @@ def test_asset_management(
bucket,
expected_files={
f"assetRoot/Manifests/{farm_id}/{queue_id}/Inputs/0000/e_input",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/a",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/b",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/c",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/d",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/a.xxh128",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/b.xxh128",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/c.xxh128",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/d.xxh128",
},
)

Expand Down Expand Up @@ -438,7 +438,7 @@ def test_asset_management_windows_multi_root(
bucket,
expected_files={
f"{self.job_attachment_s3_settings.rootPrefix}/Manifests/{farm_id}/{queue_id}/Inputs/0000/b_input",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/a",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/a.xxh128",
},
)

Expand Down Expand Up @@ -600,7 +600,7 @@ def test_asset_management_many_inputs(

expected_files = set(
[
f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}"
f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}.xxh128"
for i in range(num_input_files)
]
)
Expand Down Expand Up @@ -792,7 +792,7 @@ def mock_hash_file(file_path: str, hash_alg: HashAlgorithm):

# mock pre-uploading the file
bucket.put_object(
Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash",
Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash.xxh128",
Body="a",
)

Expand Down Expand Up @@ -856,8 +856,8 @@ def mock_hash_file(file_path: str, hash_alg: HashAlgorithm):
bucket,
expected_files={
f"{self.job_attachment_s3_settings.rootPrefix}/Manifests/{farm_id}/{queue_id}/Inputs/0000/manifest_input",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/somethingnew",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/existinghash.xxh128",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/somethingnew.xxh128",
},
)

Expand Down Expand Up @@ -933,7 +933,7 @@ def test_asset_management_no_outputs_large_number_of_inputs_already_uploaded(
input_files.append(test_file)
# mock pre-uploading the file
bucket.put_object(
Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}",
Key=f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}.xxh128",
Body=f"test {i}",
)

Expand Down Expand Up @@ -997,7 +997,7 @@ def test_asset_management_no_outputs_large_number_of_inputs_already_uploaded(

expected_files = set(
[
f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}"
f"{self.job_attachment_s3_settings.full_cas_prefix()}/{i}.xxh128"
for i in range(num_input_files)
]
)
Expand Down Expand Up @@ -1770,7 +1770,7 @@ def test_manage_assets_with_symlinks(
bucket,
expected_files={
f"assetRoot/Manifests/{farm_id}/{queue_id}/Inputs/0000/manifest_input",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/a",
f"{self.job_attachment_s3_settings.full_cas_prefix()}/a.xxh128",
},
)

Expand Down Expand Up @@ -2274,7 +2274,7 @@ def test_upload_object_to_cas_skips_upload_with_cache(
job_attachment_settings=self.job_attachment_s3_settings,
asset_manifest_version=manifest_version,
)
s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash"
s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash.xxh128"
test_entry = S3CheckCacheEntry(s3_key, "123.45")
s3_cache = MagicMock()
s3_cache.get_entry.return_value = test_entry
Expand Down Expand Up @@ -2327,7 +2327,7 @@ def test_upload_object_to_cas_adds_cache_entry(
job_attachment_settings=self.job_attachment_s3_settings,
asset_manifest_version=manifest_version,
)
s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash"
s3_key = f"{default_job_attachment_s3_settings.s3BucketName}/prefix/test-hash.xxh128"
s3_cache = MagicMock()
s3_cache.get_entry.return_value = None
expected_new_entry = S3CheckCacheEntry(s3_key, "345.67")
Expand Down Expand Up @@ -2359,7 +2359,7 @@ def test_upload_object_to_cas_adds_cache_entry(

assert_expected_files_on_s3(
bucket,
expected_files={"prefix/test-hash"},
expected_files={"prefix/test-hash.xxh128"},
)


Expand Down

0 comments on commit 398da18

Please sign in to comment.