From 7729722c927ef0be8abac6881ecc26ec240d3bfe Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:05:38 +0000
Subject: [PATCH 1/3] temp patch

---
 optimum/exporters/neuron/convert.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py
index 4438a7414..40c87bf62 100644
--- a/optimum/exporters/neuron/convert.py
+++ b/optimum/exporters/neuron/convert.py
@@ -342,6 +342,14 @@ def export_models(
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
+
+            # TODO: Remove once the weights/NEFF separation compilation of SDXL is fixed by a Neuron SDK release: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
+            if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
+                logger.warning(
+                    "The compilation of SDXL's UNet with weights/NEFF separation has been broken since the Neuron SDK 2.18 release. `inline_weights_to_neff` will be set to True and caching will be disabled. If you still want to separate the NEFF and weights, please downgrade your Neuron setup to the 2.17.1 release."
+                )
+                inline_weights_to_neff = True
+
             start_time = time.time()
             neuron_inputs, neuron_outputs = export(
                 model=submodel,
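For context, a minimal standalone sketch of the fallback guard this hunk introduces is below. `SubNeuronConfig` and `resolve_inline_weights_to_neff` are hypothetical stand-ins for illustration only; in the real code the check sits inline in `export_models` and `sub_neuron_config` comes from the exporter.

```python
# A runnable sketch of the guard added in patch 1/3, lifted out of the export loop.
# SubNeuronConfig and resolve_inline_weights_to_neff are illustrative stand-ins,
# not part of optimum-neuron's API.
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class SubNeuronConfig:
    is_sdxl: bool = False


def resolve_inline_weights_to_neff(inline_weights_to_neff: bool, sub_neuron_config) -> bool:
    # Weights/NEFF separation for SDXL's UNet is broken on Neuron SDK 2.18
    # (aws-neuron/aws-neuron-sdk#859): force inlining, which disables caching.
    if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
        logger.warning("Forcing `inline_weights_to_neff=True` for SDXL; caching is disabled.")
        return True
    return inline_weights_to_neff


# SDXL submodels get forced to inlined weights; everything else is untouched.
assert resolve_inline_weights_to_neff(False, SubNeuronConfig(is_sdxl=True)) is True
assert resolve_inline_weights_to_neff(False, SubNeuronConfig(is_sdxl=False)) is False
assert resolve_inline_weights_to_neff(True, SubNeuronConfig(is_sdxl=True)) is True
```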
From bd3af8b752fe0cd8aef63d6d1d185fd60099ad36 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:36:46 +0000
Subject: [PATCH 2/3] disable sdxl caching test

---
 tests/cache/test_neuronx_cache.py | 57 ++++++++++++++++---------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py
index 319efe3a5..8a90e8de8 100644
--- a/tests/cache/test_neuronx_cache.py
+++ b/tests/cache/test_neuronx_cache.py
@@ -272,34 +272,35 @@ def test_stable_diffusion_cache(cache_repos):
     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
-@is_inferentia_test
-@requires_neuronx
-def test_stable_diffusion_xl_cache(cache_repos):
-    cache_path, cache_repo_id = cache_repos
-    model_id = "echarlaix/tiny-random-stable-diffusion-xl"
-    # Export the model a first time to populate the local cache
-    model = export_stable_diffusion_xl_model(model_id)
-    check_stable_diffusion_inference(model)
-    # check registry
-    check_traced_cache_entry(cache_path)
-    # Synchronize the hub cache with the local cache
-    synchronize_hub_cache(cache_repo_id=cache_repo_id)
-    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
-    # Verify we are able to fetch the cached entry for the model
-    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
-    assert len(model_entries) == 1
-    # Clear the local cache
-    for root, dirs, files in os.walk(cache_path):
-        for f in files:
-            os.unlink(os.path.join(root, f))
-        for d in dirs:
-            shutil.rmtree(os.path.join(root, d))
-    assert local_cache_size(cache_path) == 0
-    # Export the model again: the compilation artifacts should be fetched from the Hub
-    model = export_stable_diffusion_xl_model(model_id)
-    check_stable_diffusion_inference(model)
-    # Verify the local cache directory has not been populated
-    assert len(get_local_cached_files(cache_path, ".neuron")) == 0
+# TODO: Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859
+# @is_inferentia_test
+# @requires_neuronx
+# def test_stable_diffusion_xl_cache(cache_repos):
+#     cache_path, cache_repo_id = cache_repos
+#     model_id = "echarlaix/tiny-random-stable-diffusion-xl"
+#     # Export the model a first time to populate the local cache
+#     model = export_stable_diffusion_xl_model(model_id)
+#     check_stable_diffusion_inference(model)
+#     # check registry
+#     check_traced_cache_entry(cache_path)
+#     # Synchronize the hub cache with the local cache
+#     synchronize_hub_cache(cache_repo_id=cache_repo_id)
+#     assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
+#     # Verify we are able to fetch the cached entry for the model
+#     model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
+#     assert len(model_entries) == 1
+#     # Clear the local cache
+#     for root, dirs, files in os.walk(cache_path):
+#         for f in files:
+#             os.unlink(os.path.join(root, f))
+#         for d in dirs:
+#             shutil.rmtree(os.path.join(root, d))
+#     assert local_cache_size(cache_path) == 0
+#     # Export the model again: the compilation artifacts should be fetched from the Hub
+#     model = export_stable_diffusion_xl_model(model_id)
+#     check_stable_diffusion_inference(model)
+#     # Verify the local cache directory has not been populated
+#     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
 @is_inferentia_test
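Patch 3/3 below replaces this comment-out approach with a `pytest.mark.skip` marker, which keeps the test collected and visible as skipped in reports instead of hiding it. A toy illustration of the pattern, with a placeholder test name and reason:

```python
# Toy illustration of why a skip marker beats commenting a test out: the test
# stays collected, shows up as skipped ("s") in the report, and carries a reason
# pointing at the upstream issue. The test name here is a placeholder.
import pytest


@pytest.mark.skip(reason="Disabled until https://github.com/aws-neuron/aws-neuron-sdk/issues/859 is fixed")
def test_sdxl_cache_placeholder():
    raise AssertionError("never executed while the skip marker is present")
```

Unlike a commented-out test, a skipped test is still parsed at collection time, so it cannot silently rot into invalid code.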
From e4d8732dadffa08492bf6d779bd0c771999109ae Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:50:45 +0000
Subject: [PATCH 3/3] pytest skip

---
 tests/cache/test_neuronx_cache.py | 58 +++++++++++++++----------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py
index 8a90e8de8..83a0bc5e1 100644
--- a/tests/cache/test_neuronx_cache.py
+++ b/tests/cache/test_neuronx_cache.py
@@ -272,35 +272,35 @@ def test_stable_diffusion_cache(cache_repos):
     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
-# TODO: Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859
-# @is_inferentia_test
-# @requires_neuronx
-# def test_stable_diffusion_xl_cache(cache_repos):
-#     cache_path, cache_repo_id = cache_repos
-#     model_id = "echarlaix/tiny-random-stable-diffusion-xl"
-#     # Export the model a first time to populate the local cache
-#     model = export_stable_diffusion_xl_model(model_id)
-#     check_stable_diffusion_inference(model)
-#     # check registry
-#     check_traced_cache_entry(cache_path)
-#     # Synchronize the hub cache with the local cache
-#     synchronize_hub_cache(cache_repo_id=cache_repo_id)
-#     assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
-#     # Verify we are able to fetch the cached entry for the model
-#     model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
-#     assert len(model_entries) == 1
-#     # Clear the local cache
-#     for root, dirs, files in os.walk(cache_path):
-#         for f in files:
-#             os.unlink(os.path.join(root, f))
-#         for d in dirs:
-#             shutil.rmtree(os.path.join(root, d))
-#     assert local_cache_size(cache_path) == 0
-#     # Export the model again: the compilation artifacts should be fetched from the Hub
-#     model = export_stable_diffusion_xl_model(model_id)
-#     check_stable_diffusion_inference(model)
-#     # Verify the local cache directory has not been populated
-#     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
+@is_inferentia_test
+@requires_neuronx
+@pytest.mark.skip("Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859")
+def test_stable_diffusion_xl_cache(cache_repos):
+    cache_path, cache_repo_id = cache_repos
+    model_id = "echarlaix/tiny-random-stable-diffusion-xl"
+    # Export the model a first time to populate the local cache
+    model = export_stable_diffusion_xl_model(model_id)
+    check_stable_diffusion_inference(model)
+    # check registry
+    check_traced_cache_entry(cache_path)
+    # Synchronize the hub cache with the local cache
+    synchronize_hub_cache(cache_repo_id=cache_repo_id)
+    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
+    # Verify we are able to fetch the cached entry for the model
+    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
+    assert len(model_entries) == 1
+    # Clear the local cache
+    for root, dirs, files in os.walk(cache_path):
+        for f in files:
+            os.unlink(os.path.join(root, f))
+        for d in dirs:
+            shutil.rmtree(os.path.join(root, d))
+    assert local_cache_size(cache_path) == 0
+    # Export the model again: the compilation artifacts should be fetched from the Hub
+    model = export_stable_diffusion_xl_model(model_id)
+    check_stable_diffusion_inference(model)
+    # Verify the local cache directory has not been populated
+    assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
 @is_inferentia_test