From 7729722c927ef0be8abac6881ecc26ec240d3bfe Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:05:38 +0000
Subject: [PATCH 1/3] temp patch

---
 optimum/exporters/neuron/convert.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/optimum/exporters/neuron/convert.py b/optimum/exporters/neuron/convert.py
index 4438a7414..40c87bf62 100644
--- a/optimum/exporters/neuron/convert.py
+++ b/optimum/exporters/neuron/convert.py
@@ -342,6 +342,14 @@ def export_models(
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
+
+            # TODO: Remove once the weights/NEFF separation compilation of SDXL is fixed by a Neuron SDK release: https://github.com/aws-neuron/aws-neuron-sdk/issues/859
+            if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
+                logger.warning(
+                    "The compilation of SDXL's UNet with weights/NEFF separation has been broken since the Neuron SDK 2.18 release. `inline_weights_to_neff` will be set to True and caching will be disabled. If you still want to separate the NEFF and weights, please downgrade your Neuron setup to the 2.17.1 release."
+                )
+                inline_weights_to_neff = True
+
             start_time = time.time()
             neuron_inputs, neuron_outputs = export(
                 model=submodel,
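For context, a minimal standalone sketch of the fallback guard this hunk introduces is below. `SubNeuronConfig` and `resolve_inline_weights_to_neff` are hypothetical stand-ins for illustration only; in the real code the check sits inline in `export_models` and `sub_neuron_config` comes from the exporter.

```python
# A runnable sketch of the guard added in patch 1/3, lifted out of the export loop.
# SubNeuronConfig and resolve_inline_weights_to_neff are illustrative stand-ins,
# not part of optimum-neuron's API.
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class SubNeuronConfig:
    is_sdxl: bool = False


def resolve_inline_weights_to_neff(inline_weights_to_neff: bool, sub_neuron_config) -> bool:
    # Weights/NEFF separation for SDXL's UNet is broken on Neuron SDK 2.18
    # (aws-neuron/aws-neuron-sdk#859): force inlining, which disables caching.
    if not inline_weights_to_neff and getattr(sub_neuron_config, "is_sdxl", False):
        logger.warning("Forcing `inline_weights_to_neff=True` for SDXL; caching is disabled.")
        return True
    return inline_weights_to_neff


# SDXL submodels get forced to inlined weights; everything else is untouched.
assert resolve_inline_weights_to_neff(False, SubNeuronConfig(is_sdxl=True)) is True
assert resolve_inline_weights_to_neff(False, SubNeuronConfig(is_sdxl=False)) is False
assert resolve_inline_weights_to_neff(True, SubNeuronConfig(is_sdxl=True)) is True
```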
From bd3af8b752fe0cd8aef63d6d1d185fd60099ad36 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:36:46 +0000
Subject: [PATCH 2/3] disable sdxl caching test

---
 tests/cache/test_neuronx_cache.py | 57 ++++++++++++++++---------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py
index 319efe3a5..8a90e8de8 100644
--- a/tests/cache/test_neuronx_cache.py
+++ b/tests/cache/test_neuronx_cache.py
@@ -272,34 +272,35 @@ def test_stable_diffusion_cache(cache_repos):
     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
-@is_inferentia_test
-@requires_neuronx
-def test_stable_diffusion_xl_cache(cache_repos):
-    cache_path, cache_repo_id = cache_repos
-    model_id = "echarlaix/tiny-random-stable-diffusion-xl"
-    # Export the model a first time to populate the local cache
-    model = export_stable_diffusion_xl_model(model_id)
-    check_stable_diffusion_inference(model)
-    # check registry
-    check_traced_cache_entry(cache_path)
-    # Synchronize the hub cache with the local cache
-    synchronize_hub_cache(cache_repo_id=cache_repo_id)
-    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
-    # Verify we are able to fetch the cached entry for the model
-    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
-    assert len(model_entries) == 1
-    # Clear the local cache
-    for root, dirs, files in os.walk(cache_path):
-        for f in files:
-            os.unlink(os.path.join(root, f))
-        for d in dirs:
-            shutil.rmtree(os.path.join(root, d))
-    assert local_cache_size(cache_path) == 0
-    # Export the model again: the compilation artifacts should be fetched from the Hub
-    model = export_stable_diffusion_xl_model(model_id)
-    check_stable_diffusion_inference(model)
-    # Verify the local cache directory has not been populated
-    assert len(get_local_cached_files(cache_path, ".neuron")) == 0
+# TODO: Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859
+# @is_inferentia_test
+# @requires_neuronx
+# def test_stable_diffusion_xl_cache(cache_repos):
+#     cache_path, cache_repo_id = cache_repos
+#     model_id = "echarlaix/tiny-random-stable-diffusion-xl"
+#     # Export the model a first time to populate the local cache
+#     model = export_stable_diffusion_xl_model(model_id)
+#     check_stable_diffusion_inference(model)
+#     # check registry
+#     check_traced_cache_entry(cache_path)
+#     # Synchronize the hub cache with the local cache
+#     synchronize_hub_cache(cache_repo_id=cache_repo_id)
+#     assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
+#     # Verify we are able to fetch the cached entry for the model
+#     model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
+#     assert len(model_entries) == 1
+#     # Clear the local cache
+#     for root, dirs, files in os.walk(cache_path):
+#         for f in files:
+#             os.unlink(os.path.join(root, f))
+#         for d in dirs:
+#             shutil.rmtree(os.path.join(root, d))
+#     assert local_cache_size(cache_path) == 0
+#     # Export the model again: the compilation artifacts should be fetched from the Hub
+#     model = export_stable_diffusion_xl_model(model_id)
+#     check_stable_diffusion_inference(model)
+#     # Verify the local cache directory has not been populated
+#     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
 @is_inferentia_test
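Patch 3/3 below replaces this comment-out approach with a `pytest.mark.skip` marker, which keeps the test collected and visible as skipped in reports instead of hiding it. A toy illustration of the pattern, with a placeholder test name and reason:

```python
# Toy illustration of why a skip marker beats commenting a test out: the test
# stays collected, shows up as skipped ("s") in the report, and carries a reason
# pointing at the upstream issue. The test name here is a placeholder.
import pytest


@pytest.mark.skip(reason="Disabled until https://github.com/aws-neuron/aws-neuron-sdk/issues/859 is fixed")
def test_sdxl_cache_placeholder():
    raise AssertionError("never executed while the skip marker is present")
```

Unlike a commented-out test, a skipped test is still parsed at collection time, so it cannot silently rot into invalid code.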
From e4d8732dadffa08492bf6d779bd0c771999109ae Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 5 Apr 2024 14:50:45 +0000
Subject: [PATCH 3/3] pytest skip

---
 tests/cache/test_neuronx_cache.py | 58 +++++++++++++++----------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/tests/cache/test_neuronx_cache.py b/tests/cache/test_neuronx_cache.py
index 8a90e8de8..83a0bc5e1 100644
--- a/tests/cache/test_neuronx_cache.py
+++ b/tests/cache/test_neuronx_cache.py
@@ -272,35 +272,35 @@ def test_stable_diffusion_cache(cache_repos):
     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
-# TODO: Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859
-# @is_inferentia_test
-# @requires_neuronx
-# def test_stable_diffusion_xl_cache(cache_repos):
-#     cache_path, cache_repo_id = cache_repos
-#     model_id = "echarlaix/tiny-random-stable-diffusion-xl"
-#     # Export the model a first time to populate the local cache
-#     model = export_stable_diffusion_xl_model(model_id)
-#     check_stable_diffusion_inference(model)
-#     # check registry
-#     check_traced_cache_entry(cache_path)
-#     # Synchronize the hub cache with the local cache
-#     synchronize_hub_cache(cache_repo_id=cache_repo_id)
-#     assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
-#     # Verify we are able to fetch the cached entry for the model
-#     model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
-#     assert len(model_entries) == 1
-#     # Clear the local cache
-#     for root, dirs, files in os.walk(cache_path):
-#         for f in files:
-#             os.unlink(os.path.join(root, f))
-#         for d in dirs:
-#             shutil.rmtree(os.path.join(root, d))
-#     assert local_cache_size(cache_path) == 0
-#     # Export the model again: the compilation artifacts should be fetched from the Hub
-#     model = export_stable_diffusion_xl_model(model_id)
-#     check_stable_diffusion_inference(model)
-#     # Verify the local cache directory has not been populated
-#     assert len(get_local_cached_files(cache_path, ".neuron")) == 0
+@is_inferentia_test
+@requires_neuronx
+@pytest.mark.skip("Disable the test due to https://github.com/aws-neuron/aws-neuron-sdk/issues/859")
+def test_stable_diffusion_xl_cache(cache_repos):
+    cache_path, cache_repo_id = cache_repos
+    model_id = "echarlaix/tiny-random-stable-diffusion-xl"
+    # Export the model a first time to populate the local cache
+    model = export_stable_diffusion_xl_model(model_id)
+    check_stable_diffusion_inference(model)
+    # check registry
+    check_traced_cache_entry(cache_path)
+    # Synchronize the hub cache with the local cache
+    synchronize_hub_cache(cache_repo_id=cache_repo_id)
+    assert_local_and_hub_cache_sync(cache_path, cache_repo_id)
+    # Verify we are able to fetch the cached entry for the model
+    model_entries = get_hub_cached_entries(model_id, "inference", cache_repo_id=cache_repo_id)
+    assert len(model_entries) == 1
+    # Clear the local cache
+    for root, dirs, files in os.walk(cache_path):
+        for f in files:
+            os.unlink(os.path.join(root, f))
+        for d in dirs:
+            shutil.rmtree(os.path.join(root, d))
+    assert local_cache_size(cache_path) == 0
+    # Export the model again: the compilation artifacts should be fetched from the Hub
+    model = export_stable_diffusion_xl_model(model_id)
+    check_stable_diffusion_inference(model)
+    # Verify the local cache directory has not been populated
+    assert len(get_local_cached_files(cache_path, ".neuron")) == 0
 
 
 @is_inferentia_test