fix(generate): remove experimental compilation flag

Using this flag speeds up compilation, but it also increases inference latency by 25 to 35%.
huggingface · Sep 14, 2023 · 1a64c76 · 1a64c76
1 parent 1c4afc8
commit 1a64c76
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/optimum/neuron/modeling_decoder.py b/optimum/neuron/modeling_decoder.py
@@ -207,7 +207,7 @@ def _from_pretrained(
         # Compile the Neuron model (if present compiled artifacts will be reloaded instead of compiled)
         neuron_cc_flags = os.environ.get("NEURON_CC_FLAGS", "")
         os.environ["NEURON_CC_FLAGS"] = (
-            neuron_cc_flags + " --model-type=transformer-inference --enable-experimental-O1"
+            neuron_cc_flags + " --model-type=transformer-inference"
         )
         neuronx_model.to_neuron()
         os.environ["NEURON_CC_FLAGS"] = neuron_cc_flags