Skip to content

Commit

Permalink
fix(generate): remove experimental compilation flag
Browse files Browse the repository at this point in the history
Using this flag speeds up compilation, but also increases inference
latency by 25 to 35%.
  • Loading branch information
dacorvo committed Sep 14, 2023
1 parent 1c4afc8 commit 1a64c76
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion optimum/neuron/modeling_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _from_pretrained(
# Compile the Neuron model (if present compiled artifacts will be reloaded instead of compiled)
neuron_cc_flags = os.environ.get("NEURON_CC_FLAGS", "")
os.environ["NEURON_CC_FLAGS"] = (
- neuron_cc_flags + " --model-type=transformer-inference --enable-experimental-O1"
+ neuron_cc_flags + " --model-type=transformer-inference"
)
neuronx_model.to_neuron()
os.environ["NEURON_CC_FLAGS"] = neuron_cc_flags
Expand Down

0 comments on commit 1a64c76

Please sign in to comment.