From fe49cc565ffc9668fdcff6b99788235bd8da0fcb Mon Sep 17 00:00:00 2001 From: Karthikeyan Bhargavan Date: Tue, 3 Dec 2024 18:40:41 +0100 Subject: [PATCH 1/7] fixing code to address review comments --- .../extraction/Libcrux_ml_kem.Invert_ntt.fst | 16 +-- .../extraction/Libcrux_ml_kem.Mlkem1024.fsti | 59 +++------ .../extraction/Libcrux_ml_kem.Mlkem768.fsti | 59 +++------ .../fstar/extraction/Libcrux_ml_kem.Ntt.fst | 16 +-- .../extraction/Libcrux_ml_kem.Polynomial.fst | 10 +- .../extraction/Libcrux_ml_kem.Polynomial.fsti | 2 +- libcrux-ml-kem/src/hash_functions.rs | 12 -- libcrux-ml-kem/src/ind_cca.rs | 7 +- libcrux-ml-kem/src/ind_cpa.rs | 1 + libcrux-ml-kem/src/invert_ntt.rs | 18 +-- libcrux-ml-kem/src/mlkem512.rs | 30 ++--- libcrux-ml-kem/src/ntt.rs | 18 +-- libcrux-ml-kem/src/polynomial.rs | 21 ++-- libcrux-ml-kem/src/vector/avx2.rs | 8 -- libcrux-ml-kem/src/vector/avx2/arithmetic.rs | 5 - libcrux-ml-kem/src/vector/portable/ntt.rs | 15 --- .../src/vector/portable/serialize.rs | 112 ------------------ libcrux-ml-kem/src/vector/traits.rs | 2 + 18 files changed, 100 insertions(+), 311 deletions(-) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst index aeccf049f..53290fba7 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst @@ -84,10 +84,10 @@ let invert_ntt_at_layer_1_ (Libcrux_ml_kem.Vector.Traits.f_inv_ntt_layer_1_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 1 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 2 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 3 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 2 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 3 <: usize) <: i16) <: v_Vector) } @@ -165,8 +165,8 @@ let invert_ntt_at_layer_2_ (Libcrux_ml_kem.Vector.Traits.f_inv_ntt_layer_2_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 1 <: usize) <: i16) <: v_Vector) } @@ -244,7 +244,7 @@ let invert_ntt_at_layer_3_ (Libcrux_ml_kem.Vector.Traits.f_inv_ntt_layer_3_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) <: v_Vector) } @@ -317,7 +317,7 @@ let invert_ntt_at_layer_4_plus (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ j +! 
step_vec <: usize ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) in let re:Libcrux_ml_kem.Polynomial.t_PolynomialRingElement v_Vector = { diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti index 007e5c86f..b31f845fc 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti @@ -3,64 +3,39 @@ module Libcrux_ml_kem.Mlkem1024 open Core open FStar.Mul -let v_ETA1: usize = sz 2 +let v_C1_BLOCK_SIZE_1024_: usize = sz 352 -let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 +let v_C1_SIZE_1024_: usize = sz 1408 -let v_ETA2: usize = sz 2 +let v_C2_SIZE_1024_: usize = sz 160 -let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 +let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = sz 1568 -let v_RANK_1024_: usize = sz 4 +let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = sz 1568 -let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = - ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = sz 1536 -let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = - (v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 +let v_ETA1: usize = sz 2 -let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = - ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 -let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = v_T_AS_NTT_ENCODED_SIZE_1024_ +! sz 32 +let v_ETA2: usize = sz 2 -let v_SECRET_KEY_SIZE_1024_: usize = - ((v_CPA_PKE_SECRET_KEY_SIZE_1024_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_1024_ <: usize) +! - Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE - <: - usize) +! - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 -let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1600 -let v_C1_BLOCK_SIZE_1024_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_1024_ - <: - usize) /! - sz 8 +let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = sz 1536 -let v_C1_SIZE_1024_: usize = v_C1_BLOCK_SIZE_1024_ *! v_RANK_1024_ +let v_RANK_1024_: usize = sz 4 -let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 +let v_SECRET_KEY_SIZE_1024_: usize = sz 3168 -let v_C2_SIZE_1024_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_1024_ - <: - usize) /! - sz 8 +let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = sz 1536 -let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = v_C1_SIZE_1024_ +! v_C2_SIZE_1024_ +let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_1024_ +let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
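Note (not part of the patch): the extracted Mlkem1024.fsti constants above are now literal values instead of the derived expressions. A minimal Rust sketch, assuming the usual libcrux base constants (256 coefficients per ring element, 12 bits per coefficient, 32-byte H digest and shared secret), confirms the literals agree with the old derivations; the names below are illustrative only.

fn main() {
    // Assumed base constants (cf. libcrux_ml_kem::constants).
    const COEFFS: usize = 256;
    const BITS_PER_COEFF: usize = 12;
    const H_DIGEST: usize = 32;
    const SHARED_SECRET: usize = 32;
    // ML-KEM 1024: rank 4, u-compression 11, v-compression 5.
    let (rank, d_u, d_v) = (4usize, 11usize, 5usize);
    let ranked_bytes_per_ring_element = rank * COEFFS * BITS_PER_COEFF / 8;
    let t_as_ntt_encoded = rank * COEFFS * BITS_PER_COEFF / 8;
    let cpa_public_key = t_as_ntt_encoded + 32;
    let cpa_secret_key = rank * COEFFS * BITS_PER_COEFF / 8;
    let c1_block = COEFFS * d_u / 8;
    let c1 = c1_block * rank;
    let c2 = COEFFS * d_v / 8;
    let ciphertext = c1 + c2;
    assert_eq!(ranked_bytes_per_ring_element, 1536);
    assert_eq!((t_as_ntt_encoded, cpa_public_key, cpa_secret_key), (1536, 1568, 1536));
    assert_eq!((c1_block, c1, c2, ciphertext), (352, 1408, 160, 1568));
    assert_eq!(cpa_secret_key + cpa_public_key + H_DIGEST + SHARED_SECRET, 3168);
    assert_eq!(SHARED_SECRET + ciphertext, 1600);
}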
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti index d1d7c217f..928e6a233 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti @@ -3,64 +3,39 @@ module Libcrux_ml_kem.Mlkem768 open Core open FStar.Mul -let v_ETA1: usize = sz 2 +let v_C1_BLOCK_SIZE_768_: usize = sz 320 -let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 +let v_C1_SIZE_768_: usize = sz 960 -let v_ETA2: usize = sz 2 +let v_C2_SIZE_768_: usize = sz 128 -let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 +let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = sz 1088 -let v_RANK_768_: usize = sz 3 +let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = sz 1184 -let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = - ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = sz 1152 -let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = - (v_RANK_768_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 +let v_ETA1: usize = sz 2 -let v_T_AS_NTT_ENCODED_SIZE_768_: usize = - ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 -let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = v_T_AS_NTT_ENCODED_SIZE_768_ +! sz 32 +let v_ETA2: usize = sz 2 -let v_SECRET_KEY_SIZE_768_: usize = - ((v_CPA_PKE_SECRET_KEY_SIZE_768_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_768_ <: usize) +! - Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE - <: - usize) +! - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 -let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1120 -let v_C1_BLOCK_SIZE_768_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_768_ - <: - usize) /! - sz 8 +let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = sz 1152 -let v_C1_SIZE_768_: usize = v_C1_BLOCK_SIZE_768_ *! v_RANK_768_ +let v_RANK_768_: usize = sz 3 -let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 +let v_SECRET_KEY_SIZE_768_: usize = sz 2400 -let v_C2_SIZE_768_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_768_ - <: - usize) /! - sz 8 +let v_T_AS_NTT_ENCODED_SIZE_768_: usize = sz 1152 -let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = v_C1_SIZE_768_ +! v_C2_SIZE_768_ +let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_768_ +let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
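Note (not part of the patch): the same inlining happens in Mlkem768.fsti below. The corresponding check under the same assumed base constants, with rank 3, u-compression 10, v-compression 4; again an illustrative Rust sketch, not patch code.

fn main() {
    let (rank, d_u, d_v) = (3usize, 10usize, 4usize);
    // 1152: also RANKED_BYTES_PER_RING_ELEMENT_768_ and the CPA secret key size.
    let t_as_ntt_encoded = rank * 256 * 12 / 8;
    let cpa_public_key = t_as_ntt_encoded + 32;
    let c1 = (256 * d_u / 8) * rank;
    let c2 = 256 * d_v / 8;
    let ciphertext = c1 + c2;
    assert_eq!((t_as_ntt_encoded, cpa_public_key), (1152, 1184));
    assert_eq!((c1, c2, ciphertext), (960, 128, 1088));
    assert_eq!(t_as_ntt_encoded + cpa_public_key + 32 + 32, 2400); // SECRET_KEY_SIZE_768_
    assert_eq!(32 + ciphertext, 1120); // IMPLICIT_REJECTION_HASH_INPUT_SIZE
}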
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst index 2c5a30cb2..41d6dfad3 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst @@ -81,10 +81,10 @@ let ntt_at_layer_1_ (Libcrux_ml_kem.Vector.Traits.f_ntt_layer_1_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 1 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 2 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 3 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 2 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 3 <: usize) <: i16) <: v_Vector) } @@ -163,8 +163,8 @@ let ntt_at_layer_2_ (Libcrux_ml_kem.Vector.Traits.f_ntt_layer_2_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 1 <: usize) <: i16) <: v_Vector) } @@ -243,7 +243,7 @@ let ntt_at_layer_3_ (Libcrux_ml_kem.Vector.Traits.f_ntt_layer_3_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) <: v_Vector) } @@ -315,7 +315,7 @@ let ntt_at_layer_4_plus (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ j +! step_vec <: usize ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) in let re:Libcrux_ml_kem.Polynomial.t_PolynomialRingElement v_Vector = { diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst index 4dcc55b91..fec53d917 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst @@ -9,7 +9,7 @@ let _ = let open Libcrux_ml_kem.Vector.Traits in () -let get_zeta (i: usize) = +let zeta (i: usize) = let result:i16 = v_ZETAS_TIMES_MONTGOMERY_R.[ i ] in let _:Prims.unit = admit () (* Panic freedom *) in result @@ -355,10 +355,10 @@ let impl_2__ntt_multiply #FStar.Tactics.Typeclasses.solve (self.f_coefficients.[ i ] <: v_Vector) (rhs.f_coefficients.[ i ] <: v_Vector) - (get_zeta (sz 64 +! (sz 4 *! i <: usize) <: usize) <: i16) - (get_zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 1 <: usize) <: i16) - (get_zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 2 <: usize) <: i16) - (get_zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 3 <: usize) <: i16) + (zeta (sz 64 +! (sz 4 *! i <: usize) <: usize) <: i16) + (zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 1 <: usize) <: i16) + (zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 2 <: usize) <: i16) + (zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! 
sz 3 <: usize) <: i16) <: v_Vector) <: diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti index 6ad4d7a0b..6dd0db075 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti @@ -29,7 +29,7 @@ let v_ZETAS_TIMES_MONTGOMERY_R: t_Array i16 (sz 128) = FStar.Pervasives.assert_norm (Prims.eq2 (List.Tot.length list) 128); Rust_primitives.Hax.array_of_list 128 list -val get_zeta (i: usize) +val zeta (i: usize) : Prims.Pure i16 (requires i <. sz 128) (ensures diff --git a/libcrux-ml-kem/src/hash_functions.rs b/libcrux-ml-kem/src/hash_functions.rs index 7641a7266..17d34fdc2 100644 --- a/libcrux-ml-kem/src/hash_functions.rs +++ b/libcrux-ml-kem/src/hash_functions.rs @@ -171,7 +171,6 @@ pub(crate) mod portable { #[hax_lib::attributes] impl Hash for PortableHash { - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_G $input")) ] @@ -180,7 +179,6 @@ pub(crate) mod portable { G(input) } - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_H $input")) ] @@ -190,7 +188,6 @@ pub(crate) mod portable { } #[requires(fstar!("v $LEN < pow2 32"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("v $LEN < pow2 32 ==> $out == Spec.Utils.v_PRF $LEN $input")) @@ -201,7 +198,6 @@ pub(crate) mod portable { } #[requires(fstar!("v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("(v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)) ==> $out == Spec.Utils.v_PRFxN $K $LEN $input")) @@ -428,7 +424,6 @@ pub(crate) mod avx2 { #[hax_lib::attributes] impl Hash for Simd256Hash { - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_G $input")) ] @@ -437,7 +432,6 @@ pub(crate) mod avx2 { G(input) } - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_H $input")) ] @@ -447,7 +441,6 @@ pub(crate) mod avx2 { } #[requires(fstar!("v $LEN < pow2 32"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[hax_lib::ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("v $LEN < pow2 32 ==> $out == Spec.Utils.v_PRF $LEN $input")) @@ -458,7 +451,6 @@ pub(crate) mod avx2 { } #[requires(fstar!("v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("(v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)) ==> $out == Spec.Utils.v_PRFxN $K $LEN $input")) @@ -710,7 +702,6 @@ pub(crate) mod neon { #[hax_lib::attributes] impl Hash for Simd128Hash { - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_G $input")) ] @@ -719,7 +710,6 @@ pub(crate) mod neon { G(input) } - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_H $input")) ] @@ -729,7 +719,6 @@ pub(crate) mod neon { } #[requires(fstar!("v $LEN < pow2 32"))] - 
// Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("v $LEN < pow2 32 ==> $out == Spec.Utils.v_PRF $LEN $input")) @@ -740,7 +729,6 @@ pub(crate) mod neon { } #[requires(fstar!("v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("(v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)) ==> diff --git a/libcrux-ml-kem/src/ind_cca.rs b/libcrux-ml-kem/src/ind_cca.rs index 33ec390e5..18ae0db4a 100644 --- a/libcrux-ml-kem/src/ind_cca.rs +++ b/libcrux-ml-kem/src/ind_cca.rs @@ -427,13 +427,12 @@ pub(crate) fn decapsulate< Scheme::kdf::(&implicit_rejection_shared_secret, ciphertext); let shared_secret = Scheme::kdf::(shared_secret, ciphertext); - let shared_secret = compare_ciphertexts_select_shared_secret_in_constant_time( + compare_ciphertexts_select_shared_secret_in_constant_time( ciphertext.as_ref(), &expected_ciphertext, &shared_secret, &implicit_rejection_shared_secret, - ); - shared_secret + ) } /// Types for the unpacked API. @@ -821,7 +820,7 @@ pub(crate) mod unpacked { Seq.index (Seq.index $result i) j == Seq.index (Seq.index $ind_cpa_a j) i)")) ] - pub(crate) fn transpose_a( + fn transpose_a( ind_cpa_a: [[PolynomialRingElement; K]; K], ) -> [[PolynomialRingElement; K]; K] { // We need to un-transpose the A_transpose matrix provided by IND-CPA diff --git a/libcrux-ml-kem/src/ind_cpa.rs b/libcrux-ml-kem/src/ind_cpa.rs index 935ef0c95..b40bd07ae 100644 --- a/libcrux-ml-kem/src/ind_cpa.rs +++ b/libcrux-ml-kem/src/ind_cpa.rs @@ -200,6 +200,7 @@ fn sample_ring_element_cbd< ) -> ([PolynomialRingElement; K], u8) { let mut error_1 = from_fn(|_i| PolynomialRingElement::::ZERO()); let mut prf_inputs = [prf_input; K]; + // See https://github.com/hacspec/hax/issues/1167 let _domain_separator_init = domain_separator; domain_separator = prf_input_inc::(&mut prf_inputs, domain_separator); hax_lib::fstar!("let lemma_aux (i:nat{ i < v $K }) : Lemma (${prf_inputs}.[sz i] == (Seq.append (Seq.slice $prf_input 0 32) diff --git a/libcrux-ml-kem/src/invert_ntt.rs b/libcrux-ml-kem/src/invert_ntt.rs index 24866eb82..7f9506731 100644 --- a/libcrux-ml-kem/src/invert_ntt.rs +++ b/libcrux-ml-kem/src/invert_ntt.rs @@ -1,6 +1,6 @@ use crate::{ hax_utils::hax_debug_assert, - polynomial::{get_zeta, PolynomialRingElement}, + polynomial::{zeta, PolynomialRingElement}, vector::{montgomery_multiply_fe, Operations, FIELD_ELEMENTS_IN_VECTOR}, }; @@ -55,10 +55,10 @@ pub(crate) fn invert_ntt_at_layer_1( (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::inv_ntt_layer_1_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i - 1), - get_zeta(*zeta_i - 2), - get_zeta(*zeta_i - 3), + zeta(*zeta_i), + zeta(*zeta_i - 1), + zeta(*zeta_i - 2), + zeta(*zeta_i - 3), ); *zeta_i -= 3; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) @@ -104,8 +104,8 @@ pub(crate) fn invert_ntt_at_layer_2( (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::inv_ntt_layer_2_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i - 1), + zeta(*zeta_i), + zeta(*zeta_i - 1), ); *zeta_i -= 1; hax_lib::fstar!("reveal_opaque 
(`%Spec.Utils.is_i16b_array_opaque) @@ -150,7 +150,7 @@ pub(crate) fn invert_ntt_at_layer_3( (Spec.Utils.is_i16b_array_opaque 3328 (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = - Vector::inv_ntt_layer_3_step(re.coefficients[round], get_zeta(*zeta_i)); + Vector::inv_ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); hax_lib::fstar!( "reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque 3328 @@ -210,7 +210,7 @@ pub(crate) fn invert_ntt_at_layer_4_plus( let (x, y) = inv_ntt_layer_int_vec_step_reduce( re.coefficients[j], re.coefficients[j + step_vec], - get_zeta(*zeta_i), + zeta(*zeta_i), ); re.coefficients[j] = x; re.coefficients[j + step_vec] = y; diff --git a/libcrux-ml-kem/src/mlkem512.rs b/libcrux-ml-kem/src/mlkem512.rs index 0d82a07a8..1af827529 100644 --- a/libcrux-ml-kem/src/mlkem512.rs +++ b/libcrux-ml-kem/src/mlkem512.rs @@ -3,31 +3,25 @@ use super::{constants::*, ind_cca::*, types::*, *}; // Kyber 512 parameters const RANK_512: usize = 2; -const RANKED_BYTES_PER_RING_ELEMENT_512: usize = 768; -const T_AS_NTT_ENCODED_SIZE_512: usize = 768; +const RANKED_BYTES_PER_RING_ELEMENT_512: usize = RANK_512 * BITS_PER_RING_ELEMENT / 8; +const T_AS_NTT_ENCODED_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; const VECTOR_U_COMPRESSION_FACTOR_512: usize = 10; -// [hax]: hacspec/hacspec-v2#27 stealing error -// block_len::() -const C1_BLOCK_SIZE_512: usize = 320; -// [hax]: hacspec/hacspec-v2#27 stealing error -// serialized_len::() -const C1_SIZE_512: usize = 640; +const C1_BLOCK_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_U_COMPRESSION_FACTOR_512) / 8; +const C1_SIZE_512: usize = C1_BLOCK_SIZE_512 * RANK_512; const VECTOR_V_COMPRESSION_FACTOR_512: usize = 4; -// [hax]: hacspec/hacspec-v2#27 stealing error -// block_len::() -const C2_SIZE_512: usize = 128; -const CPA_PKE_SECRET_KEY_SIZE_512: usize = 768; -pub(crate) const CPA_PKE_PUBLIC_KEY_SIZE_512: usize = 800; -const CPA_PKE_CIPHERTEXT_SIZE_512: usize = 768; +const C2_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_V_COMPRESSION_FACTOR_512) / 8; +const CPA_PKE_SECRET_KEY_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; +pub(crate) const CPA_PKE_PUBLIC_KEY_SIZE_512: usize = T_AS_NTT_ENCODED_SIZE_512 + 32; +const CPA_PKE_CIPHERTEXT_SIZE_512: usize = C1_SIZE_512 + C2_SIZE_512; -pub(crate) const SECRET_KEY_SIZE_512: usize = 1632; +pub(crate) const SECRET_KEY_SIZE_512: usize = CPA_PKE_SECRET_KEY_SIZE_512 + CPA_PKE_PUBLIC_KEY_SIZE_512 + H_DIGEST_SIZE + SHARED_SECRET_SIZE; const ETA1: usize = 3; -const ETA1_RANDOMNESS_SIZE: usize = 192; +const ETA1_RANDOMNESS_SIZE: usize = ETA1 * 64; const ETA2: usize = 2; -const ETA2_RANDOMNESS_SIZE: usize = 128; +const ETA2_RANDOMNESS_SIZE: usize = ETA2 * 64; -const IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = 800; +const IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = SHARED_SECRET_SIZE + CPA_PKE_CIPHERTEXT_SIZE_512; // Kyber 512 types /// An ML-KEM 512 Ciphertext diff --git a/libcrux-ml-kem/src/ntt.rs b/libcrux-ml-kem/src/ntt.rs index bb769cf1a..973a6d945 100644 --- a/libcrux-ml-kem/src/ntt.rs +++ b/libcrux-ml-kem/src/ntt.rs @@ -1,6 +1,6 @@ use crate::{ hax_utils::hax_debug_assert, - polynomial::{get_zeta, PolynomialRingElement, VECTORS_IN_RING_ELEMENT}, + polynomial::{zeta, PolynomialRingElement, VECTORS_IN_RING_ELEMENT}, vector::{montgomery_multiply_fe, Operations}, }; @@ -56,10 +56,10 @@ pub(crate) fn ntt_at_layer_1( 
(Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::ntt_layer_1_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i + 1), - get_zeta(*zeta_i + 2), - get_zeta(*zeta_i + 3), + zeta(*zeta_i), + zeta(*zeta_i + 1), + zeta(*zeta_i + 2), + zeta(*zeta_i + 3), ); *zeta_i += 3; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) @@ -116,8 +116,8 @@ pub(crate) fn ntt_at_layer_2( (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::ntt_layer_2_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i + 1), + zeta(*zeta_i), + zeta(*zeta_i + 1), ); *zeta_i += 1; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) @@ -173,7 +173,7 @@ pub(crate) fn ntt_at_layer_3( (Spec.Utils.is_i16b_array_opaque (11207+3*3328) (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = - Vector::ntt_layer_3_step(re.coefficients[round], get_zeta(*zeta_i)); + Vector::ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); hax_lib::fstar!( "reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+4*3328) @@ -243,7 +243,7 @@ pub(crate) fn ntt_at_layer_4_plus( let (x, y) = ntt_layer_int_vec_step( re.coefficients[j], re.coefficients[j + step_vec], - get_zeta(*zeta_i), + zeta(*zeta_i), ); re.coefficients[j] = x; re.coefficients[j + step_vec] = y; diff --git a/libcrux-ml-kem/src/polynomial.rs b/libcrux-ml-kem/src/polynomial.rs index 9460a0cba..cb6f0fe8b 100644 --- a/libcrux-ml-kem/src/polynomial.rs +++ b/libcrux-ml-kem/src/polynomial.rs @@ -15,11 +15,12 @@ pub(crate) const ZETAS_TIMES_MONTGOMERY_R: [i16; 128] = { ] }; +// A function to retrieve zetas so that we can add a post-condition #[inline(always)] #[hax_lib::fstar::verification_status(panic_free)] #[hax_lib::requires(i < 128)] #[hax_lib::ensures(|result| fstar!("Spec.Utils.is_i16b 1664 result"))] -pub fn get_zeta(i: usize) -> i16 { +pub fn zeta(i: usize) -> i16 { ZETAS_TIMES_MONTGOMERY_R[i] } @@ -67,7 +68,6 @@ impl PolynomialRingElement { #[allow(non_snake_case)] pub(crate) fn ZERO() -> Self { Self { - // FIXME: The THIR body of item DefId(0:415 ~ libcrux_ml_kem[9000]::polynomial::{impl#0}::ZERO::{constant#0}) was stolen. coefficients: [Vector::ZERO(); 16], } } @@ -213,13 +213,13 @@ impl PolynomialRingElement { /// /// The NIST FIPS 203 standard can be found at /// . + // TODO: Remove or replace with something that works and is useful for the proof. 
// #[cfg_attr(hax, hax_lib::requires( // hax_lib::forall(|i:usize| // hax_lib::implies(i < COEFFICIENTS_IN_RING_ELEMENT, || // (lhs.coefficients[i] >= 0 && lhs.coefficients[i] < 4096) && // (rhs.coefficients[i].abs() <= FIELD_MODULUS) - // ))))] // #[cfg_attr(hax, hax_lib::ensures(|result| // hax_lib::forall(|i:usize| @@ -228,23 +228,18 @@ impl PolynomialRingElement { // ))))] #[inline(always)] pub(crate) fn ntt_multiply(&self, rhs: &Self) -> Self { - // Using `hax_lib::fstar::verification_status(lax)` works but produces an error while extracting hax_lib::fstar!("admit ()"); - // hax_debug_debug_assert!(lhs - // .coefficients - // .into_iter() - // .all(|coefficient| coefficient >= 0 && coefficient < 4096)); - + let mut out = PolynomialRingElement::ZERO(); for i in 0..VECTORS_IN_RING_ELEMENT { out.coefficients[i] = Vector::ntt_multiply( &self.coefficients[i], &rhs.coefficients[i], - get_zeta(64 + 4 * i), - get_zeta(64 + 4 * i + 1), - get_zeta(64 + 4 * i + 2), - get_zeta(64 + 4 * i + 3), + zeta(64 + 4 * i), + zeta(64 + 4 * i + 1), + zeta(64 + 4 * i + 2), + zeta(64 + 4 * i + 3), ); } diff --git a/libcrux-ml-kem/src/vector/avx2.rs b/libcrux-ml-kem/src/vector/avx2.rs index 9f3035fde..61c7ae159 100644 --- a/libcrux-ml-kem/src/vector/avx2.rs +++ b/libcrux-ml-kem/src/vector/avx2.rs @@ -285,7 +285,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 1 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 1 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 1 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_1(vector: Self) -> [u8; 2] { @@ -293,7 +292,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 2)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. sz 2 ==> Spec.MLKEM.deserialize_post 1 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_1(bytes: &[u8]) -> Self { @@ -303,7 +301,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 4 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 4 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 4 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_4(vector: Self) -> [u8; 8] { @@ -311,7 +308,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 8)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. sz 8 ==> Spec.MLKEM.deserialize_post 4 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_4(bytes: &[u8]) -> Self { @@ -336,7 +332,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 10 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 10 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 10 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_10(vector: Self) -> [u8; 20] { @@ -344,7 +339,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 20)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. 
sz 20 ==> Spec.MLKEM.deserialize_post 10 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_10(bytes: &[u8]) -> Self { @@ -367,7 +361,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 12 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 12 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 12 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_12(vector: Self) -> [u8; 24] { @@ -375,7 +368,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 24)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. sz 24 ==> Spec.MLKEM.deserialize_post 12 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_12(bytes: &[u8]) -> Self { diff --git a/libcrux-ml-kem/src/vector/avx2/arithmetic.rs b/libcrux-ml-kem/src/vector/avx2/arithmetic.rs index 1032ee28d..8c9f3ae9a 100644 --- a/libcrux-ml-kem/src/vector/avx2/arithmetic.rs +++ b/libcrux-ml-kem/src/vector/avx2/arithmetic.rs @@ -94,11 +94,6 @@ pub(crate) fn shift_right(vector: Vec256) -> Vec256 { result } -// #[inline(always)] -// pub(crate) fn shift_left(vector: Vec256) -> Vec256 { -// mm256_slli_epi16::<{ SHIFT_BY }>(vector) -// } - #[inline(always)] #[cfg_attr(hax, hax_lib::fstar::options("--z3rlimit 100"))] #[hax_lib::requires(fstar!("Spec.Utils.is_i16b_array (pow2 12 - 1) (Libcrux_intrinsics.Avx2_extract.vec256_as_i16x16 $vector)"))] diff --git a/libcrux-ml-kem/src/vector/portable/ntt.rs b/libcrux-ml-kem/src/vector/portable/ntt.rs index 3cfafc9ea..46ef118d5 100644 --- a/libcrux-ml-kem/src/vector/portable/ntt.rs +++ b/libcrux-ml-kem/src/vector/portable/ntt.rs @@ -367,21 +367,6 @@ pub(crate) fn ntt_multiply_binomials( ); } -// #[inline(always)] -// pub(crate) fn ntt_multiply_binomials( -// (a0, a1): (FieldElement, FieldElement), -// (b0, b1): (FieldElement, FieldElement), -// zeta: FieldElementTimesMontgomeryR, -// ) -> (MontgomeryFieldElement, MontgomeryFieldElement) { -// ( -// montgomery_reduce_element( -// (a0 as i32) * (b0 as i32) -// + (montgomery_reduce_element((a1 as i32) * (b1 as i32)) as i32) * (zeta as i32), -// ), -// montgomery_reduce_element((a0 as i32) * (b1 as i32) + (a1 as i32) * (b0 as i32)), -// ) -// } - #[inline(always)] #[hax_lib::fstar::verification_status(panic_free)] #[hax_lib::fstar::options("--z3rlimit 100")] diff --git a/libcrux-ml-kem/src/vector/portable/serialize.rs b/libcrux-ml-kem/src/vector/portable/serialize.rs index 550ed5170..9a6522847 100644 --- a/libcrux-ml-kem/src/vector/portable/serialize.rs +++ b/libcrux-ml-kem/src/vector/portable/serialize.rs @@ -332,35 +332,6 @@ pub(crate) fn serialize_5_int(v: &[i16]) -> (u8, u8, u8, u8, u8) { (r0, r1, r2, r3, r4) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val serialize_5_lemma (inputs: Libcrux_ml_kem.Vector.Portable.Vector_type.t_PortableVector) : Lemma -// (requires (forall i. 
Rust_primitives.bounded (Seq.index inputs.f_elements i) 5)) -// (ensures bit_vec_of_int_t_array (${serialize_5} inputs) 8 == bit_vec_of_int_t_array inputs.f_elements 5) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let serialize_5_lemma inputs = -// serialize_5_bit_vec_lemma inputs.f_elements (); -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${serialize_5} inputs) 8) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs.f_elements 5)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let serialize_5_bit_vec_lemma (v: t_Array i16 (sz 16)) -// (_: squash (forall i. Rust_primitives.bounded (Seq.index v i) 5)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 5 in -// let outputs = bit_vec_of_int_t_array (${serialize_5} ({ f_elements = v })) 8 in -// (forall (i: nat {i < 80}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[inline(always)] pub(crate) fn serialize_5(v: PortableVector) -> [u8; 10] { let r0_4 = serialize_5_int(&v.elements[0..8]); @@ -386,33 +357,6 @@ pub(crate) fn deserialize_5_int(bytes: &[u8]) -> (i16, i16, i16, i16, i16, i16, (v0, v1, v2, v3, v4, v5, v6, v7) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val deserialize_5_lemma (inputs: t_Array u8 (sz 10)) : Lemma -// (ensures bit_vec_of_int_t_array (${deserialize_5} inputs).f_elements 5 == bit_vec_of_int_t_array inputs 8) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let deserialize_5_lemma inputs = -// deserialize_5_bit_vec_lemma inputs; -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${deserialize_5} inputs).f_elements 5) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs 8)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let deserialize_5_bit_vec_lemma (v: t_Array u8 (sz 10)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 8 in -// let outputs = bit_vec_of_int_t_array (${deserialize_5} v).f_elements 5 in -// (forall (i: nat {i < 80}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[hax_lib::requires(fstar!(r#" ${bytes.len() == 10} "#))] @@ -601,35 +545,6 @@ pub(crate) fn serialize_11_int(v: &[i16]) -> (u8, u8, u8, u8, u8, u8, u8, u8, u8 (r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val serialize_11_lemma (inputs: Libcrux_ml_kem.Vector.Portable.Vector_type.t_PortableVector) : Lemma -// (requires (forall i. Rust_primitives.bounded (Seq.index inputs.f_elements i) 11)) -// (ensures bit_vec_of_int_t_array (${serialize_11} inputs) 8 == bit_vec_of_int_t_array inputs.f_elements 11) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let serialize_11_lemma inputs = -// serialize_11_bit_vec_lemma inputs.f_elements (); -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${serialize_11} inputs) 8) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs.f_elements 11)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let serialize_11_bit_vec_lemma (v: t_Array i16 (sz 16)) -// (_: squash (forall i. 
Rust_primitives.bounded (Seq.index v i) 11)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 11 in -// let outputs = bit_vec_of_int_t_array (${serialize_11} ({ f_elements = v })) 8 in -// (forall (i: nat {i < 176}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[inline(always)] pub(crate) fn serialize_11(v: PortableVector) -> [u8; 22] { let r0_10 = serialize_11_int(&v.elements[0..8]); @@ -657,33 +572,6 @@ pub(crate) fn deserialize_11_int(bytes: &[u8]) -> (i16, i16, i16, i16, i16, i16, (r0, r1, r2, r3, r4, r5, r6, r7) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val deserialize_11_lemma (inputs: t_Array u8 (sz 22)) : Lemma -// (ensures bit_vec_of_int_t_array (${deserialize_11} inputs).f_elements 11 == bit_vec_of_int_t_array inputs 8) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let deserialize_11_lemma inputs = -// deserialize_11_bit_vec_lemma inputs; -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${deserialize_11} inputs).f_elements 11) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs 8)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let deserialize_11_bit_vec_lemma (v: t_Array u8 (sz 22)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 8 in -// let outputs = bit_vec_of_int_t_array (${deserialize_11} v).f_elements 11 in -// (forall (i: nat {i < 176}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[hax_lib::requires(fstar!(r#" ${bytes.len() == 22} "#))] diff --git a/libcrux-ml-kem/src/vector/traits.rs b/libcrux-ml-kem/src/vector/traits.rs index 62e67a770..193d0edf6 100644 --- a/libcrux-ml-kem/src/vector/traits.rs +++ b/libcrux-ml-kem/src/vector/traits.rs @@ -5,6 +5,8 @@ pub const INVERSE_OF_MODULUS_MOD_MONTGOMERY_R: u32 = 62209; // FIELD_MODULUS^{-1 pub const BARRETT_SHIFT: i32 = 26; pub const BARRETT_R: i32 = 1 << BARRETT_SHIFT; +// We define a trait that allows us to talk about the contents of a vector. +// This is used extensively in pre- and post-conditions to reason about the code. 
#[cfg(hax)] #[hax_lib::attributes] pub trait Repr: Copy + Clone { From 0e587d6e842717408ea9357e00d47e372e505c80 Mon Sep 17 00:00:00 2001 From: Karthikeyan Bhargavan Date: Tue, 3 Dec 2024 19:44:12 +0100 Subject: [PATCH 2/7] assert to help proofs --- libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst b/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst index f598ee0ff..0ea02db6c 100644 --- a/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst +++ b/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst @@ -11,13 +11,14 @@ open Spec.MLKEM let mlkem768_rank : rank = sz 3 -#push-options "--z3rlimit 300" +#set-options "--z3rlimit 350" let mlkem768_generate_keypair (randomness:t_Array u8 (sz 64)): (t_Array u8 (sz 2400) & t_Array u8 (sz 1184)) & bool = ind_cca_generate_keypair mlkem768_rank randomness let mlkem768_encapsulate (public_key: t_Array u8 (sz 1184)) (randomness: t_Array u8 (sz 32)): (t_Array u8 (sz 1088) & t_Array u8 (sz 32)) & bool = + assert (v_CPA_CIPHERTEXT_SIZE mlkem768_rank == sz 1088); ind_cca_encapsulate mlkem768_rank public_key randomness let mlkem768_decapsulate (secret_key: t_Array u8 (sz 2400)) (ciphertext: t_Array u8 (sz 1088)): @@ -32,7 +33,6 @@ let mlkem1024_generate_keypair (randomness:t_Array u8 (sz 64)): (t_Array u8 (sz 3168) & t_Array u8 (sz 1568)) & bool = ind_cca_generate_keypair mlkem1024_rank randomness -#set-options "--z3rlimit 100" let mlkem1024_encapsulate (public_key: t_Array u8 (sz 1568)) (randomness: t_Array u8 (sz 32)): (t_Array u8 (sz 1568) & t_Array u8 (sz 32)) & bool = assert (v_CPA_CIPHERTEXT_SIZE mlkem1024_rank == sz 1568); From cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 18:45:17 +0000 Subject: [PATCH 3/7] fmt --- libcrux-ml-kem/c/code_gen.txt | 10 +- libcrux-ml-kem/c/internal/libcrux_core.h | 10 +- .../c/internal/libcrux_mlkem_avx2.h | 10 +- .../c/internal/libcrux_mlkem_portable.h | 12 +- libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h | 10 +- .../c/internal/libcrux_sha3_internal.h | 78 +- libcrux-ml-kem/c/libcrux_core.c | 10 +- libcrux-ml-kem/c/libcrux_core.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512.h | 70 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_avx2.c | 1385 +++++++++++++---- libcrux-ml-kem/c/libcrux_mlkem_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_portable.c | 455 ++++-- libcrux-ml-kem/c/libcrux_mlkem_portable.h | 10 +- libcrux-ml-kem/c/libcrux_sha3.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_avx2.c | 52 +- libcrux-ml-kem/c/libcrux_sha3_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_internal.h | 16 +- libcrux-ml-kem/c/libcrux_sha3_neon.c | 30 +- libcrux-ml-kem/c/libcrux_sha3_neon.h | 10 +- libcrux-ml-kem/src/invert_ntt.rs | 7 +- 
libcrux-ml-kem/src/mlkem512.rs | 12 +- libcrux-ml-kem/src/ntt.rs | 10 +- libcrux-ml-kem/src/polynomial.rs | 4 +- 37 files changed, 1739 insertions(+), 642 deletions(-) diff --git a/libcrux-ml-kem/c/code_gen.txt b/libcrux-ml-kem/c/code_gen.txt index 420446603..8606206e0 100644 --- a/libcrux-ml-kem/c/code_gen.txt +++ b/libcrux-ml-kem/c/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 -Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 -Karamel: 8c3612018c25889288da6857771be3ad03b75bcd -F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty -Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a +Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f +Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c +Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 +F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc +Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 diff --git a/libcrux-ml-kem/c/internal/libcrux_core.h b/libcrux-ml-kem/c/internal/libcrux_core.h index 69032a33e..fe0dc7d7d 100644 --- a/libcrux-ml-kem/c/internal/libcrux_core.h +++ b/libcrux-ml-kem/c/internal/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_core_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h index 9baf58ca5..48345a968 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h index 7ba532d5e..e89d87311 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 
1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_mlkem_portable_H @@ -23,7 +23,7 @@ extern "C" { #include "internal/libcrux_core.h" #include "internal/libcrux_sha3_internal.h" -int16_t libcrux_ml_kem_polynomial_get_zeta(size_t i); +int16_t libcrux_ml_kem_polynomial_zeta(size_t i); #define LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT \ (LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / \ diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h index 0d99b2edd..78fe0a95b 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h index 6d47ffcbc..92381f50f 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_sha3_internal_H @@ -273,8 +273,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)136U) { - consumed = (size_t)136U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)136U) { + consumed = (size_t)136U - /* We have enough data when combining the + internal buffer and the input. */ + self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -380,7 +385,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). 
*/ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -727,8 +734,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)168U) { - consumed = (size_t)168U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)168U) { + consumed = (size_t)168U - /* We have enough data when combining the + internal buffer and the input. */ + self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -834,7 +846,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). */ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -1224,7 +1238,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= out_len) { + if ((size_t)136U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. + https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)136U; @@ -1238,8 +1258,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1248,7 +1271,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. + */ + out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -1343,7 +1370,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= out_len) { + if ((size_t)168U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. 
+ https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)168U; @@ -1357,8 +1390,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1367,7 +1403,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. + */ + out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); diff --git a/libcrux-ml-kem/c/libcrux_core.c b/libcrux-ml-kem/c/libcrux_core.c index 03c9cddb6..de354115a 100644 --- a/libcrux-ml-kem/c/libcrux_core.c +++ b/libcrux-ml-kem/c/libcrux_core.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_core.h" diff --git a/libcrux-ml-kem/c/libcrux_core.h b/libcrux-ml-kem/c/libcrux_core.h index f1e63c7a9..55c5c5d8e 100644 --- a/libcrux-ml-kem/c/libcrux_core.h +++ b/libcrux-ml-kem/c/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024.h b/libcrux-ml-kem/c/libcrux_mlkem1024.h index 6ba68daf6..37334a9b1 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 
5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem1024_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c index 6aa0b5776..778d6fbf3 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem1024_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h index c662e3584..854751c45 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem1024_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c index bc4294748..e463cb267 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem1024_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h index 90211f1e5..430c904d1 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following 
revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem1024_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512.h b/libcrux-ml-kem/c/libcrux_mlkem512.h index d27735aa5..fb7755a5a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem512_H @@ -21,28 +21,52 @@ extern "C" { #include "eurydice_glue.h" #include "libcrux_core.h" -#define LIBCRUX_ML_KEM_MLKEM512_C1_BLOCK_SIZE_512 ((size_t)320U) +#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_U_COMPRESSION_FACTOR_512 ((size_t)10U) + +#define LIBCRUX_ML_KEM_MLKEM512_C1_BLOCK_SIZE_512 \ + (LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_MLKEM512_VECTOR_U_COMPRESSION_FACTOR_512 / (size_t)8U) + +#define LIBCRUX_ML_KEM_MLKEM512_RANK_512 ((size_t)2U) + +#define LIBCRUX_ML_KEM_MLKEM512_C1_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_C1_BLOCK_SIZE_512 * LIBCRUX_ML_KEM_MLKEM512_RANK_512) + +#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_V_COMPRESSION_FACTOR_512 ((size_t)4U) -#define LIBCRUX_ML_KEM_MLKEM512_C1_SIZE_512 ((size_t)640U) +#define LIBCRUX_ML_KEM_MLKEM512_C2_SIZE_512 \ + (LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_MLKEM512_VECTOR_V_COMPRESSION_FACTOR_512 / (size_t)8U) -#define LIBCRUX_ML_KEM_MLKEM512_C2_SIZE_512 ((size_t)128U) +#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_CIPHERTEXT_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_C1_SIZE_512 + LIBCRUX_ML_KEM_MLKEM512_C2_SIZE_512) -#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_CIPHERTEXT_SIZE_512 ((size_t)768U) +#define LIBCRUX_ML_KEM_MLKEM512_T_AS_NTT_ENCODED_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_RANK_512 * \ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_CONSTANTS_BITS_PER_COEFFICIENT / (size_t)8U) -#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_PUBLIC_KEY_SIZE_512 ((size_t)800U) +#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_PUBLIC_KEY_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_T_AS_NTT_ENCODED_SIZE_512 + (size_t)32U) -#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_SECRET_KEY_SIZE_512 ((size_t)768U) +#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_SECRET_KEY_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_RANK_512 * \ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_CONSTANTS_BITS_PER_COEFFICIENT / (size_t)8U) #define LIBCRUX_ML_KEM_MLKEM512_ETA1 ((size_t)3U) -#define LIBCRUX_ML_KEM_MLKEM512_ETA1_RANDOMNESS_SIZE ((size_t)192U) +#define 
LIBCRUX_ML_KEM_MLKEM512_ETA1_RANDOMNESS_SIZE \ + (LIBCRUX_ML_KEM_MLKEM512_ETA1 * (size_t)64U) #define LIBCRUX_ML_KEM_MLKEM512_ETA2 ((size_t)2U) -#define LIBCRUX_ML_KEM_MLKEM512_ETA2_RANDOMNESS_SIZE ((size_t)128U) +#define LIBCRUX_ML_KEM_MLKEM512_ETA2_RANDOMNESS_SIZE \ + (LIBCRUX_ML_KEM_MLKEM512_ETA2 * (size_t)64U) #define LIBCRUX_ML_KEM_MLKEM512_IMPLICIT_REJECTION_HASH_INPUT_SIZE \ - ((size_t)800U) + (LIBCRUX_ML_KEM_CONSTANTS_SHARED_SECRET_SIZE + \ + LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_CIPHERTEXT_SIZE_512) typedef libcrux_ml_kem_types_MlKemCiphertext_1a libcrux_ml_kem_mlkem512_MlKem512Ciphertext; @@ -56,17 +80,15 @@ typedef libcrux_ml_kem_types_MlKemPrivateKey_fa typedef libcrux_ml_kem_types_MlKemPublicKey_52 libcrux_ml_kem_mlkem512_MlKem512PublicKey; -#define LIBCRUX_ML_KEM_MLKEM512_RANKED_BYTES_PER_RING_ELEMENT_512 ((size_t)768U) +#define LIBCRUX_ML_KEM_MLKEM512_RANKED_BYTES_PER_RING_ELEMENT_512 \ + (LIBCRUX_ML_KEM_MLKEM512_RANK_512 * \ + LIBCRUX_ML_KEM_CONSTANTS_BITS_PER_RING_ELEMENT / (size_t)8U) -#define LIBCRUX_ML_KEM_MLKEM512_RANK_512 ((size_t)2U) - -#define LIBCRUX_ML_KEM_MLKEM512_SECRET_KEY_SIZE_512 ((size_t)1632U) - -#define LIBCRUX_ML_KEM_MLKEM512_T_AS_NTT_ENCODED_SIZE_512 ((size_t)768U) - -#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_U_COMPRESSION_FACTOR_512 ((size_t)10U) - -#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_V_COMPRESSION_FACTOR_512 ((size_t)4U) +#define LIBCRUX_ML_KEM_MLKEM512_SECRET_KEY_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_SECRET_KEY_SIZE_512 + \ + LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_PUBLIC_KEY_SIZE_512 + \ + LIBCRUX_ML_KEM_CONSTANTS_H_DIGEST_SIZE + \ + LIBCRUX_ML_KEM_CONSTANTS_SHARED_SECRET_SIZE) #if defined(__cplusplus) } diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c index b45c8295b..3e9fbd0cc 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem512_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h index d5ec40d83..79012290d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem512_avx2_H diff --git 
a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c index 2fc72d307..8639c4603 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem512_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h index 6e3d9755b..faea31c8a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem512_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768.h b/libcrux-ml-kem/c/libcrux_mlkem768.h index bcfb76ff3..474b96082 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem768_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c index fdf226bd8..a7a0f7e7d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 
2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem768_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h index 08c3fa5b7..35608499b 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem768_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c index c59bc0046..2d21b9d89 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem768_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h index 03f9d22a4..514894426 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem768_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c index 61f343a77..64e5d2462 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 
3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_mlkem_avx2.h" @@ -141,11 +141,16 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); - __m256i v_minus_field_modulus = mm256_sub_epi16(vector, field_modulus); + __m256i v_minus_field_modulus = + mm256_sub_epi16(/* Compute v_i - Q and crate a mask from the sign bit of + each of these quantities. */ + vector, + field_modulus); __m256i sign_mask = mm256_srai_epi16((int32_t)15, v_minus_field_modulus, __m256i); - __m256i conditional_add_field_modulus = - mm256_and_si256(sign_mask, field_modulus); + __m256i conditional_add_field_modulus = mm256_and_si256( + /* If v_i - Q < 0 then add back Q to (v_i - Q). */ sign_mask, + field_modulus); return mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -450,6 +455,7 @@ libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(__m256i vec) { KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { + /* Compute the first term of the product */ __m256i shuffle_with = mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -457,7 +463,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = mm256_shuffle_epi8(lhs, shuffle_with); + __m256i lhs_shuffled = + mm256_shuffle_epi8(/* Prepare the left hand side */ lhs, shuffle_with); __m256i lhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = mm256_castsi256_si128(lhs_shuffled0); @@ -465,7 +472,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = mm256_extracti128_si256((int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = mm256_shuffle_epi8(rhs, shuffle_with); + __m256i rhs_shuffled = + mm256_shuffle_epi8(/* Prepare the right hand side */ rhs, shuffle_with); __m256i rhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = mm256_castsi256_si128(rhs_shuffled0); @@ -473,7 +481,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = mm256_extracti128_si256((int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = mm256_cvtepi16_epi32(rhs_odds); - __m256i left = mm256_mullo_epi32(lhs_evens0, rhs_evens0); + __m256i left = + mm256_mullo_epi32(/* Start operating with them */ lhs_evens0, rhs_evens0); __m256i right = mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(right); @@ -486,7 +495,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = mm256_shuffle_epi8( - rhs, + /* Compute the second term of the product */ rhs, mm256_set_epi8((int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, 
(int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, (int8_t)1, (int8_t)0, (int8_t)3, @@ -500,8 +509,9 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = mm256_slli_epi32((int32_t)16, products_right0, __m256i); - return mm256_blend_epi16((int32_t)170, products_left0, products_right1, - __m256i); + return mm256_blend_epi16((int32_t)170, + /* Combine them into one vector */ products_left0, + products_right1, __m256i); } /** @@ -517,11 +527,44 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = mm256_slli_epi16((int32_t)15, vector, __m256i); - __m128i low_msbs = mm256_castsi256_si128(lsb_to_msb); - __m128i high_msbs = mm256_extracti128_si256((int32_t)1, lsb_to_msb, __m128i); - __m128i msbs = mm_packs_epi16(low_msbs, high_msbs); - int32_t bits_packed = mm_movemask_epi8(msbs); + __m256i lsb_to_msb = mm256_slli_epi16( + (int32_t)15, + /* Suppose |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ + 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least + significant bit in each lane, move it to the most significant position + to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ + d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ + n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ + vector, __m256i); + __m128i low_msbs = mm256_castsi256_si128( + /* Get the first 8 16-bit elements ... */ lsb_to_msb); + __m128i high_msbs = mm256_extracti128_si256( + (int32_t)1, + /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); + __m128i msbs = + mm_packs_epi16(/* ... and then pack them into 8-bit values using signed + saturation. This function packs all the |low_msbs|, and + then the high ones. low_msbs = a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | + e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ + l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ We shifted by 15 above + to take advantage of the signed saturation performed by + mm_packs_epi16: - if the sign bit of the 16-bit element + being packed is 1, the corresponding 8-bit element in + |msbs| will be 0xFF. - if the sign bit of the 16-bit + element being packed is 0, the corresponding 8-bit + element in |msbs| will be 0. Thus, if, for example, a₀ = + 1, e₀ = 1, and p₀ = 1, and every other bit is 0, after + packing into 8 bit value, |msbs| will look like: 0xFF + 0x00 0x00 0x00 | 0xFF 0x00 0x00 0x00 | 0x00 0x00 0x00 + 0x00 | 0x00 0x00 0x00 0xFF */ + low_msbs, + high_msbs); + int32_t bits_packed = + mm_movemask_epi8(/* Now that every element is either 0xFF or 0x00, we just + extract the most significant bit from each element and + collate them into two bytes. 
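Continuing the example above (a₀ = e₀ = p₀ = 1, every other bit 0), mm_movemask_epi8 returns 0x8011, so the two output bytes are 0x11 and 0x80.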
*/ + msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -539,16 +582,39 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { __m256i coefficients = - mm256_set_epi16(b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = mm256_mullo_epi16( - coefficients, - mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, - (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, - (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, - (int16_t)-32768)); - return mm256_srli_epi16((int32_t)15, coefficients_in_msb, __m256i); + mm256_set_epi16(/* We need to take each bit from the 2 bytes of input and + put them into their own 16-bit lane. Ideally, we'd load + the two bytes into the vector, duplicate them, and + right-shift the 0th element by 0 bits, the first + element by 1 bit, the second by 2 bits and so on before + AND-ing with 0x1 to leave only the least signifinicant + bit. But since |_mm256_srlv_epi16| does not exist, so + we have to resort to a workaround. Rather than shifting + each element by a different amount, we'll multiply each + element by a value such that the bit we're interested + in becomes the most significant bit. The coefficients + are loaded as follows: */ + b, + b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = + mm256_mullo_epi16(/* And this vector, when multiplied with the previous + one, ensures that the bit we'd like to keep in each + lane becomes the most significant bit upon + multiplication. */ + coefficients, + mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, + (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, + (int16_t)1 << 8U, (int16_t)1 << 9U, + (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768)); + return mm256_srli_epi16( + (int32_t)15, + /* Now that they're all in the most significant bit position, shift them + down to the least significant bit. */ + coefficients_in_msb, __m256i); } KRML_MUSTINLINE __m256i @@ -561,7 +627,23 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* We need to take each bit from the 2 bytes of input and put them + into their own 16-bit lane. Ideally, we'd load the two bytes into + the vector, duplicate them, and right-shift the 0th element by 0 + bits, the first element by 1 bit, the second by 2 bits and so on + before AND-ing with 0x1 to leave only the least signifinicant bit. + But since |_mm256_srlv_epi16| does not exist, so we have to resort + to a workaround. Rather than shifting each element by a different + amount, we'll multiply each element by a value such that the bit + we're interested in becomes the most significant bit. The + coefficients are loaded as follows: And this vector, when + multiplied with the previous one, ensures that the bit we'd like to + keep in each lane becomes the most significant bit upon + multiplication. 
Now that they're all in the most significant bit + position, shift them down to the least significant bit. */ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -594,23 +676,47 @@ KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); - __m256i adjacent_8_combined = mm256_shuffle_epi8( - adjacent_2_combined, - mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, - (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0)); - __m256i combined = mm256_permutevar8x32_epi32( - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 4U, + /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | + 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be + laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA + 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ + vector); + __m256i adjacent_8_combined = + mm256_shuffle_epi8(/* Recall that |adjacent_2_combined| goes as follows: + 0x00_00_00_BA 0x00_00_00_DC | 0x00_00_00_FE + 0x00_00_00_HG | ... Out of this, we only need the + first byte, the 4th byte, the 8th byte and so on + from the bottom and the top 128 bits. */ + adjacent_2_combined, + mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); + __m256i combined = + mm256_permutevar8x32_epi32(/* |adjacent_8_combined| looks like this: 0: + 0xHG_FE_DC_BA 1: 0x00_00_00_00 | 2: + 0x00_00_00_00 3: 0x00_00_00_00 | 4: + 0xPO_NM_LK_JI .... We put the element at 4 + after the element at 0 ... */ + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, + (int32_t)4, (int32_t)0)); __m128i combined0 = mm256_castsi256_si128(combined); mm_storeu_bytes_si128( - Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); + Eurydice_array_to_slice( + (size_t)16U, + /* ... so that we can read them out in one go. */ serialized, + uint8_t), + combined0); uint8_t ret0[8U]; core_result_Result_15 dst; Eurydice_slice_to_array2( @@ -634,8 +740,23 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = mm256_set_epi16(b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, - b2, b2, b1, b1, b0, b0); + __m256i coefficients = + mm256_set_epi16(/* Every 4 bits from each byte of input should be put into + its own 16-bit lane. Since |_mm256_srlv_epi16| does not + exist, we have to resort to a workaround. 
Rather than + shifting each element by a different amount, we'll + multiply each element by a value such that the bits + we're interested in become the most significant bits + (of an 8-bit value). In this lane, the 4 bits we need + to put are already the most significant bits of + |bytes[7]| (that is, b7). */ + b7, + /* In this lane, the 4 bits we need to put are the least + significant bits, so we need to shift the 4 + least-significant bits of |b7| to the most significant + bits (of an 8-bit value). */ + b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, + b0); __m256i coefficients_in_msb = mm256_mullo_epi16( coefficients, mm256_set_epi16((int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -644,9 +765,12 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); - __m256i coefficients_in_lsb = - mm256_srli_epi16((int32_t)4, coefficients_in_msb, __m256i); - return mm256_and_si256(coefficients_in_lsb, + __m256i coefficients_in_lsb = mm256_srli_epi16( + (int32_t)4, + /* Once the 4-bit coefficients are in the most significant positions (of + an 8-bit value), shift them all down by 4. */ + coefficients_in_msb, __m256i); + return mm256_and_si256(/* Zero the remaining bits. */ coefficients_in_lsb, mm256_set1_epi16(((int16_t)1 << 4U) - (int16_t)1)); } @@ -662,7 +786,23 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* Every 4 bits from each byte of input should be put into its own + 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to + resort to a workaround. Rather than shifting each element by a + different amount, we'll multiply each element by a value such that + the bits we're interested in become the most significant bits (of + an 8-bit value). In this lane, the 4 bits we need to put are + already the most significant bits of |bytes[7]| (that is, b7). In + this lane, the 4 bits we need to put are the least significant + bits, so we need to shift the 4 least-significant bits of |b7| to + the most significant bits (of an 8-bit value). These constants are + chosen to shift the bits of the values that we loaded into + |coefficients|. Once the 4-bit coefficients are in the most + significant positions (of an 8-bit value), shift them all down + by 4. Zero the remaining bits. 
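For example, an input byte 0xAB ends up as 0x000B (its low nibble) in the first 16-bit lane and 0x000A (its high nibble) in the second.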
*/ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -684,32 +824,78 @@ libcrux_ml_kem_vector_avx2_deserialize_4_09(Eurydice_slice bytes) { KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = mm256_madd_epi16( - vector, mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); - __m256i adjacent_4_combined = mm256_sllv_epi32( - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22)); - __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)22, adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - mm256_shuffle_epi32((int32_t)8, adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = mm256_sllv_epi32( - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12)); + __m256i adjacent_2_combined = + mm256_madd_epi16(/* If |vector| is laid out as follows (superscript number + indicates the corresponding bit is duplicated that + many times): 0¹¹a₄a₃a₂a₁a₀ 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ + 0¹¹d₄d₃d₂d₁d₀ | ↩ 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ + 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | ↩ |adjacent_2_combined| + will be laid out as a series of 32-bit integers, as + follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... */ + vector, + mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, + (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, + (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, + (int16_t)1)); + __m256i adjacent_4_combined = + mm256_sllv_epi32(/* Recall that |adjacent_2_combined| is laid out as + follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... This shift results + in: b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | + ↩ f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... */ + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, + (int32_t)22, (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22)); + __m256i adjacent_4_combined0 = mm256_srli_epi64( + (int32_t)22, + /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift + down by 22 bits to remove the least significant 0 bits that aren't part + of the bits we need. */ + adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = mm256_shuffle_epi32( + (int32_t)8, + /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks + like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² + 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to + read out the bytes in one go, we need to shifts the bits in position 2 + to position 1 in each 128-bit lane. 
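The shuffle immediate 8 (0b00001000) does exactly that: within each 128-bit lane it copies 32-bit element 2 into position 1 and element 0 into positions 0, 2 and 3.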
*/ + adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = + mm256_sllv_epi32(/* |adjacent_8_combined|, when viewed as a set of 32-bit + values, now looks like: + 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 0³² 0³² | + ↩ Once again, we line these bits up by shifting the up + values at indices 0 and 5 by 12, viewing the resulting + register as a set of 64-bit values, and then shifting + down the 64-bit values by 12 bits. */ + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = mm256_srli_epi64((int32_t)12, adjacent_8_combined0, __m256i); - __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined1); + __m128i lower_8 = + mm256_castsi256_si128(/* We now have 40 bits starting at position 0 in the + lower 128-bit lane, ... */ + adjacent_8_combined1); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); - __m128i upper_8 = - mm256_extracti128_si256((int32_t)1, adjacent_8_combined1, __m128i); + __m128i upper_8 = mm256_extracti128_si256( + (int32_t)1, + /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ + adjacent_8_combined1, __m128i); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -803,25 +989,67 @@ core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); - __m256i adjacent_4_combined = mm256_sllv_epi32( - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 10U, + /* If |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... + |adjacent_2_combined| will be laid out as a series of 32-bit + integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ + vector); + __m256i adjacent_4_combined = + mm256_sllv_epi32(/* Shifting up the values at the even indices by 12, we + get: b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ ... 
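mm256_sllv_epi32 makes this possible by shifting each 32-bit lane by its own count, here 12 for the even lanes and 0 for the odd ones.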
*/ + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)12, adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = mm256_shuffle_epi8( - adjacent_4_combined0, - mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, - (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, - (int8_t)9, (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0)); - __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined); - __m128i upper_8 = - mm256_extracti128_si256((int32_t)1, adjacent_8_combined, __m128i); + mm256_srli_epi64((int32_t)12, + /* Viewing this as a set of 64-bit integers we get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ ... Shifting down by 12 gives us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ ... */ + adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + mm256_shuffle_epi8(/* |adjacent_4_combined|, when the bottom and top 128 + bit-lanes are grouped into bytes, looks like: + 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ In + each 128-bit lane, we want to put bytes 8, 9, 10, + 11, 12 after bytes 0, 1, 2, 3 to allow for + sequential reading. */ + adjacent_4_combined0, + mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); + __m128i lower_8 = + mm256_castsi256_si128(/* We now have 64 bits starting at position 0 in the + lower 128-bit lane, ... */ + adjacent_8_combined); + __m128i upper_8 = mm256_extracti128_si256( + (int32_t)1, + /* and 64 bits starting at position 0 in the upper 128-bit lane. */ + adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -829,8 +1057,167 @@ libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If + |vector| + is + laid + out + as + follows + (superscript + number + indicates + the + corresponding + bit + is + duplicated + that + many + times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ + | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ + | ↩ + ... 
+ |adjacent_2_combined| + will + be + laid + out + as a + series + of + 32-bit + integers, + as + follows: + 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + .... + Shifting + up + the + values + at + the + even + indices + by + 12, + we + get: + b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + ... + Viewing + this + as a + set + of + 64-bit + integers + we + get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ + ... + Shifting + down + by + 12 + gives + us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ + ... + |adjacent_4_combined|, + when + the + bottom + and + top + 128 + bit-lanes + are + grouped + into + bytes, + looks + like: + 0₇0₆0₅B₄B₃B₂B₁B₀ + | ↩ + 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ + | ↩ + In + each + 128-bit + lane, + we + want + to + put + bytes + 8, + 9, + 10, + 11, + 12 + after + bytes + 0, + 1, + 2, 3 + to + allow + for + sequential + reading. + We + now + have + 64 + bits + starting + at + position + 0 in + the + lower + 128-bit + lane, + ... + and + 64 + bits + starting + at + position + 0 in + the + upper + 128-bit + lane. + */ + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -880,14 +1267,16 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 0U, (int16_t)1 << 2U, (int16_t)1 << 4U, (int16_t)1 << 6U)); __m256i coefficients1 = mm256_srli_epi16((int32_t)6, coefficients0, __m256i); - return mm256_and_si256(coefficients1, - mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); + return mm256_and_si256( + /* Here I can prove this `and` is not useful */ coefficients1, + mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); } KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = - Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( + /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, + (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1053,26 +1442,64 @@ KRML_MUSTINLINE size_t libcrux_ml_kem_vector_avx2_sampling_rejection_sample( __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can + be interpreted as a + sequence of + serialized 12-bit + (i.e. uncompressed) + coefficients. Not + all coefficients + may be less than + FIELD_MODULUS + though. */ + input); __m256i compare_with_field_modulus = - mm256_cmpgt_epi16(field_modulus, potential_coefficients); + mm256_cmpgt_epi16(/* Suppose we view |potential_coefficients| as follows + (grouping 64-bit elements): A B C D | E F G H | .... 
+ and A < 3329, D < 3329 and H < 3329, + |compare_with_field_modulus| will look like: 0xFF 0 0 + 0xFF | 0 0 0 0xFF | ... */ + field_modulus, + potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each + lane is either 0 or 1, we + only need one bit from + each lane in the register + to tell us what + coefficients to keep and + what to throw-away. + Combine all the bits + (there are 16) into two + bytes. */ + compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, + /* Each bit (and its corresponding position) represents an element we + want to sample. We'd like all such elements to be next to each other + starting at index 0, so that they can be read from the vector + easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level + shuffling indices needed to make this happen. For e.g. if good[0] = + 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit + lane to the first. To do this, we need the byte-level shuffle + indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = mm_loadu_si128( - Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); + __m128i lower_shuffles0 = mm_loadu_si128(Eurydice_array_to_slice( + (size_t)16U, + /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, + uint8_t)); __m128i lower_coefficients = mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = mm_shuffle_epi8(lower_coefficients, lower_shuffles0); - mm_storeu_si128(output, lower_coefficients0); + mm_storeu_si128(/* ... then write them out ... */ output, + lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, + /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1425,9 +1852,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_a9_e0(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), + H_a9_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -1914,6 +2345,10 @@ static KRML_MUSTINLINE void sample_from_xof_6c1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
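* Each 168-byte block yields 112 candidate 12-bit values, of which about 112 · 3329/4096 ≈ 91 pass the bound check on average, so the initial 504-byte squeeze (336 candidates for the 256 coefficients needed) is usually already enough.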
+ */ while (true) { if (done) { break; @@ -1972,7 +2407,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -2187,7 +2622,12 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -2239,7 +2679,13 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2248,9 +2694,9 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( for (size_t i = offset_vec; i < offset_vec + step_vec; i++) { size_t j = i; libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = - ntt_layer_int_vec_step_61( - re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + ntt_layer_int_vec_step_61(re->coefficients[j], + re->coefficients[j + step_vec], + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -2272,7 +2718,7 @@ static KRML_MUSTINLINE void ntt_at_layer_3_61( zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_3_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]));); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]));); } /** @@ -2287,9 +2733,8 @@ static KRML_MUSTINLINE void ntt_at_layer_2_61( i, (size_t)0U, (size_t)16U, (size_t)1U, size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_2_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U;); } @@ -2305,11 +2750,10 @@ static KRML_MUSTINLINE void ntt_at_layer_1_61( i, (size_t)0U, (size_t)16U, (size_t)1U, size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_1_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + 
libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U;); } @@ -2327,7 +2771,11 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2342,7 +2790,9 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - ntt_at_layer_7_61(re); + ntt_at_layer_7_61(/* Due to the small coefficient bound, we can skip the first + round of Montgomery reductions. */ + re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)5U); @@ -2449,13 +2899,13 @@ ntt_multiply_ef_61(libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, size_t i0 = i; out.coefficients[i0] = libcrux_ml_kem_vector_avx2_ntt_multiply_09( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); } return out; } @@ -2475,9 +2925,14 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2511,10 +2966,17 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; - __m256i coefficient_normal_form = - to_standard_domain_61(self->coefficients[j]); + __m256i coefficient_normal_form = to_standard_domain_61( + self->coefficients[/* The coefficients are of the form aR^{-1} mod q, + which means calling to_montgomery_domain() on 
them + should return a mod q. */ + j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -2544,6 +3006,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -2619,7 +3083,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( IndCpaPrivateKeyUnpacked_63 *private_key, IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_be(key_generation_seed, hashed); + cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -2649,8 +3116,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( sample_vector_cbd_then_ntt_out_b41(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_ab(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_ab(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -2675,11 +3142,13 @@ serialize_unpacked_secret_key_8c(IndCpaPublicKeyUnpacked_63 *public_key, IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_ed( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_ed( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -2866,11 +3335,15 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_ab(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -2979,10 +3452,10 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_1_61( re->coefficients[round] = 
libcrux_ml_kem_vector_avx2_inv_ntt_layer_1_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U;); } @@ -3000,8 +3473,8 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_2_61( re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_2_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U;); } @@ -3018,7 +3491,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_3_61( re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_3_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]));); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]));); } /** @@ -3047,7 +3520,13 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3060,7 +3539,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = inv_ntt_layer_int_vec_step_reduce_61( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -3078,7 +3557,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -3104,7 +3586,11 @@ static KRML_MUSTINLINE void add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3217,8 +3703,26 @@ add_message_error_reduce_ef_61( __m256i coefficient_normal_form = 
libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( + self->coefficients + [/* FIXME: Eurydice crashes with: Warning 11: in top-level + declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing function cannot be + translated into C*: let mutable ret(Mark.Present,(Mark.AtMost + 2), ): int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. On the following code: + ```rust result.coefficients[i] = + Vector::barrett_reduce(Vector::add( coefficient_normal_form, + &Vector::add(self.coefficients[i], &message.coefficients[i]), + )); ``` */ + i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3266,8 +3770,18 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3275,12 +3789,18 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)10, coefficients_high0, __m256i); @@ -3293,8 +3813,20 @@ compress_ciphertext_coefficient_ef(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3348,8 +3880,18 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3357,12 +3899,18 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)11, coefficients_high0, __m256i); @@ -3375,8 +3923,20 @@ compress_ciphertext_coefficient_c4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3454,8 +4014,18 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3463,12 +4033,18 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)4, coefficients_high0, __m256i); @@ -3481,8 +4057,20 @@ compress_ciphertext_coefficient_d1(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3508,7 +4096,11 @@ static KRML_MUSTINLINE void compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficient = compress_09_d1(to_unsigned_field_modulus_61(re.coefficients[i0])); @@ -3535,8 +4127,18 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3544,12 +4146,18 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)5, coefficients_high0, __m256i); @@ -3562,8 +4170,20 @@ compress_ciphertext_coefficient_f4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3589,7 +4209,11 @@ static KRML_MUSTINLINE void compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficients = compress_09_f4(to_unsigned_representative_61(re.coefficients[i0])); @@ -3677,7 +4301,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -3689,6 +4317,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = sample_ring_element_cbd_b41(copy_of_prf_input, domain_separator0); @@ -3697,7 +4326,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_a9_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -3705,9 +4334,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, 
error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -3716,12 +4347,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -3916,7 +4549,8 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)10); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -3924,12 +4558,16 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)10, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -3937,12 +4575,27 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)10, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3997,7 +4650,8 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)11); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4005,12 +4659,16 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)11, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4018,12 +4676,27 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)11, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4146,7 +4819,8 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)4); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4154,12 +4828,16 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)4, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4167,12 +4845,27 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)4, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4222,7 +4915,8 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)5); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4230,12 +4924,16 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)5, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4243,12 +4941,27 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)5, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4415,11 +5128,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_2f( IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); + deserialize_then_decompress_u_ed( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -4440,7 +5156,8 @@ with const generics static KRML_MUSTINLINE void decrypt_2f(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - deserialize_secret_key_ab(secret_key, secret_as_ntt); + deserialize_secret_key_ab(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -4555,17 +5272,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_a11( kdf_d8_ae(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_ae(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_ae(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -4760,9 +5477,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_5e( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_a9_ac(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), + H_a9_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly 
+ on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -5239,6 +5960,10 @@ static KRML_MUSTINLINE void sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_78( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. + */ while (true) { if (done) { break; @@ -5297,7 +6022,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -5448,9 +6173,14 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -5481,6 +6211,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_42( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
+ */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -5556,7 +6288,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( IndCpaPrivateKeyUnpacked_39 *private_key, IndCpaPublicKeyUnpacked_39 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_6a(key_generation_seed, hashed); + cpa_keygen_seed_d8_6a(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5586,8 +6321,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( sample_vector_cbd_then_ntt_out_b4(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_42(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_42(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5612,11 +6347,13 @@ serialize_unpacked_secret_key_c9(IndCpaPublicKeyUnpacked_39 *public_key, IndCpaPrivateKeyUnpacked_39 *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_1e( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_78(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_78( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -5803,11 +6540,15 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_39 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_42(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -5899,7 +6640,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -6129,7 +6873,11 @@ static KRML_MUSTINLINE void 
encrypt_unpacked_74( IndCpaPublicKeyUnpacked_39 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -6141,6 +6889,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd uu____3 = sample_ring_element_cbd_b4(copy_of_prf_input, domain_separator0); @@ -6149,7 +6898,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_a9_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -6157,9 +6906,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[4U]; - compute_vector_u_42(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_42(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -6168,12 +6919,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[4U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_c9( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_1e( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -6487,11 +7240,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_37( IndCpaPrivateKeyUnpacked_39 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[4U]; - deserialize_then_decompress_u_1e(ciphertext, u_as_ntt); + deserialize_then_decompress_u_1e( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_78( - Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, - (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1568U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)1408U, 
uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_42(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -6512,7 +7268,8 @@ with const generics static KRML_MUSTINLINE void decrypt_37(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[4U]; - deserialize_secret_key_42(secret_key, secret_as_ntt); + deserialize_secret_key_42(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[4U]; memcpy( @@ -6615,17 +7372,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_a10( kdf_d8_5e(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_5e(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_5e(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_af(ciphertext), Eurydice_array_to_slice((size_t)1568U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -6820,9 +7577,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_4d( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_a9_fd(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), + H_a9_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -7273,6 +8034,10 @@ static KRML_MUSTINLINE void sample_from_xof_6c0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_29( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -7331,7 +8096,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7487,9 +8252,14 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -7520,6 +8290,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_89( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -7595,7 +8367,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( IndCpaPrivateKeyUnpacked_94 *private_key, IndCpaPublicKeyUnpacked_94 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_f8(key_generation_seed, hashed); + cpa_keygen_seed_d8_f8(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -7625,8 +8400,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( sample_vector_cbd_then_ntt_out_b40(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_89(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_89(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -7651,11 +8426,13 @@ serialize_unpacked_secret_key_2d(IndCpaPublicKeyUnpacked_94 *public_key, IndCpaPrivateKeyUnpacked_94 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_ba( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_29(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_29( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -7842,11 +8619,15 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_94 *unpacked_public_key) { - Eurydice_slice uu____0 = - 
Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_89(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7984,7 +8765,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -8176,7 +8960,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( IndCpaPublicKeyUnpacked_94 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -8188,6 +8976,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_40 uu____3 = sample_ring_element_cbd_b40(copy_of_prf_input, domain_separator0); @@ -8196,7 +8985,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_a9_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -8204,9 +8993,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[2U]; - compute_vector_u_89(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_89(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -8215,12 +9006,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; 
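  /* Size check for the two encoding steps below (a derivation for the reader,
     not generated code): with k = 2, d_u = 10 and d_v = 4, as this ML-KEM 512
     instantiation uses, c_1 = Encode_{du}(Compress_q(u,d_u)) takes
     k * 256 * d_u / 8 = 2 * 256 * 10 / 8 = 640 bytes and
     c_2 = Encode_{dv}(Compress_q(v,d_v)) takes 256 * d_v / 8 = 128 bytes,
     so the ciphertext is 640 + 128 = 768 bytes, matching the subslice bounds
     passed to compress_then_serialize_u_2d and
     compress_then_serialize_ring_element_v_ba below. */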
libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[2U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_2d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ba( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -8504,11 +9297,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_4b( IndCpaPrivateKeyUnpacked_94 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[2U]; - deserialize_then_decompress_u_ba(ciphertext, u_as_ntt); + deserialize_then_decompress_u_ba( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_29( - Eurydice_array_to_subslice_from((size_t)768U, ciphertext, - (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)768U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)640U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_89(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -8529,7 +9325,8 @@ with const generics static KRML_MUSTINLINE void decrypt_4b(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[2U]; - deserialize_secret_key_89(secret_key, secret_as_ntt); + deserialize_secret_key_89(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[2U]; memcpy( @@ -8631,15 +9428,15 @@ void libcrux_ml_kem_ind_cca_decapsulate_a1( kdf_d8_4d(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_4d(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_4d(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_d0(ciphertext), Eurydice_array_to_slice((size_t)768U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h index c127a7b25..addfdaf30 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 
1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.c b/libcrux-ml-kem/c/libcrux_mlkem_portable.c index 128049b3b..fddae347c 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_mlkem_portable.h" @@ -66,7 +66,7 @@ static const int16_t ZETAS_TIMES_MONTGOMERY_R[128U] = { (int16_t)-108, (int16_t)-308, (int16_t)996, (int16_t)991, (int16_t)958, (int16_t)-1460, (int16_t)1522, (int16_t)1628}; -int16_t libcrux_ml_kem_polynomial_get_zeta(size_t i) { +int16_t libcrux_ml_kem_polynomial_zeta(size_t i) { return ZETAS_TIMES_MONTGOMERY_R[i]; } @@ -1152,11 +1152,28 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( */ uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = (int16_t)1664 - (int16_t)fe; - int16_t mask = shifted >> 15U; + int16_t shifted = + (int16_t)1664 - + (int16_t) /* The approach used here is inspired by: + https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 + If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ + fe; + int16_t mask = + /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = + -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive + <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so + if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ + shifted + + >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = shifted_positive_in_range >> 15U; + int16_t r0 = + /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the + most significant bit of shifted_positive_in_range will be 1. 
*/ + shifted_positive_in_range + + >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1192,7 +1209,16 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; + uint64_t compressed = + (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits + == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); + hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to + be constant time due to: + https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ + */ + fe + + << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2712,9 +2738,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_60( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_f1_ac(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), + H_f1_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -3204,6 +3234,10 @@ static KRML_MUSTINLINE void sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ff( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -3263,7 +3297,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3461,7 +3495,12 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -3523,7 +3562,13 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3532,9 +3577,9 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( for (size_t i = offset_vec; i < offset_vec + step_vec; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = - ntt_layer_int_vec_step_8c( - re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + ntt_layer_int_vec_step_8c(re->coefficients[j], + re->coefficients[j + step_vec], + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -3557,7 +3602,7 @@ static KRML_MUSTINLINE void ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0;); } @@ -3575,8 +3620,8 @@ static KRML_MUSTINLINE void ntt_at_layer_2_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_2_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U;); } @@ -3594,10 +3639,10 @@ static KRML_MUSTINLINE void ntt_at_layer_1_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_1_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + 
libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U;); } @@ -3615,7 +3660,11 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3632,7 +3681,9 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - ntt_at_layer_7_8c(re); + ntt_at_layer_7_8c(/* Due to the small coefficient bound, we can skip the first + round of Montgomery reductions. */ + re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)5U); @@ -3742,13 +3793,13 @@ ntt_multiply_ef_8c(libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_multiply_0d( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); out.coefficients[i0] = uu____0; } return out; @@ -3771,7 +3822,11 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_d0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3811,10 +3866,18 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector - coefficient_normal_form = to_standard_domain_8c(self->coefficients[j]); + coefficient_normal_form = to_standard_domain_8c( + self->coefficients[/* The coefficients are of the form aR^{-1} mod + q, which means calling to_montgomery_domain() + on them should return a mod q. 
*/ + j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -3846,6 +3909,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_d0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3921,7 +3986,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( IndCpaPrivateKeyUnpacked_af *private_key, IndCpaPublicKeyUnpacked_af *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_03(key_generation_seed, hashed); + cpa_keygen_seed_d8_03(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -3951,8 +4019,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( sample_vector_cbd_then_ntt_out_3b(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_d0(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_d0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -3977,11 +4045,13 @@ serialize_unpacked_secret_key_2f(IndCpaPublicKeyUnpacked_af *public_key, IndCpaPrivateKeyUnpacked_af *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_00( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_ff(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_ff( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -4169,11 +4239,15 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_af *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_d0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4284,10 +4358,10 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_1_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_1_step_0d( re->coefficients[round], 
- libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U;); } @@ -4305,8 +4379,8 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_2_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_2_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U;); } @@ -4324,7 +4398,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_inv_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0;); } @@ -4360,7 +4434,13 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -4373,7 +4453,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = inv_ntt_layer_int_vec_step_reduce_8c( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -4391,7 +4471,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_d0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -4417,7 +4500,11 @@ static KRML_MUSTINLINE void add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ 
-4543,8 +4630,27 @@ add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d( + self->coefficients[/* FIXME: Eurydice crashes with: Warning 11: in + top-level declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing + function cannot be translated into C*: let + mutable ret(Mark.Present,(Mark.AtMost 2), ): + int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. On the + following code: ```rust result.coefficients[i] + = Vector::barrett_reduce(Vector::add( + coefficient_normal_form, + &Vector::add(self.coefficients[i], + &message.coefficients[i]), )); ``` */ + i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -4757,7 +4863,11 @@ static KRML_MUSTINLINE void compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = compress_0d_d1(to_unsigned_field_modulus_8c(re.coefficients[i0])); @@ -4812,7 +4922,11 @@ static KRML_MUSTINLINE void compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = compress_0d_f4(to_unsigned_representative_8c(re.coefficients[i0])); @@ -4901,7 +5015,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( IndCpaPublicKeyUnpacked_af *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ 
-4913,6 +5031,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd0 uu____3 = sample_ring_element_cbd_3b(copy_of_prf_input, domain_separator0); @@ -4921,7 +5040,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_f1_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4929,9 +5048,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[4U]; - compute_vector_u_d0(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_d0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -4940,12 +5061,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[4U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_2f( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_00( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -5584,11 +5707,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_7d( IndCpaPrivateKeyUnpacked_af *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[4U]; - deserialize_then_decompress_u_00(ciphertext, u_as_ntt); + deserialize_then_decompress_u_00( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_ff( - Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, - (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1568U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_d0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5609,7 +5735,8 @@ with const generics static KRML_MUSTINLINE void decrypt_7d(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[4U]; - deserialize_secret_key_d0(secret_key, secret_as_ntt); + deserialize_secret_key_d0(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in 
Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[4U]; memcpy( @@ -5724,17 +5851,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_621( kdf_d8_60(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_60(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_60(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_af(ciphertext), Eurydice_array_to_slice((size_t)1568U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -5929,9 +6056,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_30( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_f1_fd(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), + H_f1_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -6381,6 +6512,10 @@ static KRML_MUSTINLINE void sample_from_xof_2b0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_64( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. + */ while (true) { if (done) { break; @@ -6440,7 +6575,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6586,7 +6721,11 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_a0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -6621,6 +6760,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_a0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
+ */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6696,7 +6837,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( IndCpaPrivateKeyUnpacked_d4 *private_key, IndCpaPublicKeyUnpacked_d4 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_10(key_generation_seed, hashed); + cpa_keygen_seed_d8_10(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6726,8 +6870,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( sample_vector_cbd_then_ntt_out_3b0(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_a0(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_a0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6752,11 +6896,13 @@ serialize_unpacked_secret_key_6d(IndCpaPublicKeyUnpacked_d4 *public_key, IndCpaPrivateKeyUnpacked_d4 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_86( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_64(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_64( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -6944,11 +7090,15 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_d4 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_a0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7074,7 +7224,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_a0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -7305,7 +7458,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( IndCpaPublicKeyUnpacked_d4 *public_key, 
uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -7318,6 +7475,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_400 uu____3 = sample_ring_element_cbd_3b0(copy_of_prf_input, domain_separator0); @@ -7326,7 +7484,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_f1_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -7334,9 +7492,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[2U]; - compute_vector_u_a0(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_a0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -7345,12 +7505,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[2U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_6d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_86( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -7665,11 +7827,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_d1( IndCpaPrivateKeyUnpacked_d4 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[2U]; - deserialize_then_decompress_u_86(ciphertext, u_as_ntt); + deserialize_then_decompress_u_86( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_64( - Eurydice_array_to_subslice_from((size_t)768U, ciphertext, - (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)768U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)640U, uint8_t, size_t)); 
libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_a0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7690,7 +7855,8 @@ with const generics static KRML_MUSTINLINE void decrypt_d1(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[2U]; - deserialize_secret_key_a0(secret_key, secret_as_ntt); + deserialize_secret_key_a0(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[2U]; memcpy( @@ -7793,17 +7959,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_620( kdf_d8_30(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_30(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_30(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_d0(ciphertext), Eurydice_array_to_slice((size_t)768U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -7998,9 +8164,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_f1_e0(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), + H_f1_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -8456,6 +8626,10 @@ static KRML_MUSTINLINE void sample_from_xof_2b1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -8515,7 +8689,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8650,7 +8824,11 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -8685,6 +8863,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8760,7 +8940,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( IndCpaPrivateKeyUnpacked_a0 *private_key, IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_9c(key_generation_seed, hashed); + cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8790,8 +8973,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( sample_vector_cbd_then_ntt_out_3b1(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_1b(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_1b(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8816,11 +8999,13 @@ serialize_unpacked_secret_key_43(IndCpaPublicKeyUnpacked_a0 *public_key, IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_6c( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_89( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -9008,11 +9193,15 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); 
deserialize_ring_elements_reduced_1b(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -9106,7 +9295,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -9299,7 +9491,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -9312,6 +9508,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = sample_ring_element_cbd_3b1(copy_of_prf_input, domain_separator0); @@ -9320,7 +9517,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_f1_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -9328,9 +9525,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -9339,12 +9538,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); 
compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -9629,11 +9830,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_42( IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); + deserialize_then_decompress_u_6c( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9654,7 +9858,8 @@ with const generics static KRML_MUSTINLINE void decrypt_42(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - deserialize_secret_key_1b(secret_key, secret_as_ntt); + deserialize_secret_key_1b(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -9756,15 +9961,15 @@ void libcrux_ml_kem_ind_cca_decapsulate_62( kdf_d8_d6(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_d6(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_d6(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/libcrux_mlkem_portable.h index 33fff6338..012f00992 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef 
__libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/libcrux_sha3.h b/libcrux-ml-kem/c/libcrux_sha3.h index 3101a818f..16a61b7e6 100644 --- a/libcrux-ml-kem/c/libcrux_sha3.h +++ b/libcrux-ml-kem/c/libcrux_sha3.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.c b/libcrux-ml-kem/c/libcrux_sha3_avx2.c index 4e234ddec..23fa30cd5 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.c +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_sha3_avx2.h" @@ -77,7 +77,8 @@ static KRML_MUSTINLINE __m256i and_not_xor_ef(__m256i a, __m256i b, __m256i c) { } static KRML_MUSTINLINE __m256i _veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = mm256_set1_epi64x((int64_t)c); + __m256i c0 = mm256_set1_epi64x( + (int64_t) /* Casting here is required, doesn't change the value. 
*/ c); return mm256_xor_si256(a, c0); } @@ -1430,13 +1431,13 @@ static KRML_MUSTINLINE void store_block_5b(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = - mm256_permute2x128_si256((int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = mm256_permute2x128_si256( + (int32_t)32, + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], @@ -1747,7 +1748,16 @@ void libcrux_sha3_avx2_x4_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = { + /* XXX: These functions could alternatively implement the same with the + portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, + 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, + 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, + 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); + keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, + 0x1fu8>([input3], [out3]); } */ + input0, + input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; keccak_fb(buf0, buf); } @@ -1962,13 +1972,13 @@ static KRML_MUSTINLINE void store_block_3a(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = - mm256_permute2x128_si256((int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = mm256_permute2x128_si256( + (int32_t)32, + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/libcrux_sha3_avx2.h index 7a6e0c8cb..645f80b34 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_avx2_H diff --git 
a/libcrux-ml-kem/c/libcrux_sha3_internal.h b/libcrux-ml-kem/c/libcrux_sha3_internal.h index 7c140d2b8..74eeb47a3 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_internal_H @@ -1811,6 +1811,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -2159,6 +2160,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -2507,6 +2509,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -2695,6 +2698,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2813,6 +2817,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -3161,6 +3166,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.c 
b/libcrux-ml-kem/c/libcrux_sha3_neon.c index c16b77594..5e4416bcd 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.c +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_sha3_neon.h" @@ -62,6 +62,7 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { + /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -72,6 +73,9 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, */ KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let s0 = KeccakState::new(); let s1 = + * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -83,6 +87,10 @@ libcrux_sha3_neon_x2_incremental_init(void) { KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -96,6 +104,10 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_first_three_blocks(&mut s0, out0); + * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -109,6 +121,10 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_next_block(&mut s0, out0); + * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -132,6 +148,10 @@ libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_five_blocks( KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + 
/* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.h b/libcrux-ml-kem/c/libcrux_sha3_neon.h index 2f179ee38..6e264c84f 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.h +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_neon_H diff --git a/libcrux-ml-kem/src/invert_ntt.rs b/libcrux-ml-kem/src/invert_ntt.rs index 7f9506731..87bc90fed 100644 --- a/libcrux-ml-kem/src/invert_ntt.rs +++ b/libcrux-ml-kem/src/invert_ntt.rs @@ -102,11 +102,8 @@ pub(crate) fn invert_ntt_at_layer_2( hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque 3328 (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); - re.coefficients[round] = Vector::inv_ntt_layer_2_step( - re.coefficients[round], - zeta(*zeta_i), - zeta(*zeta_i - 1), - ); + re.coefficients[round] = + Vector::inv_ntt_layer_2_step(re.coefficients[round], zeta(*zeta_i), zeta(*zeta_i - 1)); *zeta_i -= 1; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque 3328 diff --git a/libcrux-ml-kem/src/mlkem512.rs b/libcrux-ml-kem/src/mlkem512.rs index 1af827529..b9b33596d 100644 --- a/libcrux-ml-kem/src/mlkem512.rs +++ b/libcrux-ml-kem/src/mlkem512.rs @@ -4,17 +4,21 @@ use super::{constants::*, ind_cca::*, types::*, *}; // Kyber 512 parameters const RANK_512: usize = 2; const RANKED_BYTES_PER_RING_ELEMENT_512: usize = RANK_512 * BITS_PER_RING_ELEMENT / 8; -const T_AS_NTT_ENCODED_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; +const T_AS_NTT_ENCODED_SIZE_512: usize = + (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; const VECTOR_U_COMPRESSION_FACTOR_512: usize = 10; -const C1_BLOCK_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_U_COMPRESSION_FACTOR_512) / 8; +const C1_BLOCK_SIZE_512: usize = + (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_U_COMPRESSION_FACTOR_512) / 8; const C1_SIZE_512: usize = C1_BLOCK_SIZE_512 * RANK_512; const VECTOR_V_COMPRESSION_FACTOR_512: usize = 4; const C2_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_V_COMPRESSION_FACTOR_512) / 8; -const CPA_PKE_SECRET_KEY_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; +const CPA_PKE_SECRET_KEY_SIZE_512: usize = + (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; pub(crate) const CPA_PKE_PUBLIC_KEY_SIZE_512: usize = T_AS_NTT_ENCODED_SIZE_512 + 32; const CPA_PKE_CIPHERTEXT_SIZE_512: usize = C1_SIZE_512 + C2_SIZE_512; -pub(crate) const SECRET_KEY_SIZE_512: usize = 
CPA_PKE_SECRET_KEY_SIZE_512 + CPA_PKE_PUBLIC_KEY_SIZE_512 + H_DIGEST_SIZE + SHARED_SECRET_SIZE; +pub(crate) const SECRET_KEY_SIZE_512: usize = + CPA_PKE_SECRET_KEY_SIZE_512 + CPA_PKE_PUBLIC_KEY_SIZE_512 + H_DIGEST_SIZE + SHARED_SECRET_SIZE; const ETA1: usize = 3; const ETA1_RANDOMNESS_SIZE: usize = ETA1 * 64; diff --git a/libcrux-ml-kem/src/ntt.rs b/libcrux-ml-kem/src/ntt.rs index 973a6d945..fa08e35e5 100644 --- a/libcrux-ml-kem/src/ntt.rs +++ b/libcrux-ml-kem/src/ntt.rs @@ -114,11 +114,8 @@ pub(crate) fn ntt_at_layer_2( hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+4*3328) (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); - re.coefficients[round] = Vector::ntt_layer_2_step( - re.coefficients[round], - zeta(*zeta_i), - zeta(*zeta_i + 1), - ); + re.coefficients[round] = + Vector::ntt_layer_2_step(re.coefficients[round], zeta(*zeta_i), zeta(*zeta_i + 1)); *zeta_i += 1; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+5*3328) @@ -172,8 +169,7 @@ pub(crate) fn ntt_at_layer_3( hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+3*3328) (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); - re.coefficients[round] = - Vector::ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); + re.coefficients[round] = Vector::ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); hax_lib::fstar!( "reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+4*3328) diff --git a/libcrux-ml-kem/src/polynomial.rs b/libcrux-ml-kem/src/polynomial.rs index cb6f0fe8b..5bad1d43a 100644 --- a/libcrux-ml-kem/src/polynomial.rs +++ b/libcrux-ml-kem/src/polynomial.rs @@ -213,7 +213,7 @@ impl PolynomialRingElement { /// /// The NIST FIPS 203 standard can be found at /// . - + // TODO: Remove or replace with something that works and is useful for the proof. 
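For orientation (a hedged scalar sketch, not the extracted code): in the NTT domain each adjacent pair of coefficients is a degree-one polynomial, and ntt_multiply multiplies such pairs modulo X^2 - zeta for the appropriate power of zeta, so the base case is

    fn base_case_multiply(a: (i64, i64), b: (i64, i64), zeta: i64) -> (i64, i64) {
        const Q: i64 = 3329;
        // (a0 + a1*X) * (b0 + b1*X) mod (X^2 - zeta); inputs assumed reduced mod Q,
        // with plain % Q standing in for the Montgomery reductions used by the backends.
        let c0 = (a.0 * b.0 + a.1 * b.1 * zeta) % Q;
        let c1 = (a.0 * b.1 + a.1 * b.0) % Q;
        (c0, c1)
    }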
// #[cfg_attr(hax, hax_lib::requires( // hax_lib::forall(|i:usize| @@ -229,7 +229,7 @@ impl PolynomialRingElement { #[inline(always)] pub(crate) fn ntt_multiply(&self, rhs: &Self) -> Self { hax_lib::fstar!("admit ()"); - + let mut out = PolynomialRingElement::ZERO(); for i in 0..VECTORS_IN_RING_ELEMENT { From fbef3649fa222b800fc7dcc349855bcd7de48e36 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 18:47:29 +0000 Subject: [PATCH 4/7] c code refresh --- libcrux-ml-kem/cg/code_gen.txt | 10 +- libcrux-ml-kem/cg/libcrux_core.h | 10 +- libcrux-ml-kem/cg/libcrux_ct_ops.h | 10 +- libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h | 1378 ++++++++++++++--- libcrux-ml-kem/cg/libcrux_mlkem768_portable.h | 302 +++- libcrux-ml-kem/cg/libcrux_sha3_avx2.h | 28 +- libcrux-ml-kem/cg/libcrux_sha3_portable.h | 104 +- 7 files changed, 1463 insertions(+), 379 deletions(-) diff --git a/libcrux-ml-kem/cg/code_gen.txt b/libcrux-ml-kem/cg/code_gen.txt index 420446603..7e79f022e 100644 --- a/libcrux-ml-kem/cg/code_gen.txt +++ b/libcrux-ml-kem/cg/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 -Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 -Karamel: 8c3612018c25889288da6857771be3ad03b75bcd -F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty -Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a +Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f +Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c +Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 +F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc +Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 diff --git a/libcrux-ml-kem/cg/libcrux_core.h b/libcrux-ml-kem/cg/libcrux_core.h index b5a34d0e2..ca8a53171 100644 --- a/libcrux-ml-kem/cg/libcrux_core.h +++ b/libcrux-ml-kem/cg/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/cg/libcrux_ct_ops.h b/libcrux-ml-kem/cg/libcrux_ct_ops.h index ddf47bd96..5f693d09c 100644 --- a/libcrux-ml-kem/cg/libcrux_ct_ops.h +++ b/libcrux-ml-kem/cg/libcrux_ct_ops.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_ct_ops_H diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h index aa0858642..bb50d3eaf 100644 --- 
a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_mlkem768_avx2_H @@ -171,11 +171,16 @@ libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i v_minus_field_modulus = - libcrux_intrinsics_avx2_mm256_sub_epi16(vector, field_modulus); + libcrux_intrinsics_avx2_mm256_sub_epi16(/* Compute v_i - Q and crate a + mask from the sign bit of each + of these quantities. */ + vector, field_modulus); __m256i sign_mask = libcrux_intrinsics_avx2_mm256_srai_epi16( (int32_t)15, v_minus_field_modulus, __m256i); __m256i conditional_add_field_modulus = - libcrux_intrinsics_avx2_mm256_and_si256(sign_mask, field_modulus); + libcrux_intrinsics_avx2_mm256_and_si256(/* If v_i - Q < 0 then add back Q + to (v_i - Q). */ + sign_mask, field_modulus); return libcrux_intrinsics_avx2_mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -557,6 +562,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { + /* Compute the first term of the product */ __m256i shuffle_with = libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -564,8 +570,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(lhs, shuffle_with); + __m256i lhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + /* Prepare the left hand side */ lhs, shuffle_with); __m256i lhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = @@ -574,8 +580,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(rhs, shuffle_with); + __m256i rhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + /* Prepare the right hand side */ rhs, shuffle_with); __m256i rhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = @@ -584,8 +590,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, 
rhs_shuffled0, __m128i); __m256i rhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(rhs_odds); - __m256i left = - libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_evens0, rhs_evens0); + __m256i left = libcrux_intrinsics_avx2_mm256_mullo_epi32( + /* Start operating with them */ lhs_evens0, rhs_evens0); __m256i right = libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = @@ -600,7 +606,7 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - rhs, + /* Compute the second term of the product */ rhs, libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, @@ -615,8 +621,10 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)16, products_right0, __m256i); - return libcrux_intrinsics_avx2_mm256_blend_epi16((int32_t)170, products_left0, - products_right1, __m256i); + return libcrux_intrinsics_avx2_mm256_blend_epi16( + (int32_t)170, + /* Combine them into one vector */ products_left0, products_right1, + __m256i); } /** @@ -634,13 +642,60 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = - libcrux_intrinsics_avx2_mm256_slli_epi16((int32_t)15, vector, __m256i); - __m128i low_msbs = libcrux_intrinsics_avx2_mm256_castsi256_si128(lsb_to_msb); + __m256i lsb_to_msb = libcrux_intrinsics_avx2_mm256_slli_epi16( + (int32_t)15, + /* Suppose |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ + 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least + significant bit in each lane, move it to the most significant position + to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ + d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ + n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ + vector, __m256i); + __m128i low_msbs = + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* Get the first 8 16-bit + elements ... */ + lsb_to_msb); __m128i high_msbs = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, lsb_to_msb, __m128i); - __m128i msbs = libcrux_intrinsics_avx2_mm_packs_epi16(low_msbs, high_msbs); - int32_t bits_packed = libcrux_intrinsics_avx2_mm_movemask_epi8(msbs); + (int32_t)1, + /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); + __m128i msbs = + libcrux_intrinsics_avx2_mm_packs_epi16(/* ... and then pack them into + 8-bit values using signed + saturation. This function packs + all the |low_msbs|, and then the + high ones. low_msbs = a₀0¹⁵ + b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ + g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ + j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ + o₀0¹⁵ p₀0¹⁵ We shifted by 15 + above to take advantage of the + signed saturation performed by + mm_packs_epi16: - if the sign + bit of the 16-bit element being + packed is 1, the corresponding + 8-bit element in |msbs| will be + 0xFF. - if the sign bit of the + 16-bit element being packed is + 0, the corresponding 8-bit + element in |msbs| will be 0. 
+ Thus, if, for example, a₀ = 1, + e₀ = 1, and p₀ = 1, and every + other bit is 0, after packing + into 8 bit value, |msbs| will + look like: 0xFF 0x00 0x00 0x00 | + 0xFF 0x00 0x00 0x00 | 0x00 0x00 + 0x00 0x00 | 0x00 0x00 0x00 0xFF + */ + low_msbs, high_msbs); + int32_t bits_packed = + libcrux_intrinsics_avx2_mm_movemask_epi8(/* Now that every element is + either 0xFF or 0x00, we just + extract the most significant + bit from each element and + collate them into two bytes. + */ + msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -659,18 +714,63 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { - __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( - b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( - coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, - (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, - (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, - (int16_t)-32768)); - return libcrux_intrinsics_avx2_mm256_srli_epi16((int32_t)15, - coefficients_in_msb, __m256i); + __m256i coefficients = + libcrux_intrinsics_avx2_mm256_set_epi16(/* We need to take each bit from + the 2 bytes of input and put + them into their own 16-bit + lane. Ideally, we'd load the + two bytes into the vector, + duplicate them, and right-shift + the 0th element by 0 bits, the + first element by 1 bit, the + second by 2 bits and so on + before AND-ing with 0x1 to + leave only the least + signifinicant bit. But since + |_mm256_srlv_epi16| does not + exist, so we have to resort to + a workaround. Rather than + shifting each element by a + different amount, we'll + multiply each element by a + value such that the bit we're + interested in becomes the most + significant bit. The + coefficients are loaded as + follows: */ + b, b, b, b, b, b, b, b, a, a, a, + a, a, a, a, a); + __m256i coefficients_in_msb = + libcrux_intrinsics_avx2_mm256_mullo_epi16(/* And this vector, when + multiplied with the previous + one, ensures that the bit + we'd like to keep in each + lane becomes the most + significant bit upon + multiplication. */ + coefficients, + libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 8U, + (int16_t)1 << 9U, + (int16_t)1 << 10U, + (int16_t)1 << 11U, + (int16_t)1 << 12U, + (int16_t)1 << 13U, + (int16_t)1 << 14U, + (int16_t)-32768, + (int16_t)1 << 8U, + (int16_t)1 << 9U, + (int16_t)1 << 10U, + (int16_t)1 << 11U, + (int16_t)1 << 12U, + (int16_t)1 << 13U, + (int16_t)1 << 14U, + (int16_t)-32768)); + return libcrux_intrinsics_avx2_mm256_srli_epi16( + (int32_t)15, + /* Now that they're all in the most significant bit position, shift them + down to the least significant bit. 
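A scalar rendering of this trick may help (a sketch only, not the libcrux API): multiplying lane i by 1 << (15 - i) is what moves bit i of the input byte into the sign position, so a logical right shift by 15 leaves exactly that bit.

    fn deserialize_1_scalar(a: u8, b: u8) -> [u16; 16] {
        let mut out = [0u16; 16];
        for i in 0..8 {
            // (x << (15 - i)) puts bit i of x in the top position; >> 15 extracts it.
            out[i] = ((a as u16) << (15 - i)) >> 15;
            out[8 + i] = ((b as u16) << (15 - i)) >> 15;
        }
        out
    }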
*/ + coefficients_in_msb, __m256i); } KRML_ATTRIBUTE_TARGET("avx2") @@ -685,7 +785,23 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* We need to take each bit from the 2 bytes of input and put them + into their own 16-bit lane. Ideally, we'd load the two bytes into + the vector, duplicate them, and right-shift the 0th element by 0 + bits, the first element by 1 bit, the second by 2 bits and so on + before AND-ing with 0x1 to leave only the least signifinicant bit. + But since |_mm256_srlv_epi16| does not exist, so we have to resort + to a workaround. Rather than shifting each element by a different + amount, we'll multiply each element by a value such that the bit + we're interested in becomes the most significant bit. The + coefficients are loaded as follows: And this vector, when + multiplied with the previous one, ensures that the bit we'd like to + keep in each lane becomes the most significant bit upon + multiplication. Now that they're all in the most significant bit + position, shift them down to the least significant bit. */ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -721,23 +837,70 @@ static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); - __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); - __m256i combined = libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32( - adjacent_8_combined, libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 4U, + /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | + 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be + laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA + 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ + vector); + __m256i adjacent_8_combined = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* Recall that + |adjacent_2_combined| goes + as follows: 0x00_00_00_BA + 0x00_00_00_DC | + 0x00_00_00_FE 0x00_00_00_HG + | ... Out of this, we only + need the first byte, the 4th + byte, the 8th byte and so on + from the bottom and the top + 128 bits. 
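The pair-combining and byte shuffle above amount to plain nibble packing; a scalar sketch (assuming, as the layout suggests, that the even-indexed coefficient ends up in the low nibble):

    fn serialize_4_scalar(v: [u16; 16]) -> [u8; 8] {
        let mut out = [0u8; 8];
        for i in 0..8 {
            // coefficient 2i -> low nibble, coefficient 2i+1 -> high nibble
            out[i] = (v[2 * i] & 0x0f) as u8 | (((v[2 * i + 1] & 0x0f) as u8) << 4);
        }
        out
    }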
*/ + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0)); + __m256i combined = + libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32(/* |adjacent_8_combined| + looks like this: 0: + 0xHG_FE_DC_BA 1: + 0x00_00_00_00 | 2: + 0x00_00_00_00 3: + 0x00_00_00_00 | 4: + 0xPO_NM_LK_JI .... + We put the element + at 4 after the + element at 0 ... */ + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)4, + (int32_t)0)); __m128i combined0 = libcrux_intrinsics_avx2_mm256_castsi256_si128(combined); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( - Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); + Eurydice_array_to_slice( + (size_t)16U, + /* ... so that we can read them out in one go. */ serialized, + uint8_t), + combined0); uint8_t ret0[8U]; Result_15 dst; Eurydice_slice_to_array2( @@ -763,8 +926,33 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( - b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, b0); + __m256i coefficients = + libcrux_intrinsics_avx2_mm256_set_epi16(/* Every 4 bits from each byte of + input should be put into its + own 16-bit lane. Since + |_mm256_srlv_epi16| does not + exist, we have to resort to a + workaround. Rather than + shifting each element by a + different amount, we'll + multiply each element by a + value such that the bits we're + interested in become the most + significant bits (of an 8-bit + value). In this lane, the 4 + bits we need to put are already + the most significant bits of + |bytes[7]| (that is, b7). */ + b7, + /* In this lane, the 4 bits we + need to put are the least + significant bits, so we need to + shift the 4 least-significant + bits of |b7| to the most + significant bits (of an 8-bit + value). */ + b7, b6, b6, b5, b5, b4, b4, b3, + b3, b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -774,10 +962,14 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); __m256i coefficients_in_lsb = libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)4, coefficients_in_msb, __m256i); + (int32_t)4, + /* Once the 4-bit coefficients are in the most significant positions (of + an 8-bit value), shift them all down by 4. */ + coefficients_in_msb, __m256i); return libcrux_intrinsics_avx2_mm256_and_si256( - coefficients_in_lsb, libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 4U) - (int16_t)1)); + /* Zero the remaining bits. 
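Read back the other way this is ordinary nibble extraction; a scalar sketch under the same assumed layout, not the libcrux API:

    fn deserialize_4_scalar(bytes: [u8; 8]) -> [u16; 16] {
        let mut out = [0u16; 16];
        for i in 0..8 {
            out[2 * i] = (bytes[i] & 0x0f) as u16;     // low nibble
            out[2 * i + 1] = (bytes[i] >> 4) as u16;   // high nibble
        }
        out
    }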
*/ coefficients_in_lsb, + libcrux_intrinsics_avx2_mm256_set1_epi16(((int16_t)1 << 4U) - + (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") @@ -794,7 +986,23 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* Every 4 bits from each byte of input should be put into its own + 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to + resort to a workaround. Rather than shifting each element by a + different amount, we'll multiply each element by a value such that + the bits we're interested in become the most significant bits (of + an 8-bit value). In this lane, the 4 bits we need to put are + already the most significant bits of |bytes[7]| (that is, b7). In + this lane, the 4 bits we need to put are the least significant + bits, so we need to shift the 4 least-significant bits of |b7| to + the most significant bits (of an 8-bit value). These constants are + chosen to shift the bits of the values that we loaded into + |coefficients|. Once the 4-bit coefficients are in the most + significant positions (of an 8-bit value), shift them all down + by 4. Zero the remaining bits. */ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -818,35 +1026,106 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = libcrux_intrinsics_avx2_mm256_madd_epi16( - vector, libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); - __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, (int32_t)0, - (int32_t)22, (int32_t)0, (int32_t)22)); + __m256i adjacent_2_combined = + libcrux_intrinsics_avx2_mm256_madd_epi16(/* If |vector| is laid out as + follows (superscript number + indicates the corresponding + bit is duplicated that many + times): 0¹¹a₄a₃a₂a₁a₀ + 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ + 0¹¹d₄d₃d₂d₁d₀ | ↩ + 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ + 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | + ↩ |adjacent_2_combined| will + be laid out as a series of + 32-bit integers, as follows: + 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ + 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... */ + vector, + libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, + (int16_t)1)); + __m256i adjacent_4_combined = + libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Recall that + |adjacent_2_combined| is laid + out as follows: + 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ + 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... 
This shift results in: + b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ + f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... */ + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)22, adjacent_4_combined, __m256i); + (int32_t)22, + /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift + down by 22 bits to remove the least significant 0 bits that aren't part + of the bits we need. */ + adjacent_4_combined, __m256i); __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi32( - (int32_t)8, adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = libcrux_intrinsics_avx2_mm256_sllv_epi32( - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)0, (int32_t)0, (int32_t)12)); + (int32_t)8, + /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks + like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² + 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to + read out the bytes in one go, we need to shifts the bits in position 2 + to position 1 in each 128-bit lane. */ + adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = + libcrux_intrinsics_avx2_mm256_sllv_epi32(/* |adjacent_8_combined|, when + viewed as a set of 32-bit + values, now looks like: + 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0³² 0³² | ↩ Once again, we + line these bits up by shifting + the up values at indices 0 and + 5 by 12, viewing the resulting + register as a set of 64-bit + values, and then shifting down + the 64-bit values by 12 bits. + */ + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = libcrux_intrinsics_avx2_mm256_srli_epi64( (int32_t)12, adjacent_8_combined0, __m256i); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined1); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 40 bits + starting at position 0 in + the lower 128-bit lane, + ... */ + adjacent_8_combined1); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, adjacent_8_combined1, __m128i); + (int32_t)1, + /* ... 
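All of this lane shuffling computes the usual little-endian 5-bit packing; a scalar sketch (for orientation only) that should produce the same 10 bytes from 16 coefficients:

    fn serialize_5_scalar(v: [u16; 16]) -> [u8; 10] {
        let mut out = [0u8; 10];
        for i in 0..2 {
            // Pack 8 coefficients (40 bits), coefficient 0 in the lowest bits.
            let mut bits: u64 = 0;
            for j in 0..8 {
                bits |= ((v[8 * i + j] & 0x1f) as u64) << (5 * j);
            }
            for j in 0..5 {
                out[5 * i + j] = ((bits >> (8 * j)) & 0xff) as u8;
            }
        }
        out
    }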
and the second 40 bits at position 0 in the upper 128-bit lane */ + adjacent_8_combined1, __m128i); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -952,27 +1231,87 @@ static inline core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); - __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 10U, + /* If |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... + |adjacent_2_combined| will be laid out as a series of 32-bit + integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ + vector); + __m256i adjacent_4_combined = + libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Shifting up the values at the + even indices by 12, we get: + b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ ... */ + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)12, adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - adjacent_4_combined0, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, - (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); + (int32_t)12, + /* Viewing this as a set of 64-bit integers we get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ ... Shifting down by 12 gives us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ ... */ + adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* |adjacent_4_combined|, when + the bottom and top 128 + bit-lanes are grouped into + bytes, looks like: + 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ + 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ + In each 128-bit lane, we + want to put bytes 8, 9, 10, + 11, 12 after bytes 0, 1, 2, + 3 to allow for sequential + reading. 
*/ + adjacent_4_combined0, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0)); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 64 bits + starting at position 0 in + the lower 128-bit lane, + ... */ + adjacent_8_combined); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, adjacent_8_combined, __m128i); + (int32_t)1, + /* and 64 bits starting at position 0 in the upper 128-bit lane. */ + adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -981,8 +1320,167 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If + |vector| + is + laid + out + as + follows + (superscript + number + indicates + the + corresponding + bit + is + duplicated + that + many + times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ + | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ + | ↩ + ... + |adjacent_2_combined| + will + be + laid + out + as a + series + of + 32-bit + integers, + as + follows: + 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + .... + Shifting + up + the + values + at + the + even + indices + by + 12, + we + get: + b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + ... + Viewing + this + as a + set + of + 64-bit + integers + we + get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ + ... + Shifting + down + by + 12 + gives + us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ + ... + |adjacent_4_combined|, + when + the + bottom + and + top + 128 + bit-lanes + are + grouped + into + bytes, + looks + like: + 0₇0₆0₅B₄B₃B₂B₁B₀ + | ↩ + 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ + | ↩ + In + each + 128-bit + lane, + we + want + to + put + bytes + 8, + 9, + 10, + 11, + 12 + after + bytes + 0, + 1, + 2, 3 + to + allow + for + sequential + reading. + We + now + have + 64 + bits + starting + at + position + 0 in + the + lower + 128-bit + lane, + ... + and + 64 + bits + starting + at + position + 0 in + the + upper + 128-bit + lane. 
+ */ + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1038,16 +1536,20 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 6U)); __m256i coefficients1 = libcrux_intrinsics_avx2_mm256_srli_epi16( (int32_t)6, coefficients0, __m256i); - return libcrux_intrinsics_avx2_mm256_and_si256( - coefficients1, libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 10U) - (int16_t)1)); + return libcrux_intrinsics_avx2_mm256_and_si256(/* Here I can prove this `and` + is not useful */ + coefficients1, + libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 10U) - + (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = - Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( + /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, + (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1233,28 +1735,70 @@ libcrux_ml_kem_vector_avx2_sampling_rejection_sample(Eurydice_slice input, __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can + be interpreted as a + sequence of + serialized 12-bit + (i.e. uncompressed) + coefficients. Not + all coefficients + may be less than + FIELD_MODULUS + though. */ + input); __m256i compare_with_field_modulus = - libcrux_intrinsics_avx2_mm256_cmpgt_epi16(field_modulus, + libcrux_intrinsics_avx2_mm256_cmpgt_epi16(/* Suppose we view + |potential_coefficients| as + follows (grouping 64-bit + elements): A B C D | E F G H + | .... and A < 3329, D < 3329 + and H < 3329, + |compare_with_field_modulus| + will look like: 0xFF 0 0 0xFF + | 0 0 0 0xFF | ... */ + field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each + lane is either 0 or 1, we + only need one bit from + each lane in the register + to tell us what + coefficients to keep and + what to throw-away. + Combine all the bits + (there are 16) into two + bytes. */ + compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, + /* Each bit (and its corresponding position) represents an element we + want to sample. We'd like all such elements to be next to each other + starting at index 0, so that they can be read from the vector + easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level + shuffling indices needed to make this happen. For e.g. if good[0] = + 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit + lane to the first. To do this, we need the byte-level shuffle + indices to be 2 3 X X X X ... 
*/ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = libcrux_intrinsics_avx2_mm_loadu_si128( - Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); + __m128i lower_shuffles0 = + libcrux_intrinsics_avx2_mm_loadu_si128(Eurydice_array_to_slice( + (size_t)16U, + /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, + uint8_t)); __m128i lower_coefficients = libcrux_intrinsics_avx2_mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = libcrux_intrinsics_avx2_mm_shuffle_epi8( lower_coefficients, lower_shuffles0); - libcrux_intrinsics_avx2_mm_storeu_si128(output, lower_coefficients0); + libcrux_intrinsics_avx2_mm_storeu_si128( + /* ... then write them out ... */ output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, + /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1435,7 +1979,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)10); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1445,11 +1991,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, decompressed_low1, __m256i); + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1459,13 +2009,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, decompressed_high1, __m256i); + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -1531,7 +2097,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)11); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1541,11 +2109,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, decompressed_low1, __m256i); + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1555,13 +2127,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, decompressed_high1, __m256i); + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -1665,7 +2253,13 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -1676,7 +2270,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = libcrux_ml_kem_ntt_ntt_layer_int_vec_step_61( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -1699,8 +2293,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_3_61( size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_3_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); } } @@ -1718,8 +2311,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_2_61( size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_2_step_09( - re->coefficients[round], libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U; } } @@ -1738,10 +2331,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_1_61( size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_1_step_09( - re->coefficients[round], libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + 
libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U; } } @@ -1761,7 +2354,11 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -1858,7 +2455,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)4); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1868,11 +2467,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, decompressed_low1, __m256i); + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1882,13 +2485,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, decompressed_high1, __m256i); + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -1949,7 +2568,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)5); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1959,11 +2580,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, decompressed_low1, __m256i); + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1973,13 +2598,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, decompressed_high1, __m256i); + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -2062,13 +2703,13 @@ libcrux_ml_kem_polynomial_ntt_multiply_ef_61( size_t i0 = i; out.coefficients[i0] = libcrux_ml_kem_vector_avx2_ntt_multiply_09( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); } return out; } @@ -2089,9 +2730,14 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2114,11 +2760,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_1_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U; } } @@ -2138,9 +2783,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_2_step_09( - re->coefficients[round], - 
libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U; } } @@ -2161,7 +2805,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61( re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_3_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); } } @@ -2196,7 +2840,13 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2209,7 +2859,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = libcrux_ml_kem_invert_ntt_inv_ntt_layer_int_vec_step_reduce_61( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -2228,7 +2878,10 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61(&zeta_i, re, (size_t)3U); @@ -2423,11 +3076,16 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_2f( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(/* u := + Decompress_q(Decode_{d_u}(c), + d_u) */ + ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = libcrux_ml_kem_matrix_compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -2450,7 +3108,8 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_2f( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 
secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab( + /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -2999,6 +3658,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. + */ while (true) { if (done) { break; @@ -3065,7 +3728,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3087,12 +3750,15 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_fa( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -3362,7 +4028,12 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -3383,7 +4054,10 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_61(re); + libcrux_ml_kem_ntt_ntt_at_layer_7_61(/* Due to the small coefficient bound, we + can skip the first round of Montgomery + reductions. 
*/ + re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -3594,7 +4268,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3717,8 +4395,26 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( + self->coefficients + [/* FIXME: Eurydice crashes with: Warning 11: in top-level + declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing function cannot be + translated into C*: let mutable ret(Mark.Present,(Mark.AtMost + 2), ): int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. On the following code: + ```rust result.coefficients[i] = + Vector::barrett_reduce(Vector::add( coefficient_normal_form, + &Vector::add(self.coefficients[i], &message.coefficients[i]), + )); ``` */ + i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3776,9 +4472,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. + the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... 
*/ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -3787,11 +4497,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -3805,10 +4521,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3872,9 +4601,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. + the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... 
*/ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -3883,11 +4626,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -3901,10 +4650,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4020,9 +4782,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. + the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... 
*/ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4031,11 +4807,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4049,10 +4831,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4083,7 +4878,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficient = libcrux_ml_kem_vector_avx2_compress_09_d1( libcrux_ml_kem_serialize_to_unsigned_field_modulus_61( @@ -4115,9 +4914,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. 
+ the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... */ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4126,11 +4939,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4144,10 +4963,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. 
*/ + compressed, __m256i); } /** @@ -4178,7 +5010,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficients = libcrux_ml_kem_vector_avx2_compress_09_f4( libcrux_ml_kem_vector_traits_to_unsigned_representative_61( @@ -4270,7 +5106,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -4283,6 +5122,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_b4( copy_of_prf_input, domain_separator0); @@ -4291,7 +5131,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_avx2_PRF_a9_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4299,10 +5139,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_sampling_sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_ab(public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ + public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_61( @@ -4312,12 +5154,14 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_8c( 
uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -4460,17 +5304,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_a1( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_d8_ae(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_d8_ae(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -4810,11 +5654,18 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_61( - self->coefficients[j]); + self->coefficients[/* The coefficients are of the form aR^{-1} mod + q, which means calling to_montgomery_domain() + on them should return a mod q. */ + j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -4845,6 +5696,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
+ */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_61(); t_as_ntt[i0] = uu____0; @@ -4925,7 +5778,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -4958,8 +5813,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5095,12 +5950,18 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_8c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_ed( - public_key->t_as_ntt, - Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_ed(/* pk := (Encode_12(tˆ + mod^{+}q) || ρ) */ + public_key->t_as_ntt, + Eurydice_array_to_slice( + (size_t)32U, + public_key->seed_for_A, + uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(/* sk := Encode_12(sˆ mod^{+}q) + */ + private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -5442,17 +6303,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_a10( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_33_ae(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_33_ae(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -5772,7 +6633,9 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, 
libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_be(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5805,8 +6668,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5971,7 +6834,10 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_avx2_H_a9_e0( - Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(/* Eurydice can't access values directly on + the types. We need to go to the `value` + directly. */ + private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -6931,6 +7797,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_b3( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -6998,7 +7868,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_b3( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7021,12 +7891,15 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_bf( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7061,7 +7934,10 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_e2( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, + /* XXX: We need to copy_from_slice here because karamel can't handle the + assignment cf. https://github.com/FStarLang/karamel/pull/491 */ + key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_f6); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h index 519b51565..7a9446452 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_mlkem768_portable_H @@ -87,7 +87,7 @@ static const int16_t libcrux_ml_kem_polynomial_ZETAS_TIMES_MONTGOMERY_R[128U] = (int16_t)-108, (int16_t)-308, (int16_t)996, (int16_t)991, (int16_t)958, (int16_t)-1460, (int16_t)1522, (int16_t)1628}; -static KRML_MUSTINLINE int16_t libcrux_ml_kem_polynomial_get_zeta(size_t i) { +static KRML_MUSTINLINE int16_t libcrux_ml_kem_polynomial_zeta(size_t i) { return libcrux_ml_kem_polynomial_ZETAS_TIMES_MONTGOMERY_R[i]; } @@ -1235,11 +1235,28 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( static inline uint8_t 
libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = (int16_t)1664 - (int16_t)fe; - int16_t mask = shifted >> 15U; + int16_t shifted = + (int16_t)1664 - + (int16_t) /* The approach used here is inspired by: + https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 + If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ + fe; + int16_t mask = + /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = + -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive + <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so + if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ + shifted + + >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = shifted_positive_in_range >> 15U; + int16_t r0 = + /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the + most significant bit of shifted_positive_in_range will be 1. */ + shifted_positive_in_range + + >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1276,7 +1293,16 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( static inline int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; + uint64_t compressed = + (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits + == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); + hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to + be constant time due to: + https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ + */ + fe + + << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2878,7 +2904,13 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2889,7 +2921,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = libcrux_ml_kem_ntt_ntt_layer_int_vec_step_8c( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -2913,7 +2945,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0; } } @@ -2932,9 +2964,8 @@ static KRML_MUSTINLINE void 
libcrux_ml_kem_ntt_ntt_at_layer_2_8c( zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_2_step_0d( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U; } } @@ -2953,11 +2984,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_1_8c( zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_1_step_0d( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U; } } @@ -2976,7 +3006,11 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3222,13 +3256,13 @@ libcrux_ml_kem_polynomial_ntt_multiply_ef_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_multiply_0d( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); out.coefficients[i0] = uu____0; } return out; @@ -3251,7 +3285,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3277,11 +3315,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_1_step_0d( - re->coefficients[round], - 
libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U; } } @@ -3300,9 +3337,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_2_step_0d( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U; } } @@ -3322,7 +3358,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_inv_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0; } } @@ -3360,7 +3396,13 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3373,7 +3415,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = libcrux_ml_kem_invert_ntt_inv_ntt_layer_int_vec_step_reduce_8c( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -3391,7 +3433,10 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c(&zeta_i, re, (size_t)3U); @@ -3595,11 +3640,16 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_42( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - 
libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(/* u := + Decompress_q(Decode_{d_u}(c), + d_u) */ + ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = libcrux_ml_kem_matrix_compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3621,7 +3671,8 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_42( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b( + /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -4156,6 +4207,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -4222,7 +4277,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -4244,12 +4299,15 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_3f( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_1b( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4497,7 +4555,12 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -4519,7 +4582,10 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_8c(re); + libcrux_ml_kem_ntt_ntt_at_layer_7_8c(/* Due to the small coefficient bound, we + can skip the first round of Montgomery + reductions. 
*/ + re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4726,7 +4792,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4858,8 +4928,28 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d( + self->coefficients + [/* FIXME: Eurydice crashes with: Warning 11: in + top-level declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing + function cannot be translated into C*: let + mutable ret(Mark.Present,(Mark.AtMost 2), ): + int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. 
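The FIXME comment here (it continues into the next hunk and quotes the original Rust) records why the source binds the inner `add` to a temporary instead of nesting the two calls; the extracted C around it follows exactly that shape. A rough sketch of the workaround, with an informal `Vector` trait standing in for the real `Operations` trait (not the libcrux signatures):

```rust
// Illustrative only: `Vector` loosely mirrors the Operations trait.
trait Vector: Copy {
    fn add(lhs: Self, rhs: &Self) -> Self;
    fn barrett_reduce(v: Self) -> Self;
}

fn add_message_error_reduce<V: Vector>(coefficient_normal_form: V, s: V, m: V) -> V {
    // Instead of V::add(coefficient_normal_form, &V::add(s, &m)) in a single
    // expression, bind the inner sum first so Eurydice/KaRaMeL can extract it:
    let tmp = V::add(s, &m);
    let tmp = V::add(coefficient_normal_form, &tmp);
    V::barrett_reduce(tmp)
}
```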
On the + following code: ```rust result.coefficients[i] + = Vector::barrett_reduce(Vector::add( + coefficient_normal_form, + &Vector::add(self.coefficients[i], + &message.coefficients[i]), )); ``` */ + i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -5116,7 +5206,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = libcrux_ml_kem_vector_portable_compress_0d_d1( @@ -5176,7 +5270,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = libcrux_ml_kem_vector_portable_compress_0d_f4( @@ -5268,7 +5366,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5281,6 +5382,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_3b( copy_of_prf_input, domain_separator0); @@ -5289,7 +5391,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_portable_PRF_f1_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5297,10 +5399,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_sampling_sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - 
libcrux_ml_kem_matrix_compute_vector_u_1b(public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ + public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_8c( @@ -5310,12 +5414,14 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5456,17 +5562,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_62( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_d8_d6(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_d8_d6(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -5741,12 +5847,20 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_8c( - self->coefficients[j]); + self->coefficients[/* The coefficients are of the form aR^{-1} + mod q, which means calling + to_montgomery_domain() on them should + return a mod q. 
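Pulling together the spec annotations scattered through the encrypt hunk above (ML-KEM 768 parameters: k = 3, η₁ = η₂ = 2, d_u = 10, d_v = 4, so c₁ is 960 bytes and c₂ is 128 bytes, matching the offsets in the code), the ciphertext is assembled as follows; the PRF counter N is incremented after every call:

```latex
% IND-CPA encryption steps as annotated in the hunks above
\[
\begin{aligned}
r_i &\leftarrow \mathrm{CBD}_{\eta_1}(\mathrm{PRF}(r, N)), \quad
e_{1,i} \leftarrow \mathrm{CBD}_{\eta_2}(\mathrm{PRF}(r, N)), \quad
e_2 \leftarrow \mathrm{CBD}_{\eta_2}(\mathrm{PRF}(r, N)) \\
u &= \mathrm{NTT}^{-1}(\hat{A}^{T} \circ \hat{r}) + e_1 \\
v &= \mathrm{NTT}^{-1}(\hat{t}^{T} \circ \hat{r}) + e_2 + \mathrm{Decompress}_q(\mathrm{Decode}_1(m), 1) \\
c &= \mathrm{Encode}_{d_u}(\mathrm{Compress}_q(u, d_u)) \,\|\, \mathrm{Encode}_{d_v}(\mathrm{Compress}_q(v, d_v))
\end{aligned}
\]
```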
*/ + j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -5778,6 +5892,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_8c(); t_as_ntt[i0] = uu____0; @@ -5857,7 +5973,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5890,8 +6008,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6023,12 +6141,18 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_43( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_6c( - public_key->t_as_ntt, - Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_6c(/* pk := (Encode_12(tˆ + mod^{+}q) || ρ) */ + public_key->t_as_ntt, + Eurydice_array_to_slice( + (size_t)32U, + public_key->seed_for_A, + uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_89(private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_89(/* sk := Encode_12(sˆ mod^{+}q) + */ + private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6340,17 +6464,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_620( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_33_d6(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_33_d6(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + 
Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -6609,7 +6733,9 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6642,8 +6768,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6781,7 +6907,10 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_portable_H_f1_e0( - Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(/* Eurydice can't access values directly on + the types. We need to go to the `value` + directly. */ + private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7599,7 +7728,10 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_df( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, + /* XXX: We need to copy_from_slice here because karamel can't handle the + assignment cf. 
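The `validate_private_key_only` hunk above checks the ML-KEM secret-key invariant H(pk) = h, where pk is the public key embedded in the secret key; the offsets 384·k = 1152 and 768·k + 32 = 2336 visible in the subslice correspond to k = 3. A minimal sketch of that check for ML-KEM 768 sizes, with `sha3_256` standing in for any SHA3-256 implementation (an assumption, not the libcrux hash interface):

```rust
// Illustrative sketch for ML-KEM 768 sizes only.
fn validate_private_key(sk: &[u8; 2400], sha3_256: impl Fn(&[u8]) -> [u8; 32]) -> bool {
    let pk = &sk[1152..2336];          // Encode_12(s) is sk[..1152]; pk follows
    let stored_hash = &sk[2336..2368]; // H(pk) as stored at key generation
    // The final 32 bytes of sk are the implicit-rejection value z.
    sha3_256(pk).as_slice() == stored_hash
}
```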
https://github.com/FStarLang/karamel/pull/491 */ + key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_1d); libcrux_ml_kem_polynomial_PolynomialRingElement_1d ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h index a77bfdbea..5955882fa 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_sha3_avx2_H @@ -104,7 +104,9 @@ libcrux_sha3_simd_avx2_and_not_xor_ef(__m256i a, __m256i b, __m256i c) { KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_sha3_simd_avx2__veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x((int64_t)c); + __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x( + (int64_t) /* Casting here is required, doesn't change the value. */ + c); return libcrux_intrinsics_avx2_mm256_xor_si256(a, c0); } @@ -1699,7 +1701,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_5b( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], @@ -2034,7 +2036,15 @@ static KRML_MUSTINLINE void libcrux_sha3_avx2_x4_shake256( Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = { + /* XXX: These functions could alternatively implement the same with the + portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, + 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, + 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, + 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); + keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, + 0x1fu8>([input3], [out3]); } */ + input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; libcrux_sha3_generic_keccak_keccak_fb(buf0, buf); } @@ -2274,7 +2284,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_3a( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], diff --git a/libcrux-ml-kem/cg/libcrux_sha3_portable.h 
b/libcrux-ml-kem/cg/libcrux_sha3_portable.h index d85d8e543..211cf1919 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_portable.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_sha3_portable_H @@ -1654,6 +1654,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } @@ -2012,6 +2013,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -2140,6 +2142,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2746,6 +2749,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -3104,6 +3108,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -3399,6 +3404,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -3496,6 +3502,7 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice 
input1, Eurydice_slice out0, Eurydice_slice out1) { + /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3510,6 +3517,9 @@ typedef struct libcrux_sha3_neon_x2_incremental_KeccakState_s { */ static KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let s0 = KeccakState::new(); let s1 = + * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3522,6 +3532,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3535,6 +3549,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_first_three_blocks(&mut s0, out0); + * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3548,6 +3566,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_next_block(&mut s0, out0); + * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3572,6 +3594,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3733,8 +3759,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)136U) { - consumed = (size_t)136U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)136U) { + consumed = (size_t)136U - /* We have enough data when combining the + internal buffer and the input. 
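The buffering rule annotated in this `fill_buffer` hunk is: when data is already buffered and the new input would complete a full rate-sized block, consume exactly enough input bytes to fill the buffer, otherwise consume nothing at this step. A sketch of just that rule for the 136-byte SHAKE-256 rate (the struct and method names are illustrative):

```rust
// Illustrative sketch of the buffering rule, not the libcrux state machine.
const RATE: usize = 136;

struct Buffer {
    buf: [u8; RATE],
    buf_len: usize,
}

impl Buffer {
    /// Returns how many input bytes were consumed into the internal buffer.
    fn fill_buffer(&mut self, input: &[u8]) -> usize {
        let mut consumed = 0;
        // There's something buffered internally to consume, and the combined
        // data is enough for a full block.
        if self.buf_len > 0 && self.buf_len + input.len() >= RATE {
            consumed = RATE - self.buf_len;
            self.buf[self.buf_len..].copy_from_slice(&input[..consumed]);
            self.buf_len = RATE;
        }
        consumed
    }
}
```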
*/ + self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -3840,7 +3871,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). */ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4187,8 +4220,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)168U) { - consumed = (size_t)168U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)168U) { + consumed = (size_t)168U - /* We have enough data when combining the + internal buffer and the input. */ + self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -4294,7 +4332,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). */ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4684,7 +4724,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= out_len) { + if ((size_t)136U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. + https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)136U; @@ -4698,8 +4744,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4708,7 +4757,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. 
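The squeeze hunks above split the requested output into a first block of min(RATE, out_len) bytes (spelled out with an explicit comparison because Eurydice does not extract `core::cmp::min`), then whole blocks, then a trailing partial block. A sketch of just that arithmetic for the 136-byte rate; the function name is hypothetical:

```rust
// Illustrative sketch of the output-splitting arithmetic in the squeeze path.
const RATE: usize = 136;

fn split_squeeze_lengths(out_len: usize) -> (usize, usize, usize) {
    // min(RATE, out_len), written without core::cmp::min as in the C above.
    let first = if RATE >= out_len { out_len } else { RATE };
    // Number of complete RATE-sized blocks; the extracted loop runs over
    // 1..blocks to squeeze the full blocks after the first one.
    let full_blocks = out_len / RATE;
    // Offset where the trailing partial block starts.
    let last_offset = out_len - out_len % RATE;
    (first, full_blocks, last_offset)
}
```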
+ */ + out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -4803,7 +4856,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= out_len) { + if ((size_t)168U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. + https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)168U; @@ -4817,8 +4876,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4827,7 +4889,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. + */ + out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); From 98f9a92172d7a531ad6fa41fd018056fdbd60851 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 19:17:37 +0000 Subject: [PATCH 5/7] c code --- libcrux-ml-kem/c/code_gen.txt | 10 +- libcrux-ml-kem/c/internal/libcrux_core.h | 10 +- .../c/internal/libcrux_mlkem_avx2.h | 10 +- .../c/internal/libcrux_mlkem_portable.h | 10 +- libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h | 10 +- .../c/internal/libcrux_sha3_internal.h | 78 +- libcrux-ml-kem/c/libcrux_core.c | 10 +- libcrux-ml-kem/c/libcrux_core.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_avx2.c | 1303 ++++------------ libcrux-ml-kem/c/libcrux_mlkem_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_portable.c | 373 ++--- libcrux-ml-kem/c/libcrux_mlkem_portable.h | 10 +- libcrux-ml-kem/c/libcrux_sha3.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_avx2.c | 52 +- libcrux-ml-kem/c/libcrux_sha3_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_internal.h | 16 +- libcrux-ml-kem/c/libcrux_sha3_neon.c | 30 +- libcrux-ml-kem/c/libcrux_sha3_neon.h | 10 +- libcrux-ml-kem/cg/code_gen.txt | 10 +- 
libcrux-ml-kem/cg/libcrux_core.h | 10 +- libcrux-ml-kem/cg/libcrux_ct_ops.h | 10 +- libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h | 1309 +++-------------- libcrux-ml-kem/cg/libcrux_mlkem768_portable.h | 230 +-- libcrux-ml-kem/cg/libcrux_sha3_avx2.h | 28 +- libcrux-ml-kem/cg/libcrux_sha3_portable.h | 104 +- 40 files changed, 826 insertions(+), 2997 deletions(-) diff --git a/libcrux-ml-kem/c/code_gen.txt b/libcrux-ml-kem/c/code_gen.txt index 8606206e0..54242b657 100644 --- a/libcrux-ml-kem/c/code_gen.txt +++ b/libcrux-ml-kem/c/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f -Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c -Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 -F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc -Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 +Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 +Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 +Karamel: 8c3612018c25889288da6857771be3ad03b75bcd +F*: 5643e656b989aca7629723653a2570c7df6252b9 +Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 diff --git a/libcrux-ml-kem/c/internal/libcrux_core.h b/libcrux-ml-kem/c/internal/libcrux_core.h index fe0dc7d7d..fe89acd19 100644 --- a/libcrux-ml-kem/c/internal/libcrux_core.h +++ b/libcrux-ml-kem/c/internal/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_core_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h index 48345a968..466ef3ba0 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h index e89d87311..f108fb1a3 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 
2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h index 78fe0a95b..67b2d4675 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h index 92381f50f..342c481f4 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_sha3_internal_H @@ -273,13 +273,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)136U) { - consumed = (size_t)136U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)136U) { + consumed = (size_t)136U - self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -385,9 +380,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). 
*/ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -734,13 +727,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)168U) { - consumed = (size_t)168U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)168U) { + consumed = (size_t)168U - self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -846,9 +834,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -1238,13 +1224,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)136U >= out_len) { mid = out_len; } else { mid = (size_t)136U; @@ -1258,11 +1238,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1271,11 +1248,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -1370,13 +1343,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. 
- https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)168U >= out_len) { mid = out_len; } else { mid = (size_t)168U; @@ -1390,11 +1357,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1403,11 +1367,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); diff --git a/libcrux-ml-kem/c/libcrux_core.c b/libcrux-ml-kem/c/libcrux_core.c index de354115a..e69d41843 100644 --- a/libcrux-ml-kem/c/libcrux_core.c +++ b/libcrux-ml-kem/c/libcrux_core.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_core.h" diff --git a/libcrux-ml-kem/c/libcrux_core.h b/libcrux-ml-kem/c/libcrux_core.h index 55c5c5d8e..9097eceda 100644 --- a/libcrux-ml-kem/c/libcrux_core.h +++ b/libcrux-ml-kem/c/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024.h b/libcrux-ml-kem/c/libcrux_mlkem1024.h index 37334a9b1..041b2ec09 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 
2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c index 778d6fbf3..5fec937b0 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem1024_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h index 854751c45..96971f755 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c index e463cb267..c63594eaa 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem1024_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h index 430c904d1..f951149be 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 
3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512.h b/libcrux-ml-kem/c/libcrux_mlkem512.h index fb7755a5a..0e850ae5d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c index 3e9fbd0cc..7971b5c4f 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem512_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h index 79012290d..3c4030f73 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c index 8639c4603..b8f6fd756 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c @@ -4,11 +4,11 @@ * 
SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem512_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h index faea31c8a..7766250f2 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768.h b/libcrux-ml-kem/c/libcrux_mlkem768.h index 474b96082..f2c7db21a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c index a7a0f7e7d..d30955e8a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem768_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h index 35608499b..ea29365da 100644 --- 
a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c index 2d21b9d89..1cdebda61 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem768_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h index 514894426..6c512c865 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c index 64e5d2462..7cd2d548f 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_mlkem_avx2.h" 
@@ -141,16 +141,11 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); - __m256i v_minus_field_modulus = - mm256_sub_epi16(/* Compute v_i - Q and crate a mask from the sign bit of - each of these quantities. */ - vector, - field_modulus); + __m256i v_minus_field_modulus = mm256_sub_epi16(vector, field_modulus); __m256i sign_mask = mm256_srai_epi16((int32_t)15, v_minus_field_modulus, __m256i); - __m256i conditional_add_field_modulus = mm256_and_si256( - /* If v_i - Q < 0 then add back Q to (v_i - Q). */ sign_mask, - field_modulus); + __m256i conditional_add_field_modulus = + mm256_and_si256(sign_mask, field_modulus); return mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -455,7 +450,6 @@ libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(__m256i vec) { KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { - /* Compute the first term of the product */ __m256i shuffle_with = mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -463,8 +457,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = - mm256_shuffle_epi8(/* Prepare the left hand side */ lhs, shuffle_with); + __m256i lhs_shuffled = mm256_shuffle_epi8(lhs, shuffle_with); __m256i lhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = mm256_castsi256_si128(lhs_shuffled0); @@ -472,8 +465,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = mm256_extracti128_si256((int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = - mm256_shuffle_epi8(/* Prepare the right hand side */ rhs, shuffle_with); + __m256i rhs_shuffled = mm256_shuffle_epi8(rhs, shuffle_with); __m256i rhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = mm256_castsi256_si128(rhs_shuffled0); @@ -481,8 +473,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = mm256_extracti128_si256((int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = mm256_cvtepi16_epi32(rhs_odds); - __m256i left = - mm256_mullo_epi32(/* Start operating with them */ lhs_evens0, rhs_evens0); + __m256i left = mm256_mullo_epi32(lhs_evens0, rhs_evens0); __m256i right = mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(right); @@ -495,7 +486,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = mm256_shuffle_epi8( - /* Compute the second term of the product */ rhs, + rhs, mm256_set_epi8((int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, (int8_t)1, (int8_t)0, (int8_t)3, @@ -509,9 +500,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = mm256_slli_epi32((int32_t)16, 
products_right0, __m256i); - return mm256_blend_epi16((int32_t)170, - /* Combine them into one vector */ products_left0, - products_right1, __m256i); + return mm256_blend_epi16((int32_t)170, products_left0, products_right1, + __m256i); } /** @@ -527,44 +517,11 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = mm256_slli_epi16( - (int32_t)15, - /* Suppose |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ - 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least - significant bit in each lane, move it to the most significant position - to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ - d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ - n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ - vector, __m256i); - __m128i low_msbs = mm256_castsi256_si128( - /* Get the first 8 16-bit elements ... */ lsb_to_msb); - __m128i high_msbs = mm256_extracti128_si256( - (int32_t)1, - /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); - __m128i msbs = - mm_packs_epi16(/* ... and then pack them into 8-bit values using signed - saturation. This function packs all the |low_msbs|, and - then the high ones. low_msbs = a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | - e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ - l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ We shifted by 15 above - to take advantage of the signed saturation performed by - mm_packs_epi16: - if the sign bit of the 16-bit element - being packed is 1, the corresponding 8-bit element in - |msbs| will be 0xFF. - if the sign bit of the 16-bit - element being packed is 0, the corresponding 8-bit - element in |msbs| will be 0. Thus, if, for example, a₀ = - 1, e₀ = 1, and p₀ = 1, and every other bit is 0, after - packing into 8 bit value, |msbs| will look like: 0xFF - 0x00 0x00 0x00 | 0xFF 0x00 0x00 0x00 | 0x00 0x00 0x00 - 0x00 | 0x00 0x00 0x00 0xFF */ - low_msbs, - high_msbs); - int32_t bits_packed = - mm_movemask_epi8(/* Now that every element is either 0xFF or 0x00, we just - extract the most significant bit from each element and - collate them into two bytes. */ - msbs); + __m256i lsb_to_msb = mm256_slli_epi16((int32_t)15, vector, __m256i); + __m128i low_msbs = mm256_castsi256_si128(lsb_to_msb); + __m128i high_msbs = mm256_extracti128_si256((int32_t)1, lsb_to_msb, __m128i); + __m128i msbs = mm_packs_epi16(low_msbs, high_msbs); + int32_t bits_packed = mm_movemask_epi8(msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -582,39 +539,16 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { __m256i coefficients = - mm256_set_epi16(/* We need to take each bit from the 2 bytes of input and - put them into their own 16-bit lane. Ideally, we'd load - the two bytes into the vector, duplicate them, and - right-shift the 0th element by 0 bits, the first - element by 1 bit, the second by 2 bits and so on before - AND-ing with 0x1 to leave only the least signifinicant - bit. But since |_mm256_srlv_epi16| does not exist, so - we have to resort to a workaround. Rather than shifting - each element by a different amount, we'll multiply each - element by a value such that the bit we're interested - in becomes the most significant bit. 
The coefficients - are loaded as follows: */ - b, - b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = - mm256_mullo_epi16(/* And this vector, when multiplied with the previous - one, ensures that the bit we'd like to keep in each - lane becomes the most significant bit upon - multiplication. */ - coefficients, - mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, - (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, - (int16_t)1 << 8U, (int16_t)1 << 9U, - (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768)); - return mm256_srli_epi16( - (int32_t)15, - /* Now that they're all in the most significant bit position, shift them - down to the least significant bit. */ - coefficients_in_msb, __m256i); + mm256_set_epi16(b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = mm256_mullo_epi16( + coefficients, + mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, + (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, + (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, + (int16_t)-32768)); + return mm256_srli_epi16((int32_t)15, coefficients_in_msb, __m256i); } KRML_MUSTINLINE __m256i @@ -627,23 +561,7 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index( - bytes, - /* We need to take each bit from the 2 bytes of input and put them - into their own 16-bit lane. Ideally, we'd load the two bytes into - the vector, duplicate them, and right-shift the 0th element by 0 - bits, the first element by 1 bit, the second by 2 bits and so on - before AND-ing with 0x1 to leave only the least signifinicant bit. - But since |_mm256_srlv_epi16| does not exist, so we have to resort - to a workaround. Rather than shifting each element by a different - amount, we'll multiply each element by a value such that the bit - we're interested in becomes the most significant bit. The - coefficients are loaded as follows: And this vector, when - multiplied with the previous one, ensures that the bit we'd like to - keep in each lane becomes the most significant bit upon - multiplication. Now that they're all in the most significant bit - position, shift them down to the least significant bit. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -676,47 +594,23 @@ KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 4U, - /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | - 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be - laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA - 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ - vector); - __m256i adjacent_8_combined = - mm256_shuffle_epi8(/* Recall that |adjacent_2_combined| goes as follows: - 0x00_00_00_BA 0x00_00_00_DC | 0x00_00_00_FE - 0x00_00_00_HG | ... 
Out of this, we only need the - first byte, the 4th byte, the 8th byte and so on - from the bottom and the top 128 bits. */ - adjacent_2_combined, - mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); - __m256i combined = - mm256_permutevar8x32_epi32(/* |adjacent_8_combined| looks like this: 0: - 0xHG_FE_DC_BA 1: 0x00_00_00_00 | 2: - 0x00_00_00_00 3: 0x00_00_00_00 | 4: - 0xPO_NM_LK_JI .... We put the element at 4 - after the element at 0 ... */ - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, - (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); + __m256i adjacent_8_combined = mm256_shuffle_epi8( + adjacent_2_combined, + mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, + (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0)); + __m256i combined = mm256_permutevar8x32_epi32( + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); __m128i combined0 = mm256_castsi256_si128(combined); mm_storeu_bytes_si128( - Eurydice_array_to_slice( - (size_t)16U, - /* ... so that we can read them out in one go. */ serialized, - uint8_t), - combined0); + Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); uint8_t ret0[8U]; core_result_Result_15 dst; Eurydice_slice_to_array2( @@ -740,23 +634,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = - mm256_set_epi16(/* Every 4 bits from each byte of input should be put into - its own 16-bit lane. Since |_mm256_srlv_epi16| does not - exist, we have to resort to a workaround. Rather than - shifting each element by a different amount, we'll - multiply each element by a value such that the bits - we're interested in become the most significant bits - (of an 8-bit value). In this lane, the 4 bits we need - to put are already the most significant bits of - |bytes[7]| (that is, b7). */ - b7, - /* In this lane, the 4 bits we need to put are the least - significant bits, so we need to shift the 4 - least-significant bits of |b7| to the most significant - bits (of an 8-bit value). 
*/ - b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, - b0); + __m256i coefficients = mm256_set_epi16(b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, + b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = mm256_mullo_epi16( coefficients, mm256_set_epi16((int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -765,12 +644,9 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); - __m256i coefficients_in_lsb = mm256_srli_epi16( - (int32_t)4, - /* Once the 4-bit coefficients are in the most significant positions (of - an 8-bit value), shift them all down by 4. */ - coefficients_in_msb, __m256i); - return mm256_and_si256(/* Zero the remaining bits. */ coefficients_in_lsb, + __m256i coefficients_in_lsb = + mm256_srli_epi16((int32_t)4, coefficients_in_msb, __m256i); + return mm256_and_si256(coefficients_in_lsb, mm256_set1_epi16(((int16_t)1 << 4U) - (int16_t)1)); } @@ -786,23 +662,7 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index( - bytes, - /* Every 4 bits from each byte of input should be put into its own - 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to - resort to a workaround. Rather than shifting each element by a - different amount, we'll multiply each element by a value such that - the bits we're interested in become the most significant bits (of - an 8-bit value). In this lane, the 4 bits we need to put are - already the most significant bits of |bytes[7]| (that is, b7). In - this lane, the 4 bits we need to put are the least significant - bits, so we need to shift the 4 least-significant bits of |b7| to - the most significant bits (of an 8-bit value). These constants are - chosen to shift the bits of the values that we loaded into - |coefficients|. Once the 4-bit coefficients are in the most - significant positions (of an 8-bit value), shift them all down - by 4. Zero the remaining bits. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -824,78 +684,32 @@ libcrux_ml_kem_vector_avx2_deserialize_4_09(Eurydice_slice bytes) { KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = - mm256_madd_epi16(/* If |vector| is laid out as follows (superscript number - indicates the corresponding bit is duplicated that - many times): 0¹¹a₄a₃a₂a₁a₀ 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ - 0¹¹d₄d₃d₂d₁d₀ | ↩ 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ - 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | ↩ |adjacent_2_combined| - will be laid out as a series of 32-bit integers, as - follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... 
*/ - vector, - mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1)); - __m256i adjacent_4_combined = - mm256_sllv_epi32(/* Recall that |adjacent_2_combined| is laid out as - follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... This shift results - in: b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | - ↩ f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, - (int32_t)22, (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22)); - __m256i adjacent_4_combined0 = mm256_srli_epi64( - (int32_t)22, - /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift - down by 22 bits to remove the least significant 0 bits that aren't part - of the bits we need. */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = mm256_shuffle_epi32( - (int32_t)8, - /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks - like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² - 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to - read out the bytes in one go, we need to shifts the bits in position 2 - to position 1 in each 128-bit lane. */ - adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = - mm256_sllv_epi32(/* |adjacent_8_combined|, when viewed as a set of 32-bit - values, now looks like: - 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 0³² 0³² | - ↩ Once again, we line these bits up by shifting the up - values at indices 0 and 5 by 12, viewing the resulting - register as a set of 64-bit values, and then shifting - down the 64-bit values by 12 bits. */ - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12)); + __m256i adjacent_2_combined = mm256_madd_epi16( + vector, mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); + __m256i adjacent_4_combined = mm256_sllv_epi32( + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22)); + __m256i adjacent_4_combined0 = + mm256_srli_epi64((int32_t)22, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + mm256_shuffle_epi32((int32_t)8, adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = mm256_sllv_epi32( + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = mm256_srli_epi64((int32_t)12, adjacent_8_combined0, __m256i); - __m128i lower_8 = - mm256_castsi256_si128(/* We now have 40 bits starting at position 0 in the - lower 128-bit lane, ... 
*/ - adjacent_8_combined1); + __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined1); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); - __m128i upper_8 = mm256_extracti128_si256( - (int32_t)1, - /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ - adjacent_8_combined1, __m128i); + __m128i upper_8 = + mm256_extracti128_si256((int32_t)1, adjacent_8_combined1, __m128i); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -989,67 +803,25 @@ core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 10U, - /* If |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... - |adjacent_2_combined| will be laid out as a series of 32-bit - integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ - vector); - __m256i adjacent_4_combined = - mm256_sllv_epi32(/* Shifting up the values at the even indices by 12, we - get: b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ ... */ - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); + __m256i adjacent_4_combined = mm256_sllv_epi32( + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)12, - /* Viewing this as a set of 64-bit integers we get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ ... Shifting down by 12 gives us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ ... */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - mm256_shuffle_epi8(/* |adjacent_4_combined|, when the bottom and top 128 - bit-lanes are grouped into bytes, looks like: - 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ In - each 128-bit lane, we want to put bytes 8, 9, 10, - 11, 12 after bytes 0, 1, 2, 3 to allow for - sequential reading. 
*/ - adjacent_4_combined0, - mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); - __m128i lower_8 = - mm256_castsi256_si128(/* We now have 64 bits starting at position 0 in the - lower 128-bit lane, ... */ - adjacent_8_combined); - __m128i upper_8 = mm256_extracti128_si256( - (int32_t)1, - /* and 64 bits starting at position 0 in the upper 128-bit lane. */ - adjacent_8_combined, __m128i); + mm256_srli_epi64((int32_t)12, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = mm256_shuffle_epi8( + adjacent_4_combined0, + mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, + (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, + (int8_t)9, (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0)); + __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined); + __m128i upper_8 = + mm256_extracti128_si256((int32_t)1, adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -1057,167 +829,8 @@ libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If - |vector| - is - laid - out - as - follows - (superscript - number - indicates - the - corresponding - bit - is - duplicated - that - many - times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ - | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ - | ↩ - ... - |adjacent_2_combined| - will - be - laid - out - as a - series - of - 32-bit - integers, - as - follows: - 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - .... - Shifting - up - the - values - at - the - even - indices - by - 12, - we - get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - ... - Viewing - this - as a - set - of - 64-bit - integers - we - get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ - ... - Shifting - down - by - 12 - gives - us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ - ... 
- |adjacent_4_combined|, - when - the - bottom - and - top - 128 - bit-lanes - are - grouped - into - bytes, - looks - like: - 0₇0₆0₅B₄B₃B₂B₁B₀ - | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ - | ↩ - In - each - 128-bit - lane, - we - want - to - put - bytes - 8, - 9, - 10, - 11, - 12 - after - bytes - 0, - 1, - 2, 3 - to - allow - for - sequential - reading. - We - now - have - 64 - bits - starting - at - position - 0 in - the - lower - 128-bit - lane, - ... - and - 64 - bits - starting - at - position - 0 in - the - upper - 128-bit - lane. - */ - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1267,16 +880,14 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 0U, (int16_t)1 << 2U, (int16_t)1 << 4U, (int16_t)1 << 6U)); __m256i coefficients1 = mm256_srli_epi16((int32_t)6, coefficients0, __m256i); - return mm256_and_si256( - /* Here I can prove this `and` is not useful */ coefficients1, - mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); + return mm256_and_si256(coefficients1, + mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); } KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( - /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, - (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = + Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1442,64 +1053,26 @@ KRML_MUSTINLINE size_t libcrux_ml_kem_vector_avx2_sampling_rejection_sample( __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can - be interpreted as a - sequence of - serialized 12-bit - (i.e. uncompressed) - coefficients. Not - all coefficients - may be less than - FIELD_MODULUS - though. */ - input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); __m256i compare_with_field_modulus = - mm256_cmpgt_epi16(/* Suppose we view |potential_coefficients| as follows - (grouping 64-bit elements): A B C D | E F G H | .... - and A < 3329, D < 3329 and H < 3329, - |compare_with_field_modulus| will look like: 0xFF 0 0 - 0xFF | 0 0 0 0xFF | ... */ - field_modulus, - potential_coefficients); + mm256_cmpgt_epi16(field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each - lane is either 0 or 1, we - only need one bit from - each lane in the register - to tell us what - coefficients to keep and - what to throw-away. - Combine all the bits - (there are 16) into two - bytes. */ - compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, - /* Each bit (and its corresponding position) represents an element we - want to sample. We'd like all such elements to be next to each other - starting at index 0, so that they can be read from the vector - easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level - shuffling indices needed to make this happen. For e.g. 
if good[0] = - 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit - lane to the first. To do this, we need the byte-level shuffle - indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = mm_loadu_si128(Eurydice_array_to_slice( - (size_t)16U, - /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, - uint8_t)); + __m128i lower_shuffles0 = mm_loadu_si128( + Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); __m128i lower_coefficients = mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = mm_shuffle_epi8(lower_coefficients, lower_shuffles0); - mm_storeu_si128(/* ... then write them out ... */ output, - lower_coefficients0); + mm_storeu_si128(output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, - /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1852,13 +1425,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_a9_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, - uint8_t), + H_a9_e0(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -2345,10 +1914,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -2407,7 +1972,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -2622,12 +2187,7 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -2679,13 +2239,7 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2771,11 +2325,7 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2790,9 +2340,7 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - ntt_at_layer_7_61(/* Due to the small coefficient bound, we can skip the first - round of Montgomery reductions. 
*/ - re); + ntt_at_layer_7_61(re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)5U); @@ -2925,14 +2473,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2966,17 +2509,10 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; - __m256i coefficient_normal_form = to_standard_domain_61( - self->coefficients[/* The coefficients are of the form aR^{-1} mod q, - which means calling to_montgomery_domain() on them - should return a mod q. */ - j]); + __m256i coefficient_normal_form = + to_standard_domain_61(self->coefficients[j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -3006,8 +2542,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3083,10 +2617,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( IndCpaPrivateKeyUnpacked_63 *private_key, IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -3116,8 +2647,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( sample_vector_cbd_then_ntt_out_b41(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_ab(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_ab(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -3142,13 +2673,11 @@ serialize_unpacked_secret_key_8c(IndCpaPublicKeyUnpacked_63 *public_key, IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_ed( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_ed( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -3335,15 +2864,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_ab(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -3520,13 +3045,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = 
zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3557,10 +3076,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -3586,11 +3102,7 @@ static KRML_MUSTINLINE void add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3703,26 +3215,8 @@ add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in top-level - declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing function cannot be - translated into C*: let mutable ret(Mark.Present,(Mark.AtMost - 2), ): int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the following code: - ```rust result.coefficients[i] = - Vector::barrett_reduce(Vector::add( coefficient_normal_form, - &Vector::add(self.coefficients[i], &message.coefficients[i]), - )); ``` */ - i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3770,18 +3264,8 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... 
*/ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3789,18 +3273,12 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)10, coefficients_high0, __m256i); @@ -3813,20 +3291,8 @@ compress_ciphertext_coefficient_ef(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -3880,18 +3346,8 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3899,18 +3355,12 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. 
*/ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)11, coefficients_high0, __m256i); @@ -3923,20 +3373,8 @@ compress_ciphertext_coefficient_c4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4014,18 +3452,8 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -4033,18 +3461,12 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. 
the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)4, coefficients_high0, __m256i); @@ -4057,20 +3479,8 @@ compress_ciphertext_coefficient_d1(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4096,11 +3506,7 @@ static KRML_MUSTINLINE void compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficient = compress_09_d1(to_unsigned_field_modulus_61(re.coefficients[i0])); @@ -4127,18 +3533,8 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -4146,18 +3542,12 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. 
the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)5, coefficients_high0, __m256i); @@ -4170,20 +3560,8 @@ compress_ciphertext_coefficient_f4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4209,11 +3587,7 @@ static KRML_MUSTINLINE void compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficients = compress_09_f4(to_unsigned_representative_61(re.coefficients[i0])); @@ -4301,11 +3675,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -4317,7 +3687,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = sample_ring_element_cbd_b41(copy_of_prf_input, domain_separator0); @@ -4326,7 +3695,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4334,11 +3703,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ 
public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -4347,14 +3714,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -4549,8 +3914,7 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)10); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4558,16 +3922,12 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)10, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4575,27 +3935,12 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)10, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4650,8 +3995,7 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)11); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4659,16 +4003,12 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)11, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4676,27 +4016,12 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)11, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4819,8 +4144,7 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)4); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4828,16 +4152,12 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)4, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4845,27 +4165,12 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)4, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4915,8 +4220,7 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)5); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4924,16 +4228,12 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)5, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4941,27 +4241,12 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)5, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -5128,14 +4413,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_2f( IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - deserialize_then_decompress_u_ed( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5156,8 +4438,7 @@ with const generics static KRML_MUSTINLINE void decrypt_2f(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - deserialize_secret_key_ab(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_ab(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -5477,13 +4758,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_5e( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_a9_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, - uint8_t), + H_a9_ac(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -5960,10 +5237,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_78( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -6022,7 +5295,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6173,14 +5446,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -6211,8 +5479,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_42( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6288,10 +5554,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( IndCpaPrivateKeyUnpacked_39 *private_key, IndCpaPublicKeyUnpacked_39 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_6a(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_6a(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6321,8 +5584,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( sample_vector_cbd_then_ntt_out_b4(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_42(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_42(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6347,13 +5610,11 @@ serialize_unpacked_secret_key_c9(IndCpaPublicKeyUnpacked_39 *public_key, IndCpaPrivateKeyUnpacked_39 *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_1e( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_78( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_78(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -6540,15 +5801,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_39 *unpacked_public_key) { - Eurydice_slice uu____0 = 
Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_42(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -6640,10 +5897,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -6873,11 +6127,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( IndCpaPublicKeyUnpacked_39 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -6889,7 +6139,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd uu____3 = sample_ring_element_cbd_b4(copy_of_prf_input, domain_separator0); @@ -6898,7 +6147,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -6906,11 +6155,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[4U]; - compute_vector_u_42(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_42(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -6919,14 +6166,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; 
libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[4U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_c9( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_1e( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -7240,14 +6485,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_37( IndCpaPrivateKeyUnpacked_39 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[4U]; - deserialize_then_decompress_u_1e( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_1e(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_78( - Eurydice_array_to_subslice_from( - (size_t)1568U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, + (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_42(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7268,8 +6510,7 @@ with const generics static KRML_MUSTINLINE void decrypt_37(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[4U]; - deserialize_secret_key_42(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_42(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[4U]; memcpy( @@ -7577,13 +6818,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_4d( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_a9_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, - uint8_t), + H_a9_fd(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -8034,10 +7271,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_29( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -8096,7 +7329,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8252,14 +7485,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -8290,8 +7518,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_89( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8367,10 +7593,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( IndCpaPrivateKeyUnpacked_94 *private_key, IndCpaPublicKeyUnpacked_94 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_f8(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_f8(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8400,8 +7623,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( sample_vector_cbd_then_ntt_out_b40(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_89(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_89(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8426,13 +7649,11 @@ serialize_unpacked_secret_key_2d(IndCpaPublicKeyUnpacked_94 *public_key, IndCpaPrivateKeyUnpacked_94 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_ba( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_29( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_29(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -8619,15 +7840,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_94 *unpacked_public_key) { - Eurydice_slice uu____0 = 
Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_89(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -8765,10 +7982,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -8960,11 +8174,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( IndCpaPublicKeyUnpacked_94 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -8976,7 +8186,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_40 uu____3 = sample_ring_element_cbd_b40(copy_of_prf_input, domain_separator0); @@ -8985,7 +8194,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -8993,11 +8202,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[2U]; - compute_vector_u_89(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_89(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -9006,14 +8213,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; 
libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[2U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_2d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ba( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -9297,14 +8502,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_4b( IndCpaPrivateKeyUnpacked_94 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[2U]; - deserialize_then_decompress_u_ba( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_ba(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_29( - Eurydice_array_to_subslice_from( - (size_t)768U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)768U, ciphertext, + (size_t)640U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_89(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9325,8 +8527,7 @@ with const generics static KRML_MUSTINLINE void decrypt_4b(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[2U]; - deserialize_secret_key_89(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_89(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[2U]; memcpy( diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h index addfdaf30..95dad8cf8 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.c b/libcrux-ml-kem/c/libcrux_mlkem_portable.c index fddae347c..1d3a317a8 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * 
Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_mlkem_portable.h" @@ -1152,28 +1152,11 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( */ uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = - (int16_t)1664 - - (int16_t) /* The approach used here is inspired by: - https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 - If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ - fe; - int16_t mask = - /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = - -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive - <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so - if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ - shifted - - >> 15U; + int16_t shifted = (int16_t)1664 - (int16_t)fe; + int16_t mask = shifted >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = - /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the - most significant bit of shifted_positive_in_range will be 1. */ - shifted_positive_in_range - - >> 15U; + int16_t r0 = shifted_positive_in_range >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1209,16 +1192,7 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = - (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits - == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); - hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to - be constant time due to: - https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ - */ - fe - - << (uint32_t)coefficient_bits; + uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2738,13 +2712,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_60( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_f1_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, - uint8_t), + H_f1_ac(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -3234,10 +3204,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ff( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -3297,7 +3263,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3495,12 +3461,7 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -3562,13 +3523,7 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3660,11 +3615,7 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3681,9 +3632,7 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - ntt_at_layer_7_8c(/* Due to the small coefficient bound, we can skip the first - round of Montgomery reductions. 
*/ - re); + ntt_at_layer_7_8c(re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)5U); @@ -3822,11 +3771,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_d0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3866,18 +3811,10 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector - coefficient_normal_form = to_standard_domain_8c( - self->coefficients[/* The coefficients are of the form aR^{-1} mod - q, which means calling to_montgomery_domain() - on them should return a mod q. */ - j]); + coefficient_normal_form = to_standard_domain_8c(self->coefficients[j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -3909,8 +3846,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_d0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3986,10 +3921,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( IndCpaPrivateKeyUnpacked_af *private_key, IndCpaPublicKeyUnpacked_af *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_03(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_03(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -4019,8 +3951,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( sample_vector_cbd_then_ntt_out_3b(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_d0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_d0(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -4045,13 +3977,11 @@ serialize_unpacked_secret_key_2f(IndCpaPublicKeyUnpacked_af *public_key, IndCpaPrivateKeyUnpacked_af *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_00( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_ff( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_ff(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -4239,15 +4169,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_af *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_d0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4434,13 +4360,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ 
-4471,10 +4391,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_d0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -4500,11 +4417,7 @@ static KRML_MUSTINLINE void add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4630,27 +4543,8 @@ add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d( - self->coefficients[/* FIXME: Eurydice crashes with: Warning 11: in - top-level declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing - function cannot be translated into C*: let - mutable ret(Mark.Present,(Mark.AtMost 2), ): - int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. 
On the - following code: ```rust result.coefficients[i] - = Vector::barrett_reduce(Vector::add( - coefficient_normal_form, - &Vector::add(self.coefficients[i], - &message.coefficients[i]), )); ``` */ - i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -4863,11 +4757,7 @@ static KRML_MUSTINLINE void compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = compress_0d_d1(to_unsigned_field_modulus_8c(re.coefficients[i0])); @@ -4922,11 +4812,7 @@ static KRML_MUSTINLINE void compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = compress_0d_f4(to_unsigned_representative_8c(re.coefficients[i0])); @@ -5015,11 +4901,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( IndCpaPublicKeyUnpacked_af *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5031,7 +4913,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd0 uu____3 = sample_ring_element_cbd_3b(copy_of_prf_input, domain_separator0); @@ -5040,7 +4921,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5048,11 +4929,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[4U]; - compute_vector_u_d0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, 
error_1, u); + compute_vector_u_d0(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -5061,14 +4940,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[4U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_2f( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_00( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -5707,14 +5584,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_7d( IndCpaPrivateKeyUnpacked_af *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[4U]; - deserialize_then_decompress_u_00( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_00(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_ff( - Eurydice_array_to_subslice_from( - (size_t)1568U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, + (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_d0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5735,8 +5609,7 @@ with const generics static KRML_MUSTINLINE void decrypt_7d(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[4U]; - deserialize_secret_key_d0(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_d0(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[4U]; memcpy( @@ -6056,13 +5929,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_30( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_f1_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. 
*/ - private_key->value, - (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, - uint8_t), + H_f1_fd(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -6512,10 +6381,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_64( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -6575,7 +6440,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6721,11 +6586,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_a0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -6760,8 +6621,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_a0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6837,10 +6696,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( IndCpaPrivateKeyUnpacked_d4 *private_key, IndCpaPublicKeyUnpacked_d4 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_10(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_10(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6870,8 +6726,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( sample_vector_cbd_then_ntt_out_3b0(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_a0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_a0(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6896,13 +6752,11 @@ serialize_unpacked_secret_key_6d(IndCpaPublicKeyUnpacked_d4 *public_key, IndCpaPrivateKeyUnpacked_d4 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_86( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_64( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_64(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -7090,15 +6944,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_d4 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_a0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7224,10 +7074,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_a0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -7458,11 +7305,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( IndCpaPublicKeyUnpacked_d4 *public_key, 
uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -7475,7 +7318,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_400 uu____3 = sample_ring_element_cbd_3b0(copy_of_prf_input, domain_separator0); @@ -7484,7 +7326,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -7492,11 +7334,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[2U]; - compute_vector_u_a0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_a0(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -7505,14 +7345,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[2U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_6d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_86( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -7827,14 +7665,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_d1( IndCpaPrivateKeyUnpacked_d4 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[2U]; - deserialize_then_decompress_u_86( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_86(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_64( - Eurydice_array_to_subslice_from( - (size_t)768U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)768U, ciphertext, + (size_t)640U, uint8_t, size_t)); 
libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_a0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7855,8 +7690,7 @@ with const generics static KRML_MUSTINLINE void decrypt_d1(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[2U]; - deserialize_secret_key_a0(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_a0(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[2U]; memcpy( @@ -8164,13 +7998,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_f1_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, - uint8_t), + H_f1_e0(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -8626,10 +8456,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -8689,7 +8515,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8824,11 +8650,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -8863,8 +8685,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8940,10 +8760,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( IndCpaPrivateKeyUnpacked_a0 *private_key, IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8973,8 +8790,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( sample_vector_cbd_then_ntt_out_3b1(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_1b(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_1b(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8999,13 +8816,11 @@ serialize_unpacked_secret_key_43(IndCpaPublicKeyUnpacked_a0 *public_key, IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_6c( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_89( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -9193,15 +9008,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_1b(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -9295,10 +9106,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -9491,11 +9299,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( IndCpaPublicKeyUnpacked_a0 
*public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -9508,7 +9312,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = sample_ring_element_cbd_3b1(copy_of_prf_input, domain_separator0); @@ -9517,7 +9320,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -9525,11 +9328,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -9538,14 +9339,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -9830,14 +9629,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_42( IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - deserialize_then_decompress_u_6c( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); 
libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9858,8 +9654,7 @@ with const generics static KRML_MUSTINLINE void decrypt_42(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - deserialize_secret_key_1b(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_1b(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/libcrux_mlkem_portable.h index 012f00992..ccb5a6654 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/libcrux_sha3.h b/libcrux-ml-kem/c/libcrux_sha3.h index 16a61b7e6..393be1f15 100644 --- a/libcrux-ml-kem/c/libcrux_sha3.h +++ b/libcrux-ml-kem/c/libcrux_sha3.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.c b/libcrux-ml-kem/c/libcrux_sha3_avx2.c index 23fa30cd5..3274dc64a 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.c +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_sha3_avx2.h" @@ -77,8 +77,7 @@ static KRML_MUSTINLINE __m256i and_not_xor_ef(__m256i a, __m256i b, __m256i c) { } static KRML_MUSTINLINE __m256i _veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = mm256_set1_epi64x( - (int64_t) /* 
Casting here is required, doesn't change the value. */ c); + __m256i c0 = mm256_set1_epi64x((int64_t)c); return mm256_xor_si256(a, c0); } @@ -1431,13 +1430,13 @@ static KRML_MUSTINLINE void store_block_5b(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = mm256_permute2x128_si256( - (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = + mm256_permute2x128_si256((int32_t)32, + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], @@ -1748,16 +1747,7 @@ void libcrux_sha3_avx2_x4_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = { - /* XXX: These functions could alternatively implement the same with the - portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, - 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, - 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, - 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); - keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, - 0x1fu8>([input3], [out3]); } */ - input0, - input1, input2, input3}; + Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; keccak_fb(buf0, buf); } @@ -1972,13 +1962,13 @@ static KRML_MUSTINLINE void store_block_3a(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = mm256_permute2x128_si256( - (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = + mm256_permute2x128_si256((int32_t)32, + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/libcrux_sha3_avx2.h index 645f80b34..eaa8d8c25 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: 
fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_internal.h b/libcrux-ml-kem/c/libcrux_sha3_internal.h index 74eeb47a3..c68ee5802 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_internal_H @@ -1811,7 +1811,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -2160,7 +2159,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -2509,7 +2507,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -2698,7 +2695,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2817,7 +2813,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -3166,7 +3161,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } 
diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.c b/libcrux-ml-kem/c/libcrux_sha3_neon.c index 5e4416bcd..8c9edc379 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.c +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_sha3_neon.h" @@ -62,7 +62,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { - /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -73,9 +72,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, */ KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let s0 = KeccakState::new(); let s1 = - * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -87,10 +83,6 @@ libcrux_sha3_neon_x2_incremental_init(void) { KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -104,10 +96,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_first_three_blocks(&mut s0, out0); - * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -121,10 +109,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_next_block(&mut s0, out0); - * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -148,10 +132,6 @@ libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_five_blocks( KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, 
Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.h b/libcrux-ml-kem/c/libcrux_sha3_neon.h index 6e264c84f..c51c09cc5 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.h +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_neon_H diff --git a/libcrux-ml-kem/cg/code_gen.txt b/libcrux-ml-kem/cg/code_gen.txt index 7e79f022e..54242b657 100644 --- a/libcrux-ml-kem/cg/code_gen.txt +++ b/libcrux-ml-kem/cg/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f -Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c -Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 -F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc -Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 +Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 +Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 +Karamel: 8c3612018c25889288da6857771be3ad03b75bcd +F*: 5643e656b989aca7629723653a2570c7df6252b9 +Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 diff --git a/libcrux-ml-kem/cg/libcrux_core.h b/libcrux-ml-kem/cg/libcrux_core.h index ca8a53171..b8e2354f8 100644 --- a/libcrux-ml-kem/cg/libcrux_core.h +++ b/libcrux-ml-kem/cg/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/cg/libcrux_ct_ops.h b/libcrux-ml-kem/cg/libcrux_ct_ops.h index 5f693d09c..cf4a616ac 100644 --- a/libcrux-ml-kem/cg/libcrux_ct_ops.h +++ b/libcrux-ml-kem/cg/libcrux_ct_ops.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: 
e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_ct_ops_H diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h index bb50d3eaf..f6933bc18 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_avx2_H @@ -171,16 +171,11 @@ libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i v_minus_field_modulus = - libcrux_intrinsics_avx2_mm256_sub_epi16(/* Compute v_i - Q and crate a - mask from the sign bit of each - of these quantities. */ - vector, field_modulus); + libcrux_intrinsics_avx2_mm256_sub_epi16(vector, field_modulus); __m256i sign_mask = libcrux_intrinsics_avx2_mm256_srai_epi16( (int32_t)15, v_minus_field_modulus, __m256i); __m256i conditional_add_field_modulus = - libcrux_intrinsics_avx2_mm256_and_si256(/* If v_i - Q < 0 then add back Q - to (v_i - Q). 
*/ - sign_mask, field_modulus); + libcrux_intrinsics_avx2_mm256_and_si256(sign_mask, field_modulus); return libcrux_intrinsics_avx2_mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -562,7 +557,6 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { - /* Compute the first term of the product */ __m256i shuffle_with = libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -570,8 +564,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Prepare the left hand side */ lhs, shuffle_with); + __m256i lhs_shuffled = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(lhs, shuffle_with); __m256i lhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = @@ -580,8 +574,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Prepare the right hand side */ rhs, shuffle_with); + __m256i rhs_shuffled = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(rhs, shuffle_with); __m256i rhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = @@ -590,8 +584,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(rhs_odds); - __m256i left = libcrux_intrinsics_avx2_mm256_mullo_epi32( - /* Start operating with them */ lhs_evens0, rhs_evens0); + __m256i left = + libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_evens0, rhs_evens0); __m256i right = libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = @@ -606,7 +600,7 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Compute the second term of the product */ rhs, + rhs, libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, @@ -621,10 +615,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)16, products_right0, __m256i); - return libcrux_intrinsics_avx2_mm256_blend_epi16( - (int32_t)170, - /* Combine them into one vector */ products_left0, products_right1, - __m256i); + return libcrux_intrinsics_avx2_mm256_blend_epi16((int32_t)170, products_left0, + products_right1, __m256i); } /** @@ -642,60 +634,13 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( 
KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = libcrux_intrinsics_avx2_mm256_slli_epi16( - (int32_t)15, - /* Suppose |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ - 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least - significant bit in each lane, move it to the most significant position - to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ - d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ - n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ - vector, __m256i); - __m128i low_msbs = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* Get the first 8 16-bit - elements ... */ - lsb_to_msb); + __m256i lsb_to_msb = + libcrux_intrinsics_avx2_mm256_slli_epi16((int32_t)15, vector, __m256i); + __m128i low_msbs = libcrux_intrinsics_avx2_mm256_castsi256_si128(lsb_to_msb); __m128i high_msbs = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); - __m128i msbs = - libcrux_intrinsics_avx2_mm_packs_epi16(/* ... and then pack them into - 8-bit values using signed - saturation. This function packs - all the |low_msbs|, and then the - high ones. low_msbs = a₀0¹⁵ - b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ - g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ - j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ - o₀0¹⁵ p₀0¹⁵ We shifted by 15 - above to take advantage of the - signed saturation performed by - mm_packs_epi16: - if the sign - bit of the 16-bit element being - packed is 1, the corresponding - 8-bit element in |msbs| will be - 0xFF. - if the sign bit of the - 16-bit element being packed is - 0, the corresponding 8-bit - element in |msbs| will be 0. - Thus, if, for example, a₀ = 1, - e₀ = 1, and p₀ = 1, and every - other bit is 0, after packing - into 8 bit value, |msbs| will - look like: 0xFF 0x00 0x00 0x00 | - 0xFF 0x00 0x00 0x00 | 0x00 0x00 - 0x00 0x00 | 0x00 0x00 0x00 0xFF - */ - low_msbs, high_msbs); - int32_t bits_packed = - libcrux_intrinsics_avx2_mm_movemask_epi8(/* Now that every element is - either 0xFF or 0x00, we just - extract the most significant - bit from each element and - collate them into two bytes. - */ - msbs); + (int32_t)1, lsb_to_msb, __m128i); + __m128i msbs = libcrux_intrinsics_avx2_mm_packs_epi16(low_msbs, high_msbs); + int32_t bits_packed = libcrux_intrinsics_avx2_mm_movemask_epi8(msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -714,63 +659,18 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { - __m256i coefficients = - libcrux_intrinsics_avx2_mm256_set_epi16(/* We need to take each bit from - the 2 bytes of input and put - them into their own 16-bit - lane. Ideally, we'd load the - two bytes into the vector, - duplicate them, and right-shift - the 0th element by 0 bits, the - first element by 1 bit, the - second by 2 bits and so on - before AND-ing with 0x1 to - leave only the least - signifinicant bit. But since - |_mm256_srlv_epi16| does not - exist, so we have to resort to - a workaround. Rather than - shifting each element by a - different amount, we'll - multiply each element by a - value such that the bit we're - interested in becomes the most - significant bit. 
The - coefficients are loaded as - follows: */ - b, b, b, b, b, b, b, b, a, a, a, - a, a, a, a, a); - __m256i coefficients_in_msb = - libcrux_intrinsics_avx2_mm256_mullo_epi16(/* And this vector, when - multiplied with the previous - one, ensures that the bit - we'd like to keep in each - lane becomes the most - significant bit upon - multiplication. */ - coefficients, - libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 8U, - (int16_t)1 << 9U, - (int16_t)1 << 10U, - (int16_t)1 << 11U, - (int16_t)1 << 12U, - (int16_t)1 << 13U, - (int16_t)1 << 14U, - (int16_t)-32768, - (int16_t)1 << 8U, - (int16_t)1 << 9U, - (int16_t)1 << 10U, - (int16_t)1 << 11U, - (int16_t)1 << 12U, - (int16_t)1 << 13U, - (int16_t)1 << 14U, - (int16_t)-32768)); - return libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)15, - /* Now that they're all in the most significant bit position, shift them - down to the least significant bit. */ - coefficients_in_msb, __m256i); + __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( + b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( + coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, + (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, + (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, + (int16_t)-32768)); + return libcrux_intrinsics_avx2_mm256_srli_epi16((int32_t)15, + coefficients_in_msb, __m256i); } KRML_ATTRIBUTE_TARGET("avx2") @@ -785,23 +685,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index( - bytes, - /* We need to take each bit from the 2 bytes of input and put them - into their own 16-bit lane. Ideally, we'd load the two bytes into - the vector, duplicate them, and right-shift the 0th element by 0 - bits, the first element by 1 bit, the second by 2 bits and so on - before AND-ing with 0x1 to leave only the least signifinicant bit. - But since |_mm256_srlv_epi16| does not exist, so we have to resort - to a workaround. Rather than shifting each element by a different - amount, we'll multiply each element by a value such that the bit - we're interested in becomes the most significant bit. The - coefficients are loaded as follows: And this vector, when - multiplied with the previous one, ensures that the bit we'd like to - keep in each lane becomes the most significant bit upon - multiplication. Now that they're all in the most significant bit - position, shift them down to the least significant bit. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -837,70 +721,23 @@ static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 4U, - /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | - 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be - laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA - 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... 
*/ - vector); - __m256i adjacent_8_combined = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* Recall that - |adjacent_2_combined| goes - as follows: 0x00_00_00_BA - 0x00_00_00_DC | - 0x00_00_00_FE 0x00_00_00_HG - | ... Out of this, we only - need the first byte, the 4th - byte, the 8th byte and so on - from the bottom and the top - 128 bits. */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0)); - __m256i combined = - libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32(/* |adjacent_8_combined| - looks like this: 0: - 0xHG_FE_DC_BA 1: - 0x00_00_00_00 | 2: - 0x00_00_00_00 3: - 0x00_00_00_00 | 4: - 0xPO_NM_LK_JI .... - We put the element - at 4 after the - element at 0 ... */ - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)4, - (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); + __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); + __m256i combined = libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32( + adjacent_8_combined, libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); __m128i combined0 = libcrux_intrinsics_avx2_mm256_castsi256_si128(combined); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( - Eurydice_array_to_slice( - (size_t)16U, - /* ... so that we can read them out in one go. */ serialized, - uint8_t), - combined0); + Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); uint8_t ret0[8U]; Result_15 dst; Eurydice_slice_to_array2( @@ -926,33 +763,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = - libcrux_intrinsics_avx2_mm256_set_epi16(/* Every 4 bits from each byte of - input should be put into its - own 16-bit lane. Since - |_mm256_srlv_epi16| does not - exist, we have to resort to a - workaround. Rather than - shifting each element by a - different amount, we'll - multiply each element by a - value such that the bits we're - interested in become the most - significant bits (of an 8-bit - value). In this lane, the 4 - bits we need to put are already - the most significant bits of - |bytes[7]| (that is, b7). */ - b7, - /* In this lane, the 4 bits we - need to put are the least - significant bits, so we need to - shift the 4 least-significant - bits of |b7| to the most - significant bits (of an 8-bit - value). 
*/ - b7, b6, b6, b5, b5, b4, b4, b3, - b3, b2, b2, b1, b1, b0, b0); + __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( + b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -962,14 +774,10 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); __m256i coefficients_in_lsb = libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)4, - /* Once the 4-bit coefficients are in the most significant positions (of - an 8-bit value), shift them all down by 4. */ - coefficients_in_msb, __m256i); + (int32_t)4, coefficients_in_msb, __m256i); return libcrux_intrinsics_avx2_mm256_and_si256( - /* Zero the remaining bits. */ coefficients_in_lsb, - libcrux_intrinsics_avx2_mm256_set1_epi16(((int16_t)1 << 4U) - - (int16_t)1)); + coefficients_in_lsb, libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 4U) - (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") @@ -986,23 +794,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index( - bytes, - /* Every 4 bits from each byte of input should be put into its own - 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to - resort to a workaround. Rather than shifting each element by a - different amount, we'll multiply each element by a value such that - the bits we're interested in become the most significant bits (of - an 8-bit value). In this lane, the 4 bits we need to put are - already the most significant bits of |bytes[7]| (that is, b7). In - this lane, the 4 bits we need to put are the least significant - bits, so we need to shift the 4 least-significant bits of |b7| to - the most significant bits (of an 8-bit value). These constants are - chosen to shift the bits of the values that we loaded into - |coefficients|. Once the 4-bit coefficients are in the most - significant positions (of an 8-bit value), shift them all down - by 4. Zero the remaining bits. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -1026,106 +818,35 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = - libcrux_intrinsics_avx2_mm256_madd_epi16(/* If |vector| is laid out as - follows (superscript number - indicates the corresponding - bit is duplicated that many - times): 0¹¹a₄a₃a₂a₁a₀ - 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ - 0¹¹d₄d₃d₂d₁d₀ | ↩ - 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ - 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | - ↩ |adjacent_2_combined| will - be laid out as a series of - 32-bit integers, as follows: - 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... 
*/ - vector, - libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, - (int16_t)1)); - __m256i adjacent_4_combined = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Recall that - |adjacent_2_combined| is laid - out as follows: - 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... This shift results in: - b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22)); + __m256i adjacent_2_combined = libcrux_intrinsics_avx2_mm256_madd_epi16( + vector, libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); + __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, (int32_t)0, + (int32_t)22, (int32_t)0, (int32_t)22)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)22, - /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift - down by 22 bits to remove the least significant 0 bits that aren't part - of the bits we need. */ - adjacent_4_combined, __m256i); + (int32_t)22, adjacent_4_combined, __m256i); __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi32( - (int32_t)8, - /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks - like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² - 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to - read out the bytes in one go, we need to shifts the bits in position 2 - to position 1 in each 128-bit lane. */ - adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* |adjacent_8_combined|, when - viewed as a set of 32-bit - values, now looks like: - 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0³² 0³² | ↩ Once again, we - line these bits up by shifting - the up values at indices 0 and - 5 by 12, viewing the resulting - register as a set of 64-bit - values, and then shifting down - the 64-bit values by 12 bits. 
- */ - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12)); + (int32_t)8, adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = libcrux_intrinsics_avx2_mm256_srli_epi64( (int32_t)12, adjacent_8_combined0, __m256i); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 40 bits - starting at position 0 in - the lower 128-bit lane, - ... */ - adjacent_8_combined1); + libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined1); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ - adjacent_8_combined1, __m128i); + (int32_t)1, adjacent_8_combined1, __m128i); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -1231,87 +952,27 @@ static inline core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 10U, - /* If |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... - |adjacent_2_combined| will be laid out as a series of 32-bit - integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ - vector); - __m256i adjacent_4_combined = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Shifting up the values at the - even indices by 12, we get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ ... */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); + __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)12, - /* Viewing this as a set of 64-bit integers we get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ ... 
Shifting down by 12 gives us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ ... */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* |adjacent_4_combined|, when - the bottom and top 128 - bit-lanes are grouped into - bytes, looks like: - 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ - In each 128-bit lane, we - want to put bytes 8, 9, 10, - 11, 12 after bytes 0, 1, 2, - 3 to allow for sequential - reading. */ - adjacent_4_combined0, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0)); + (int32_t)12, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + adjacent_4_combined0, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, + (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 64 bits - starting at position 0 in - the lower 128-bit lane, - ... */ - adjacent_8_combined); + libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* and 64 bits starting at position 0 in the upper 128-bit lane. */ - adjacent_8_combined, __m128i); + (int32_t)1, adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -1320,167 +981,8 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If - |vector| - is - laid - out - as - follows - (superscript - number - indicates - the - corresponding - bit - is - duplicated - that - many - times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ - | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ - | ↩ - ... - |adjacent_2_combined| - will - be - laid - out - as a - series - of - 32-bit - integers, - as - follows: - 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - .... - Shifting - up - the - values - at - the - even - indices - by - 12, - we - get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - ... 
- Viewing - this - as a - set - of - 64-bit - integers - we - get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ - ... - Shifting - down - by - 12 - gives - us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ - ... - |adjacent_4_combined|, - when - the - bottom - and - top - 128 - bit-lanes - are - grouped - into - bytes, - looks - like: - 0₇0₆0₅B₄B₃B₂B₁B₀ - | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ - | ↩ - In - each - 128-bit - lane, - we - want - to - put - bytes - 8, - 9, - 10, - 11, - 12 - after - bytes - 0, - 1, - 2, 3 - to - allow - for - sequential - reading. - We - now - have - 64 - bits - starting - at - position - 0 in - the - lower - 128-bit - lane, - ... - and - 64 - bits - starting - at - position - 0 in - the - upper - 128-bit - lane. - */ - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1536,20 +1038,16 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 6U)); __m256i coefficients1 = libcrux_intrinsics_avx2_mm256_srli_epi16( (int32_t)6, coefficients0, __m256i); - return libcrux_intrinsics_avx2_mm256_and_si256(/* Here I can prove this `and` - is not useful */ - coefficients1, - libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 10U) - - (int16_t)1)); + return libcrux_intrinsics_avx2_mm256_and_si256( + coefficients1, libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 10U) - (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( - /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, - (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = + Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1735,70 +1233,28 @@ libcrux_ml_kem_vector_avx2_sampling_rejection_sample(Eurydice_slice input, __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can - be interpreted as a - sequence of - serialized 12-bit - (i.e. uncompressed) - coefficients. Not - all coefficients - may be less than - FIELD_MODULUS - though. */ - input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); __m256i compare_with_field_modulus = - libcrux_intrinsics_avx2_mm256_cmpgt_epi16(/* Suppose we view - |potential_coefficients| as - follows (grouping 64-bit - elements): A B C D | E F G H - | .... and A < 3329, D < 3329 - and H < 3329, - |compare_with_field_modulus| - will look like: 0xFF 0 0 0xFF - | 0 0 0 0xFF | ... 
*/ - field_modulus, + libcrux_intrinsics_avx2_mm256_cmpgt_epi16(field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each - lane is either 0 or 1, we - only need one bit from - each lane in the register - to tell us what - coefficients to keep and - what to throw-away. - Combine all the bits - (there are 16) into two - bytes. */ - compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, - /* Each bit (and its corresponding position) represents an element we - want to sample. We'd like all such elements to be next to each other - starting at index 0, so that they can be read from the vector - easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level - shuffling indices needed to make this happen. For e.g. if good[0] = - 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit - lane to the first. To do this, we need the byte-level shuffle - indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = - libcrux_intrinsics_avx2_mm_loadu_si128(Eurydice_array_to_slice( - (size_t)16U, - /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, - uint8_t)); + __m128i lower_shuffles0 = libcrux_intrinsics_avx2_mm_loadu_si128( + Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); __m128i lower_coefficients = libcrux_intrinsics_avx2_mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = libcrux_intrinsics_avx2_mm_shuffle_epi8( lower_coefficients, lower_shuffles0); - libcrux_intrinsics_avx2_mm_storeu_si128( - /* ... then write them out ... */ output, lower_coefficients0); + libcrux_intrinsics_avx2_mm_storeu_si128(output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, - /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1979,9 +1435,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)10); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1991,15 +1445,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_low1, __m256i); + (int32_t)10, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2009,29 +1459,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)10, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2097,9 +1531,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)11); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2109,15 +1541,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_low1, __m256i); + (int32_t)11, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2127,29 +1555,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)11, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2253,13 +1665,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2354,11 +1760,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2455,9 +1857,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)4); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress 
the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2467,15 +1867,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)4, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2485,29 +1881,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)4, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. 
*/ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2568,9 +1948,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)5); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2580,15 +1958,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)5, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2598,29 +1972,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)5, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. 
*/ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2730,14 +2088,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2840,13 +2193,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2878,10 +2225,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61(&zeta_i, re, (size_t)3U); @@ -3076,16 +2420,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_2f( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(/* u := - Decompress_q(Decode_{d_u}(c), - d_u) */ - ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = libcrux_ml_kem_matrix_compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3108,8 +2447,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_2f( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab( - /* sˆ := Decode_12(sk) */ 
secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -3658,10 +2996,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -3728,7 +3062,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3750,15 +3084,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_fa( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4028,12 +3359,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -4054,10 +3380,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_61(/* Due to the small coefficient bound, we - can skip the first round of Montgomery - reductions. 
*/ - re); + libcrux_ml_kem_ntt_ntt_at_layer_7_61(re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4268,11 +3591,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -4395,26 +3714,8 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in top-level - declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing function cannot be - translated into C*: let mutable ret(Mark.Present,(Mark.AtMost - 2), ): int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the following code: - ```rust result.coefficients[i] = - Vector::barrett_reduce(Vector::add( coefficient_normal_form, - &Vector::add(self.coefficients[i], &message.coefficients[i]), - )); ``` */ - i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -4472,23 +3773,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... 
*/ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4497,17 +3784,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4521,23 +3802,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4601,23 +3869,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... 
*/ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4626,17 +3880,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4650,23 +3898,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4782,23 +4017,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... 
*/ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4807,17 +4028,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4831,23 +4046,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4878,11 +4080,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficient = libcrux_ml_kem_vector_avx2_compress_09_d1( libcrux_ml_kem_serialize_to_unsigned_field_modulus_61( @@ -4914,23 +4112,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. 
- the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... */ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4939,17 +4123,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4963,23 +4141,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. 
*/ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -5010,11 +4175,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficients = libcrux_ml_kem_vector_avx2_compress_09_f4( libcrux_ml_kem_vector_traits_to_unsigned_representative_61( @@ -5106,10 +4267,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5122,7 +4280,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_b4( copy_of_prf_input, domain_separator0); @@ -5131,7 +4288,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_avx2_PRF_a9_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5139,12 +4296,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_sampling_sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ - public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_61( @@ -5154,14 +4309,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 
uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5654,18 +4807,11 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_61( - self->coefficients[/* The coefficients are of the form aR^{-1} mod - q, which means calling to_montgomery_domain() - on them should return a mod q. */ - j]); + self->coefficients[j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -5696,8 +4842,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_61(); t_as_ntt[i0] = uu____0; @@ -5778,9 +4922,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5813,8 +4955,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5950,18 +5092,12 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_8c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_ed(/* pk := (Encode_12(tˆ - mod^{+}q) || ρ) */ - public_key->t_as_ntt, - Eurydice_array_to_slice( - (size_t)32U, - public_key->seed_for_A, - uint8_t), - 
public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_ed( + public_key->t_as_ntt, + Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(/* sk := Encode_12(sˆ mod^{+}q) - */ - private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6633,9 +5769,7 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6668,8 +5802,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6834,10 +5968,7 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_avx2_H_a9_e0( - Eurydice_array_to_subslice2(/* Eurydice can't access values directly on - the types. We need to go to the `value` - directly. */ - private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7797,10 +6928,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_b3( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -7868,7 +6995,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_b3( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7891,15 +7018,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_bf( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7934,10 +7058,7 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_e2( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, - /* XXX: We need to copy_from_slice here because karamel can't handle the - assignment cf. https://github.com/FStarLang/karamel/pull/491 */ - key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_f6); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h index 7a9446452..8f0de6a3e 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_portable_H @@ -1235,28 +1235,11 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( static inline uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = - (int16_t)1664 - - (int16_t) /* The approach used here is inspired by: - https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 - If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ - fe; - int16_t mask = - /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = - -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive - 
<= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so - if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ - shifted - - >> 15U; + int16_t shifted = (int16_t)1664 - (int16_t)fe; + int16_t mask = shifted >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = - /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the - most significant bit of shifted_positive_in_range will be 1. */ - shifted_positive_in_range - - >> 15U; + int16_t r0 = shifted_positive_in_range >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1293,16 +1276,7 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( static inline int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = - (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits - == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); - hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to - be constant time due to: - https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ - */ - fe - - << (uint32_t)coefficient_bits; + uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2904,13 +2878,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3006,11 +2974,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3285,11 +3249,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3396,13 +3356,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and 
parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3433,10 +3387,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c(&zeta_i, re, (size_t)3U); @@ -3640,16 +3591,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_42( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(/* u := - Decompress_q(Decode_{d_u}(c), - d_u) */ - ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = libcrux_ml_kem_matrix_compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3671,8 +3617,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_42( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b( - /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -4207,10 +4152,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -4277,7 +4218,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -4299,15 +4240,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_3f( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_1b( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4555,12 +4493,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -4582,10 +4515,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_8c(/* Due to the small coefficient bound, we - can skip the first round of Montgomery - reductions. 
*/ - re); + libcrux_ml_kem_ntt_ntt_at_layer_7_8c(re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4792,11 +4722,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4928,28 +4854,8 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in - top-level declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing - function cannot be translated into C*: let - mutable ret(Mark.Present,(Mark.AtMost 2), ): - int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. 
On the - following code: ```rust result.coefficients[i] - = Vector::barrett_reduce(Vector::add( - coefficient_normal_form, - &Vector::add(self.coefficients[i], - &message.coefficients[i]), )); ``` */ - i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -5206,11 +5112,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = libcrux_ml_kem_vector_portable_compress_0d_d1( @@ -5270,11 +5172,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = libcrux_ml_kem_vector_portable_compress_0d_f4( @@ -5366,10 +5264,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5382,7 +5277,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_3b( copy_of_prf_input, domain_separator0); @@ -5391,7 +5285,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_portable_PRF_f1_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5399,12 +5293,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_sampling_sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, 
prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ - public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_8c( @@ -5414,14 +5306,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5847,20 +5737,12 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_8c( - self->coefficients[/* The coefficients are of the form aR^{-1} - mod q, which means calling - to_montgomery_domain() on them should - return a mod q. */ - j]); + self->coefficients[j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -5892,8 +5774,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_8c(); t_as_ntt[i0] = uu____0; @@ -5973,9 +5853,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6008,8 +5886,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6141,18 +6019,12 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_43( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_6c(/* pk := (Encode_12(tˆ - mod^{+}q) || ρ) */ - public_key->t_as_ntt, - Eurydice_array_to_slice( - (size_t)32U, - public_key->seed_for_A, - uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_6c( + public_key->t_as_ntt, + Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_89(/* sk := Encode_12(sˆ mod^{+}q) - */ - private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6733,9 +6605,7 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6768,8 +6638,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6907,10 +6777,7 @@ static KRML_MUSTINLINE bool 
libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_portable_H_f1_e0( - Eurydice_array_to_subslice2(/* Eurydice can't access values directly on - the types. We need to go to the `value` - directly. */ - private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7728,10 +7595,7 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_df( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, - /* XXX: We need to copy_from_slice here because karamel can't handle the - assignment cf. https://github.com/FStarLang/karamel/pull/491 */ - key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_1d); libcrux_ml_kem_polynomial_PolynomialRingElement_1d ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h index 5955882fa..7a519bf7c 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_avx2_H @@ -104,9 +104,7 @@ libcrux_sha3_simd_avx2_and_not_xor_ef(__m256i a, __m256i b, __m256i c) { KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_sha3_simd_avx2__veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x( - (int64_t) /* Casting here is required, doesn't change the value. 
*/ - c); + __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x((int64_t)c); return libcrux_intrinsics_avx2_mm256_xor_si256(a, c0); } @@ -1701,7 +1699,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_5b( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], @@ -2036,15 +2034,7 @@ static KRML_MUSTINLINE void libcrux_sha3_avx2_x4_shake256( Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = { - /* XXX: These functions could alternatively implement the same with the - portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, - 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, - 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, - 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); - keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, - 0x1fu8>([input3], [out3]); } */ - input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; libcrux_sha3_generic_keccak_keccak_fb(buf0, buf); } @@ -2284,7 +2274,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_3a( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], diff --git a/libcrux-ml-kem/cg/libcrux_sha3_portable.h b/libcrux-ml-kem/cg/libcrux_sha3_portable.h index 211cf1919..a606f5f71 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_portable.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_portable_H @@ -1654,7 +1654,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } @@ -2013,7 +2012,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, 
out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -2142,7 +2140,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2749,7 +2746,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -3108,7 +3104,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -3404,7 +3399,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -3502,7 +3496,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { - /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3517,9 +3510,6 @@ typedef struct libcrux_sha3_neon_x2_incremental_KeccakState_s { */ static KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let s0 = KeccakState::new(); let s1 = - * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3532,10 +3522,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3549,10 +3535,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * 
shake128_squeeze_first_three_blocks(&mut s0, out0); - * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3566,10 +3548,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_next_block(&mut s0, out0); - * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3594,10 +3572,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3759,13 +3733,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)136U) { - consumed = (size_t)136U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)136U) { + consumed = (size_t)136U - self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -3871,9 +3840,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4220,13 +4187,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)168U) { - consumed = (size_t)168U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)168U) { + consumed = (size_t)168U - self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -4332,9 +4294,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). 
*/ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4724,13 +4684,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)136U >= out_len) { mid = out_len; } else { mid = (size_t)136U; @@ -4744,11 +4698,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4757,11 +4708,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -4856,13 +4803,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)168U >= out_len) { mid = out_len; } else { mid = (size_t)168U; @@ -4876,11 +4817,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4889,11 +4827,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. 
- */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); From 294c5806abf3c530d0425cba7382c05203f2ffe2 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 22:16:16 +0000 Subject: [PATCH 6/7] fstar --- Cargo.lock | 12 ++-- fstar-helpers/fstar-bitvec/BitVecEq.fsti | 4 +- .../extraction/Libcrux_ml_kem.Ind_cca.fst | 2 +- .../extraction/Libcrux_ml_kem.Mlkem1024.fsti | 59 +++++++++++++------ .../extraction/Libcrux_ml_kem.Mlkem512.fsti | 59 +++++++++++++------ .../extraction/Libcrux_ml_kem.Mlkem768.fsti | 59 +++++++++++++------ ...crux_ml_kem.Vector.Portable.Arithmetic.fst | 2 +- .../proofs/fstar/spec/Spec.Utils.fst | 5 +- 8 files changed, 140 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 363acf1a7..94f450b74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -697,8 +697,8 @@ dependencies = [ [[package]] name = "hax-lib" -version = "0.1.0-alpha.1" -source = "git+https://github.com/hacspec/hax/#1c5e17c9ceee5adede0f4ea7f68bb3d8337f33a0" +version = "0.1.0-rc.1" +source = "git+https://github.com/hacspec/hax/#de59826b832befc82905286d052c8a961c31f3cd" dependencies = [ "hax-lib-macros", "num-bigint", @@ -707,8 +707,8 @@ dependencies = [ [[package]] name = "hax-lib-macros" -version = "0.1.0-alpha.1" -source = "git+https://github.com/hacspec/hax/#1c5e17c9ceee5adede0f4ea7f68bb3d8337f33a0" +version = "0.1.0-rc.1" +source = "git+https://github.com/hacspec/hax/#de59826b832befc82905286d052c8a961c31f3cd" dependencies = [ "hax-lib-macros-types", "paste", @@ -720,8 +720,8 @@ dependencies = [ [[package]] name = "hax-lib-macros-types" -version = "0.1.0-alpha.1" -source = "git+https://github.com/hacspec/hax/#1c5e17c9ceee5adede0f4ea7f68bb3d8337f33a0" +version = "0.1.0-rc.1" +source = "git+https://github.com/hacspec/hax/#de59826b832befc82905286d052c8a961c31f3cd" dependencies = [ "proc-macro2", "quote", diff --git a/fstar-helpers/fstar-bitvec/BitVecEq.fsti b/fstar-helpers/fstar-bitvec/BitVecEq.fsti index c370f28bf..6792f2b29 100644 --- a/fstar-helpers/fstar-bitvec/BitVecEq.fsti +++ b/fstar-helpers/fstar-bitvec/BitVecEq.fsti @@ -1,5 +1,5 @@ module BitVecEq -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" open Core open FStar.Mul open MkSeq @@ -72,7 +72,7 @@ let int_t_array_bitwise_eq // else get_bit_nat (pow2 (bits n) + v x) (v nth)) // with get_bit_intro #n x nth -#push-options "--fuel 0 --ifuel 0 --z3rlimit 80" +#push-options "--fuel 0 --ifuel 0 --z3rlimit 150" /// Rewrite a `bit_vec_of_int_t_array (Seq.slice arr ...)` into a `bit_vec_sub ...` let int_t_seq_slice_to_bv_sub_lemma #t #n (arr: t_Array (int_t t) n) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst index a6ffee609..ee9e56c50 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst @@ -235,7 +235,7 @@ let serialize_kem_secret_key #pop-options -#push-options "--z3rlimit 300" +#push-options "--z3rlimit 300 --ext context_pruning --split_queries always" let encapsulate (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: diff --git 
a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti index b31f845fc..007e5c86f 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti @@ -3,39 +3,64 @@ module Libcrux_ml_kem.Mlkem1024 open Core open FStar.Mul -let v_C1_BLOCK_SIZE_1024_: usize = sz 352 +let v_ETA1: usize = sz 2 -let v_C1_SIZE_1024_: usize = sz 1408 +let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 -let v_C2_SIZE_1024_: usize = sz 160 +let v_ETA2: usize = sz 2 -let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = sz 1568 +let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 -let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = sz 1568 +let v_RANK_1024_: usize = sz 4 -let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = sz 1536 +let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = + ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA1: usize = sz 2 +let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = + (v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 -let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 +let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = + ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA2: usize = sz 2 +let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = v_T_AS_NTT_ENCODED_SIZE_1024_ +! sz 32 -let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 +let v_SECRET_KEY_SIZE_1024_: usize = + ((v_CPA_PKE_SECRET_KEY_SIZE_1024_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_1024_ <: usize) +! + Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE + <: + usize) +! + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1600 +let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 -let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = sz 1536 +let v_C1_BLOCK_SIZE_1024_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_1024_ + <: + usize) /! + sz 8 -let v_RANK_1024_: usize = sz 4 +let v_C1_SIZE_1024_: usize = v_C1_BLOCK_SIZE_1024_ *! v_RANK_1024_ -let v_SECRET_KEY_SIZE_1024_: usize = sz 3168 +let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 -let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = sz 1536 +let v_C2_SIZE_1024_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_1024_ + <: + usize) /! + sz 8 -let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 +let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = v_C1_SIZE_1024_ +! v_C2_SIZE_1024_ -let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_1024_ /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti index 28d905063..94590e2ee 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti @@ -3,39 +3,64 @@ module Libcrux_ml_kem.Mlkem512 open Core open FStar.Mul -let v_C1_BLOCK_SIZE_512_: usize = sz 320 +let v_ETA1: usize = sz 3 -let v_C1_SIZE_512_: usize = sz 640 +let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 -let v_C2_SIZE_512_: usize = sz 128 +let v_ETA2: usize = sz 2 -let v_CPA_PKE_CIPHERTEXT_SIZE_512_: usize = sz 768 +let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 -let v_CPA_PKE_PUBLIC_KEY_SIZE_512_: usize = sz 800 +let v_RANK_512_: usize = sz 2 -let v_CPA_PKE_SECRET_KEY_SIZE_512_: usize = sz 768 +let v_CPA_PKE_SECRET_KEY_SIZE_512_: usize = + ((v_RANK_512_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA1: usize = sz 3 +let v_RANKED_BYTES_PER_RING_ELEMENT_512_: usize = + (v_RANK_512_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 -let v_ETA1_RANDOMNESS_SIZE: usize = sz 192 +let v_T_AS_NTT_ENCODED_SIZE_512_: usize = + ((v_RANK_512_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA2: usize = sz 2 +let v_CPA_PKE_PUBLIC_KEY_SIZE_512_: usize = v_T_AS_NTT_ENCODED_SIZE_512_ +! sz 32 -let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 +let v_SECRET_KEY_SIZE_512_: usize = + ((v_CPA_PKE_SECRET_KEY_SIZE_512_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_512_ <: usize) +! + Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE + <: + usize) +! + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 800 +let v_VECTOR_U_COMPRESSION_FACTOR_512_: usize = sz 10 -let v_RANKED_BYTES_PER_RING_ELEMENT_512_: usize = sz 768 +let v_C1_BLOCK_SIZE_512_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_512_ + <: + usize) /! + sz 8 -let v_RANK_512_: usize = sz 2 +let v_C1_SIZE_512_: usize = v_C1_BLOCK_SIZE_512_ *! v_RANK_512_ -let v_SECRET_KEY_SIZE_512_: usize = sz 1632 +let v_VECTOR_V_COMPRESSION_FACTOR_512_: usize = sz 4 -let v_T_AS_NTT_ENCODED_SIZE_512_: usize = sz 768 +let v_C2_SIZE_512_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_512_ + <: + usize) /! + sz 8 -let v_VECTOR_U_COMPRESSION_FACTOR_512_: usize = sz 10 +let v_CPA_PKE_CIPHERTEXT_SIZE_512_: usize = v_C1_SIZE_512_ +! v_C2_SIZE_512_ -let v_VECTOR_V_COMPRESSION_FACTOR_512_: usize = sz 4 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_512_ /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti index 928e6a233..d1d7c217f 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti @@ -3,39 +3,64 @@ module Libcrux_ml_kem.Mlkem768 open Core open FStar.Mul -let v_C1_BLOCK_SIZE_768_: usize = sz 320 +let v_ETA1: usize = sz 2 -let v_C1_SIZE_768_: usize = sz 960 +let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! 
sz 64 -let v_C2_SIZE_768_: usize = sz 128 +let v_ETA2: usize = sz 2 -let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = sz 1088 +let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 -let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = sz 1184 +let v_RANK_768_: usize = sz 3 -let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = sz 1152 +let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = + ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA1: usize = sz 2 +let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = + (v_RANK_768_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 -let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 +let v_T_AS_NTT_ENCODED_SIZE_768_: usize = + ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA2: usize = sz 2 +let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = v_T_AS_NTT_ENCODED_SIZE_768_ +! sz 32 -let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 +let v_SECRET_KEY_SIZE_768_: usize = + ((v_CPA_PKE_SECRET_KEY_SIZE_768_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_768_ <: usize) +! + Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE + <: + usize) +! + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1120 +let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 -let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = sz 1152 +let v_C1_BLOCK_SIZE_768_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_768_ + <: + usize) /! + sz 8 -let v_RANK_768_: usize = sz 3 +let v_C1_SIZE_768_: usize = v_C1_BLOCK_SIZE_768_ *! v_RANK_768_ -let v_SECRET_KEY_SIZE_768_: usize = sz 2400 +let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 -let v_T_AS_NTT_ENCODED_SIZE_768_: usize = sz 1152 +let v_C2_SIZE_768_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_768_ + <: + usize) /! + sz 8 -let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 +let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = v_C1_SIZE_768_ +! v_C2_SIZE_768_ -let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_768_ /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst index 9f607fddd..f400f5ccd 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst @@ -28,7 +28,7 @@ let get_n_least_significant_bits (n: u8) (value: u32) = #pop-options -#push-options "--z3rlimit 150" +#push-options "--z3rlimit 200" let barrett_reduce_element (value: i16) = let t:i32 = diff --git a/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst b/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst index 5c77472f2..cbe51c827 100644 --- a/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst +++ b/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst @@ -361,7 +361,9 @@ val lemma_mont_mul_red_i16_int (x y:i16): Lemma let result:i16 = mont_mul_red_i16 x y in is_i16b 3328 result /\ v result % 3329 == (v x * v y * 169) % 3329)) - + +#push-options "--z3rlimit 200" + let lemma_mont_mul_red_i16_int (x y:i16) = let vlow = x *. y in let prod = v x * v y in @@ -429,6 +431,7 @@ let lemma_mont_mul_red_i16_int (x y:i16) = ((prod) * 169) % 3329; } +#pop-options val lemma_mont_mul_red_i16 (x y:i16): Lemma (requires (is_i16b 1664 y \/ is_intb (3326 * pow2 15) (v x * v y))) From 83a72e794daa8e6943bc85af53bc2cfd7b592e9b Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 22:58:03 +0000 Subject: [PATCH 7/7] fstar --- .../fstar/extraction/Libcrux_ml_kem.Types.fst | 155 ------------------ .../extraction/Libcrux_ml_kem.Types.fsti | 141 ++++++++++++++-- .../Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst | 2 +- .../Libcrux_ml_kem.Vector.Avx2.Serialize.fst | 1 - 4 files changed, 127 insertions(+), 172 deletions(-) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst index 3a598d127..5748d2562 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst @@ -9,103 +9,10 @@ let impl_13__len (v_SIZE: usize) (_: Prims.unit) = v_SIZE let impl_20__len (v_SIZE: usize) (_: Prims.unit) = v_SIZE -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_1 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemCiphertext v_SIZE) -> true); - f_from - = - fun (value: t_Array u8 v_SIZE) -> - { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } - <: - t_MlKemCiphertext v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_2 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemCiphertext v_SIZE) = - { - f_from_pre = (fun (value: t_MlKemCiphertext v_SIZE) -> true); - f_from_post = (fun (value: t_MlKemCiphertext v_SIZE) (out: t_Array u8 v_SIZE) -> true); - f_from = fun (value: t_MlKemCiphertext v_SIZE) -> value.f_value - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_5 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post - = - (fun (value: t_Array u8 v_SIZE) (result: t_MlKemCiphertext v_SIZE) -> result.f_value = value); - f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemCiphertext v_SIZE - } - let 
impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) = self.f_value -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_8 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPrivateKey v_SIZE) -> true); - f_from - = - fun (value: t_Array u8 v_SIZE) -> - { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } - <: - t_MlKemPrivateKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_9 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPrivateKey v_SIZE) = - { - f_from_pre = (fun (value: t_MlKemPrivateKey v_SIZE) -> true); - f_from_post = (fun (value: t_MlKemPrivateKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); - f_from = fun (value: t_MlKemPrivateKey v_SIZE) -> value.f_value - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_12 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post - = - (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPrivateKey v_SIZE) -> result.f_value = value); - f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPrivateKey v_SIZE - } - let impl_13__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) = self.f_value -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_15 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPublicKey v_SIZE) -> true); - f_from - = - fun (value: t_Array u8 v_SIZE) -> - { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } - <: - t_MlKemPublicKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_16 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPublicKey v_SIZE) = - { - f_from_pre = (fun (value: t_MlKemPublicKey v_SIZE) -> true); - f_from_post = (fun (value: t_MlKemPublicKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); - f_from = fun (value: t_MlKemPublicKey v_SIZE) -> value.f_value - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_19 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post - = - (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPublicKey v_SIZE) -> result.f_value = value); - f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPublicKey v_SIZE - } - let impl_20__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) = self.f_value let impl_21__from @@ -178,65 +85,3 @@ let unpack_private_key (v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) (private <: (t_Slice u8 & t_Slice u8 & t_Slice u8 & t_Slice u8) -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl (v_SIZE: usize) : Core.Default.t_Default (t_MlKemCiphertext v_SIZE) = - { - f_default_pre = (fun (_: Prims.unit) -> true); - f_default_post = (fun (_: Prims.unit) (out: t_MlKemCiphertext v_SIZE) -> true); - f_default - = - fun (_: Prims.unit) -> - { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemCiphertext v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_7 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPrivateKey v_SIZE) = - { - f_default_pre = (fun (_: Prims.unit) -> true); - f_default_post = (fun (_: Prims.unit) 
(out: t_MlKemPrivateKey v_SIZE) -> true); - f_default - = - fun (_: Prims.unit) -> - { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPrivateKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_14 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPublicKey v_SIZE) = - { - f_default_pre = (fun (_: Prims.unit) -> true); - f_default_post = (fun (_: Prims.unit) (out: t_MlKemPublicKey v_SIZE) -> true); - f_default - = - fun (_: Prims.unit) -> - { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPublicKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_4 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemCiphertext v_SIZE) (t_Slice u8) = - { - f_as_ref_pre = (fun (self: t_MlKemCiphertext v_SIZE) -> true); - f_as_ref_post - = - (fun (self___: t_MlKemCiphertext v_SIZE) (result: t_Slice u8) -> result = self___.f_value); - f_as_ref = fun (self: t_MlKemCiphertext v_SIZE) -> self.f_value <: t_Slice u8 - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_11 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = - { - f_as_ref_pre = (fun (self: t_MlKemPrivateKey v_SIZE) -> true); - f_as_ref_post - = - (fun (self___: t_MlKemPrivateKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); - f_as_ref = fun (self: t_MlKemPrivateKey v_SIZE) -> self.f_value <: t_Slice u8 - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_18 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) = - { - f_as_ref_pre = (fun (self: t_MlKemPublicKey v_SIZE) -> true); - f_as_ref_post - = - (fun (self___: t_MlKemPublicKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); - f_as_ref = fun (self: t_MlKemPublicKey v_SIZE) -> self.f_value <: t_Slice u8 - } diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti index 4f76c2ffc..1947307c5 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti @@ -19,13 +19,35 @@ val impl_20__len: v_SIZE: usize -> Prims.unit type t_MlKemCiphertext (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_1 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) +let impl_1 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemCiphertext v_SIZE) -> true); + f_from + = + fun (value: t_Array u8 v_SIZE) -> + { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } + <: + t_MlKemCiphertext v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_2 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemCiphertext v_SIZE) +let impl_2 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemCiphertext v_SIZE) = + { + f_from_pre = (fun (value: t_MlKemCiphertext v_SIZE) -> true); + f_from_post = (fun (value: t_MlKemCiphertext v_SIZE) (out: t_Array u8 v_SIZE) -> true); + f_from = fun (value: t_MlKemCiphertext v_SIZE) -> value.f_value + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_5 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) +let impl_5 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> 
true); + f_from_post + = + (fun (value: t_Array u8 v_SIZE) (result: t_MlKemCiphertext v_SIZE) -> result.f_value = value); + f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemCiphertext v_SIZE + } /// A reference to the raw byte slice. val impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) @@ -40,13 +62,35 @@ val impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) type t_MlKemPrivateKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_8 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) +let impl_8 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPrivateKey v_SIZE) -> true); + f_from + = + fun (value: t_Array u8 v_SIZE) -> + { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } + <: + t_MlKemPrivateKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_9 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPrivateKey v_SIZE) +let impl_9 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPrivateKey v_SIZE) = + { + f_from_pre = (fun (value: t_MlKemPrivateKey v_SIZE) -> true); + f_from_post = (fun (value: t_MlKemPrivateKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); + f_from = fun (value: t_MlKemPrivateKey v_SIZE) -> value.f_value + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_12 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) +let impl_12 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post + = + (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPrivateKey v_SIZE) -> result.f_value = value); + f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPrivateKey v_SIZE + } /// A reference to the raw byte slice. 
val impl_13__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) @@ -61,13 +105,35 @@ val impl_13__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) type t_MlKemPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_15 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) +let impl_15 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPublicKey v_SIZE) -> true); + f_from + = + fun (value: t_Array u8 v_SIZE) -> + { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } + <: + t_MlKemPublicKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_16 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPublicKey v_SIZE) +let impl_16 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPublicKey v_SIZE) = + { + f_from_pre = (fun (value: t_MlKemPublicKey v_SIZE) -> true); + f_from_post = (fun (value: t_MlKemPublicKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); + f_from = fun (value: t_MlKemPublicKey v_SIZE) -> value.f_value + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_19 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) +let impl_19 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post + = + (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPublicKey v_SIZE) -> result.f_value = value); + f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPublicKey v_SIZE + } /// A reference to the raw byte slice. 
val impl_20__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) @@ -169,22 +235,67 @@ val unpack_private_key (v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) (private v Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE)) [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl (v_SIZE: usize) : Core.Default.t_Default (t_MlKemCiphertext v_SIZE) +let impl (v_SIZE: usize) : Core.Default.t_Default (t_MlKemCiphertext v_SIZE) = + { + f_default_pre = (fun (_: Prims.unit) -> true); + f_default_post = (fun (_: Prims.unit) (out: t_MlKemCiphertext v_SIZE) -> true); + f_default + = + fun (_: Prims.unit) -> + { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemCiphertext v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_7 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPrivateKey v_SIZE) +let impl_7 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPrivateKey v_SIZE) = + { + f_default_pre = (fun (_: Prims.unit) -> true); + f_default_post = (fun (_: Prims.unit) (out: t_MlKemPrivateKey v_SIZE) -> true); + f_default + = + fun (_: Prims.unit) -> + { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPrivateKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_14 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPublicKey v_SIZE) +let impl_14 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPublicKey v_SIZE) = + { + f_default_pre = (fun (_: Prims.unit) -> true); + f_default_post = (fun (_: Prims.unit) (out: t_MlKemPublicKey v_SIZE) -> true); + f_default + = + fun (_: Prims.unit) -> + { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPublicKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_4 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemCiphertext v_SIZE) (t_Slice u8) +let impl_4 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemCiphertext v_SIZE) (t_Slice u8) = + { + f_as_ref_pre = (fun (self: t_MlKemCiphertext v_SIZE) -> true); + f_as_ref_post + = + (fun (self___: t_MlKemCiphertext v_SIZE) (result: t_Slice u8) -> result = self___.f_value); + f_as_ref = fun (self: t_MlKemCiphertext v_SIZE) -> self.f_value <: t_Slice u8 + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_11 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPrivateKey v_SIZE) (t_Slice u8) +let impl_11 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = + { + f_as_ref_pre = (fun (self: t_MlKemPrivateKey v_SIZE) -> true); + f_as_ref_post + = + (fun (self___: t_MlKemPrivateKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); + f_as_ref = fun (self: t_MlKemPrivateKey v_SIZE) -> self.f_value <: t_Slice u8 + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_18 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) +let impl_18 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) = + { + f_as_ref_pre = (fun (self: t_MlKemPublicKey v_SIZE) -> true); + f_as_ref_post + = + (fun (self___: t_MlKemPublicKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); + f_as_ref = fun (self: t_MlKemPublicKey v_SIZE) -> self.f_value <: t_Slice u8 + } [@@ FStar.Tactics.Typeclasses.tcinstance] let impl_3 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemCiphertext v_SIZE) (t_Slice u8) = diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst index 6f960e706..cba0ea581 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst +++ 
b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst @@ -184,7 +184,7 @@ let cond_subtract_3329_ (vector: Libcrux_intrinsics.Avx2_extract.t_Vec256) = #pop-options -#push-options "--z3rlimit 200" +#push-options "--z3rlimit 250" let montgomery_multiply_by_constant (vector: Libcrux_intrinsics.Avx2_extract.t_Vec256) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst index b0c197583..00fb6832a 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst @@ -38,7 +38,6 @@ let deserialize_1_ (bytes: t_Slice u8) = deserialize_1___deserialize_1_u8s (bytes.[ sz 0 ] <: u8) (bytes.[ sz 1 ] <: u8) [@@"opaque_to_smt"] - let deserialize_4___deserialize_4_i16s (b0 b1 b2 b3 b4 b5 b6 b7: i16) = let coefficients:Libcrux_intrinsics.Avx2_extract.t_Vec256 = Libcrux_intrinsics.Avx2_extract.mm256_set_epi16 b7 b7 b6 b6 b5 b5 b4 b4 b3 b3 b2 b2 b1 b1 b0 b0