From fe49cc565ffc9668fdcff6b99788235bd8da0fcb Mon Sep 17 00:00:00 2001 From: Karthikeyan Bhargavan Date: Tue, 3 Dec 2024 18:40:41 +0100 Subject: [PATCH 1/7] fixing code to address review comments --- .../extraction/Libcrux_ml_kem.Invert_ntt.fst | 16 +-- .../extraction/Libcrux_ml_kem.Mlkem1024.fsti | 59 +++------ .../extraction/Libcrux_ml_kem.Mlkem768.fsti | 59 +++------ .../fstar/extraction/Libcrux_ml_kem.Ntt.fst | 16 +-- .../extraction/Libcrux_ml_kem.Polynomial.fst | 10 +- .../extraction/Libcrux_ml_kem.Polynomial.fsti | 2 +- libcrux-ml-kem/src/hash_functions.rs | 12 -- libcrux-ml-kem/src/ind_cca.rs | 7 +- libcrux-ml-kem/src/ind_cpa.rs | 1 + libcrux-ml-kem/src/invert_ntt.rs | 18 +-- libcrux-ml-kem/src/mlkem512.rs | 30 ++--- libcrux-ml-kem/src/ntt.rs | 18 +-- libcrux-ml-kem/src/polynomial.rs | 21 ++-- libcrux-ml-kem/src/vector/avx2.rs | 8 -- libcrux-ml-kem/src/vector/avx2/arithmetic.rs | 5 - libcrux-ml-kem/src/vector/portable/ntt.rs | 15 --- .../src/vector/portable/serialize.rs | 112 ------------------ libcrux-ml-kem/src/vector/traits.rs | 2 + 18 files changed, 100 insertions(+), 311 deletions(-) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst index aeccf049f..53290fba7 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Invert_ntt.fst @@ -84,10 +84,10 @@ let invert_ntt_at_layer_1_ (Libcrux_ml_kem.Vector.Traits.f_inv_ntt_layer_1_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 1 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 2 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 3 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 2 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 3 <: usize) <: i16) <: v_Vector) } @@ -165,8 +165,8 @@ let invert_ntt_at_layer_2_ (Libcrux_ml_kem.Vector.Traits.f_inv_ntt_layer_2_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i -! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i -! sz 1 <: usize) <: i16) <: v_Vector) } @@ -244,7 +244,7 @@ let invert_ntt_at_layer_3_ (Libcrux_ml_kem.Vector.Traits.f_inv_ntt_layer_3_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) <: v_Vector) } @@ -317,7 +317,7 @@ let invert_ntt_at_layer_4_plus (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ j +! 
step_vec <: usize ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) in let re:Libcrux_ml_kem.Polynomial.t_PolynomialRingElement v_Vector = { diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti index 007e5c86f..b31f845fc 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti @@ -3,64 +3,39 @@ module Libcrux_ml_kem.Mlkem1024 open Core open FStar.Mul -let v_ETA1: usize = sz 2 +let v_C1_BLOCK_SIZE_1024_: usize = sz 352 -let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 +let v_C1_SIZE_1024_: usize = sz 1408 -let v_ETA2: usize = sz 2 +let v_C2_SIZE_1024_: usize = sz 160 -let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 +let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = sz 1568 -let v_RANK_1024_: usize = sz 4 +let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = sz 1568 -let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = - ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = sz 1536 -let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = - (v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 +let v_ETA1: usize = sz 2 -let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = - ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 -let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = v_T_AS_NTT_ENCODED_SIZE_1024_ +! sz 32 +let v_ETA2: usize = sz 2 -let v_SECRET_KEY_SIZE_1024_: usize = - ((v_CPA_PKE_SECRET_KEY_SIZE_1024_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_1024_ <: usize) +! - Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE - <: - usize) +! - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 -let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1600 -let v_C1_BLOCK_SIZE_1024_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_1024_ - <: - usize) /! - sz 8 +let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = sz 1536 -let v_C1_SIZE_1024_: usize = v_C1_BLOCK_SIZE_1024_ *! v_RANK_1024_ +let v_RANK_1024_: usize = sz 4 -let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 +let v_SECRET_KEY_SIZE_1024_: usize = sz 3168 -let v_C2_SIZE_1024_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_1024_ - <: - usize) /! - sz 8 +let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = sz 1536 -let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = v_C1_SIZE_1024_ +! v_C2_SIZE_1024_ +let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_1024_ +let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
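Note (not part of the patch): the extracted Mlkem1024.fsti constants above are now literal values instead of the derived expressions. A minimal Rust sketch, assuming the usual libcrux base constants (256 coefficients per ring element, 12 bits per coefficient, 32-byte H digest and shared secret), confirms the literals agree with the old derivations; the names below are illustrative only.

fn main() {
    // Assumed base constants (cf. libcrux_ml_kem::constants).
    const COEFFS: usize = 256;
    const BITS_PER_COEFF: usize = 12;
    const H_DIGEST: usize = 32;
    const SHARED_SECRET: usize = 32;
    // ML-KEM 1024: rank 4, u-compression 11, v-compression 5.
    let (rank, d_u, d_v) = (4usize, 11usize, 5usize);
    let ranked_bytes_per_ring_element = rank * COEFFS * BITS_PER_COEFF / 8;
    let t_as_ntt_encoded = rank * COEFFS * BITS_PER_COEFF / 8;
    let cpa_public_key = t_as_ntt_encoded + 32;
    let cpa_secret_key = rank * COEFFS * BITS_PER_COEFF / 8;
    let c1_block = COEFFS * d_u / 8;
    let c1 = c1_block * rank;
    let c2 = COEFFS * d_v / 8;
    let ciphertext = c1 + c2;
    assert_eq!(ranked_bytes_per_ring_element, 1536);
    assert_eq!((t_as_ntt_encoded, cpa_public_key, cpa_secret_key), (1536, 1568, 1536));
    assert_eq!((c1_block, c1, c2, ciphertext), (352, 1408, 160, 1568));
    assert_eq!(cpa_secret_key + cpa_public_key + H_DIGEST + SHARED_SECRET, 3168);
    assert_eq!(SHARED_SECRET + ciphertext, 1600);
}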
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti index d1d7c217f..928e6a233 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti @@ -3,64 +3,39 @@ module Libcrux_ml_kem.Mlkem768 open Core open FStar.Mul -let v_ETA1: usize = sz 2 +let v_C1_BLOCK_SIZE_768_: usize = sz 320 -let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 +let v_C1_SIZE_768_: usize = sz 960 -let v_ETA2: usize = sz 2 +let v_C2_SIZE_768_: usize = sz 128 -let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 +let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = sz 1088 -let v_RANK_768_: usize = sz 3 +let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = sz 1184 -let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = - ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = sz 1152 -let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = - (v_RANK_768_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 +let v_ETA1: usize = sz 2 -let v_T_AS_NTT_ENCODED_SIZE_768_: usize = - ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! - Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT - <: - usize) /! - sz 8 +let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 -let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = v_T_AS_NTT_ENCODED_SIZE_768_ +! sz 32 +let v_ETA2: usize = sz 2 -let v_SECRET_KEY_SIZE_768_: usize = - ((v_CPA_PKE_SECRET_KEY_SIZE_768_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_768_ <: usize) +! - Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE - <: - usize) +! - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 -let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1120 -let v_C1_BLOCK_SIZE_768_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_768_ - <: - usize) /! - sz 8 +let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = sz 1152 -let v_C1_SIZE_768_: usize = v_C1_BLOCK_SIZE_768_ *! v_RANK_768_ +let v_RANK_768_: usize = sz 3 -let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 +let v_SECRET_KEY_SIZE_768_: usize = sz 2400 -let v_C2_SIZE_768_: usize = - (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_768_ - <: - usize) /! - sz 8 +let v_T_AS_NTT_ENCODED_SIZE_768_: usize = sz 1152 -let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = v_C1_SIZE_768_ +! v_C2_SIZE_768_ +let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = - Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_768_ +let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
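Note (not part of the patch): the same inlining happens in Mlkem768.fsti below. The corresponding check under the same assumed base constants, with rank 3, u-compression 10, v-compression 4; again an illustrative Rust sketch, not patch code.

fn main() {
    let (rank, d_u, d_v) = (3usize, 10usize, 4usize);
    // 1152: also RANKED_BYTES_PER_RING_ELEMENT_768_ and the CPA secret key size.
    let t_as_ntt_encoded = rank * 256 * 12 / 8;
    let cpa_public_key = t_as_ntt_encoded + 32;
    let c1 = (256 * d_u / 8) * rank;
    let c2 = 256 * d_v / 8;
    let ciphertext = c1 + c2;
    assert_eq!((t_as_ntt_encoded, cpa_public_key), (1152, 1184));
    assert_eq!((c1, c2, ciphertext), (960, 128, 1088));
    assert_eq!(t_as_ntt_encoded + cpa_public_key + 32 + 32, 2400); // SECRET_KEY_SIZE_768_
    assert_eq!(32 + ciphertext, 1120); // IMPLICIT_REJECTION_HASH_INPUT_SIZE
}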
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst index 2c5a30cb2..41d6dfad3 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ntt.fst @@ -81,10 +81,10 @@ let ntt_at_layer_1_ (Libcrux_ml_kem.Vector.Traits.f_ntt_layer_1_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 1 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 2 <: usize) <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 3 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 2 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 3 <: usize) <: i16) <: v_Vector) } @@ -163,8 +163,8 @@ let ntt_at_layer_2_ (Libcrux_ml_kem.Vector.Traits.f_ntt_layer_2_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) - (Libcrux_ml_kem.Polynomial.get_zeta (zeta_i +! sz 1 <: usize) <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta (zeta_i +! sz 1 <: usize) <: i16) <: v_Vector) } @@ -243,7 +243,7 @@ let ntt_at_layer_3_ (Libcrux_ml_kem.Vector.Traits.f_ntt_layer_3_step #v_Vector #FStar.Tactics.Typeclasses.solve (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ round ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) <: v_Vector) } @@ -315,7 +315,7 @@ let ntt_at_layer_4_plus (re.Libcrux_ml_kem.Polynomial.f_coefficients.[ j +! step_vec <: usize ] <: v_Vector) - (Libcrux_ml_kem.Polynomial.get_zeta zeta_i <: i16) + (Libcrux_ml_kem.Polynomial.zeta zeta_i <: i16) in let re:Libcrux_ml_kem.Polynomial.t_PolynomialRingElement v_Vector = { diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst index 4dcc55b91..fec53d917 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fst @@ -9,7 +9,7 @@ let _ = let open Libcrux_ml_kem.Vector.Traits in () -let get_zeta (i: usize) = +let zeta (i: usize) = let result:i16 = v_ZETAS_TIMES_MONTGOMERY_R.[ i ] in let _:Prims.unit = admit () (* Panic freedom *) in result @@ -355,10 +355,10 @@ let impl_2__ntt_multiply #FStar.Tactics.Typeclasses.solve (self.f_coefficients.[ i ] <: v_Vector) (rhs.f_coefficients.[ i ] <: v_Vector) - (get_zeta (sz 64 +! (sz 4 *! i <: usize) <: usize) <: i16) - (get_zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 1 <: usize) <: i16) - (get_zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 2 <: usize) <: i16) - (get_zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 3 <: usize) <: i16) + (zeta (sz 64 +! (sz 4 *! i <: usize) <: usize) <: i16) + (zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 1 <: usize) <: i16) + (zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! sz 2 <: usize) <: i16) + (zeta ((sz 64 +! (sz 4 *! i <: usize) <: usize) +! 
sz 3 <: usize) <: i16) <: v_Vector) <: diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti index 6ad4d7a0b..6dd0db075 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Polynomial.fsti @@ -29,7 +29,7 @@ let v_ZETAS_TIMES_MONTGOMERY_R: t_Array i16 (sz 128) = FStar.Pervasives.assert_norm (Prims.eq2 (List.Tot.length list) 128); Rust_primitives.Hax.array_of_list 128 list -val get_zeta (i: usize) +val zeta (i: usize) : Prims.Pure i16 (requires i <. sz 128) (ensures diff --git a/libcrux-ml-kem/src/hash_functions.rs b/libcrux-ml-kem/src/hash_functions.rs index 7641a7266..17d34fdc2 100644 --- a/libcrux-ml-kem/src/hash_functions.rs +++ b/libcrux-ml-kem/src/hash_functions.rs @@ -171,7 +171,6 @@ pub(crate) mod portable { #[hax_lib::attributes] impl Hash for PortableHash { - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_G $input")) ] @@ -180,7 +179,6 @@ pub(crate) mod portable { G(input) } - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_H $input")) ] @@ -190,7 +188,6 @@ pub(crate) mod portable { } #[requires(fstar!("v $LEN < pow2 32"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("v $LEN < pow2 32 ==> $out == Spec.Utils.v_PRF $LEN $input")) @@ -201,7 +198,6 @@ pub(crate) mod portable { } #[requires(fstar!("v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("(v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)) ==> $out == Spec.Utils.v_PRFxN $K $LEN $input")) @@ -428,7 +424,6 @@ pub(crate) mod avx2 { #[hax_lib::attributes] impl Hash for Simd256Hash { - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_G $input")) ] @@ -437,7 +432,6 @@ pub(crate) mod avx2 { G(input) } - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_H $input")) ] @@ -447,7 +441,6 @@ pub(crate) mod avx2 { } #[requires(fstar!("v $LEN < pow2 32"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[hax_lib::ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("v $LEN < pow2 32 ==> $out == Spec.Utils.v_PRF $LEN $input")) @@ -458,7 +451,6 @@ pub(crate) mod avx2 { } #[requires(fstar!("v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("(v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)) ==> $out == Spec.Utils.v_PRFxN $K $LEN $input")) @@ -710,7 +702,6 @@ pub(crate) mod neon { #[hax_lib::attributes] impl Hash for Simd128Hash { - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_G $input")) ] @@ -719,7 +710,6 @@ pub(crate) mod neon { G(input) } - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("$out == Spec.Utils.v_H $input")) ] @@ -729,7 +719,6 @@ pub(crate) mod neon { } #[requires(fstar!("v $LEN < pow2 32"))] - 
// Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("v $LEN < pow2 32 ==> $out == Spec.Utils.v_PRF $LEN $input")) @@ -740,7 +729,6 @@ pub(crate) mod neon { } #[requires(fstar!("v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| // We need to repeat the pre-condition here because of https://github.com/hacspec/hax/issues/784 fstar!("(v $LEN < pow2 32 /\\ (v $K == 2 \\/ v $K == 3 \\/ v $K == 4)) ==> diff --git a/libcrux-ml-kem/src/ind_cca.rs b/libcrux-ml-kem/src/ind_cca.rs index 33ec390e5..18ae0db4a 100644 --- a/libcrux-ml-kem/src/ind_cca.rs +++ b/libcrux-ml-kem/src/ind_cca.rs @@ -427,13 +427,12 @@ pub(crate) fn decapsulate< Scheme::kdf::(&implicit_rejection_shared_secret, ciphertext); let shared_secret = Scheme::kdf::(shared_secret, ciphertext); - let shared_secret = compare_ciphertexts_select_shared_secret_in_constant_time( + compare_ciphertexts_select_shared_secret_in_constant_time( ciphertext.as_ref(), &expected_ciphertext, &shared_secret, &implicit_rejection_shared_secret, - ); - shared_secret + ) } /// Types for the unpacked API. @@ -821,7 +820,7 @@ pub(crate) mod unpacked { Seq.index (Seq.index $result i) j == Seq.index (Seq.index $ind_cpa_a j) i)")) ] - pub(crate) fn transpose_a( + fn transpose_a( ind_cpa_a: [[PolynomialRingElement; K]; K], ) -> [[PolynomialRingElement; K]; K] { // We need to un-transpose the A_transpose matrix provided by IND-CPA diff --git a/libcrux-ml-kem/src/ind_cpa.rs b/libcrux-ml-kem/src/ind_cpa.rs index 935ef0c95..b40bd07ae 100644 --- a/libcrux-ml-kem/src/ind_cpa.rs +++ b/libcrux-ml-kem/src/ind_cpa.rs @@ -200,6 +200,7 @@ fn sample_ring_element_cbd< ) -> ([PolynomialRingElement; K], u8) { let mut error_1 = from_fn(|_i| PolynomialRingElement::::ZERO()); let mut prf_inputs = [prf_input; K]; + // See https://github.com/hacspec/hax/issues/1167 let _domain_separator_init = domain_separator; domain_separator = prf_input_inc::(&mut prf_inputs, domain_separator); hax_lib::fstar!("let lemma_aux (i:nat{ i < v $K }) : Lemma (${prf_inputs}.[sz i] == (Seq.append (Seq.slice $prf_input 0 32) diff --git a/libcrux-ml-kem/src/invert_ntt.rs b/libcrux-ml-kem/src/invert_ntt.rs index 24866eb82..7f9506731 100644 --- a/libcrux-ml-kem/src/invert_ntt.rs +++ b/libcrux-ml-kem/src/invert_ntt.rs @@ -1,6 +1,6 @@ use crate::{ hax_utils::hax_debug_assert, - polynomial::{get_zeta, PolynomialRingElement}, + polynomial::{zeta, PolynomialRingElement}, vector::{montgomery_multiply_fe, Operations, FIELD_ELEMENTS_IN_VECTOR}, }; @@ -55,10 +55,10 @@ pub(crate) fn invert_ntt_at_layer_1( (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::inv_ntt_layer_1_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i - 1), - get_zeta(*zeta_i - 2), - get_zeta(*zeta_i - 3), + zeta(*zeta_i), + zeta(*zeta_i - 1), + zeta(*zeta_i - 2), + zeta(*zeta_i - 3), ); *zeta_i -= 3; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) @@ -104,8 +104,8 @@ pub(crate) fn invert_ntt_at_layer_2( (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::inv_ntt_layer_2_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i - 1), + zeta(*zeta_i), + zeta(*zeta_i - 1), ); *zeta_i -= 1; hax_lib::fstar!("reveal_opaque 
(`%Spec.Utils.is_i16b_array_opaque) @@ -150,7 +150,7 @@ pub(crate) fn invert_ntt_at_layer_3( (Spec.Utils.is_i16b_array_opaque 3328 (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = - Vector::inv_ntt_layer_3_step(re.coefficients[round], get_zeta(*zeta_i)); + Vector::inv_ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); hax_lib::fstar!( "reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque 3328 @@ -210,7 +210,7 @@ pub(crate) fn invert_ntt_at_layer_4_plus( let (x, y) = inv_ntt_layer_int_vec_step_reduce( re.coefficients[j], re.coefficients[j + step_vec], - get_zeta(*zeta_i), + zeta(*zeta_i), ); re.coefficients[j] = x; re.coefficients[j + step_vec] = y; diff --git a/libcrux-ml-kem/src/mlkem512.rs b/libcrux-ml-kem/src/mlkem512.rs index 0d82a07a8..1af827529 100644 --- a/libcrux-ml-kem/src/mlkem512.rs +++ b/libcrux-ml-kem/src/mlkem512.rs @@ -3,31 +3,25 @@ use super::{constants::*, ind_cca::*, types::*, *}; // Kyber 512 parameters const RANK_512: usize = 2; -const RANKED_BYTES_PER_RING_ELEMENT_512: usize = 768; -const T_AS_NTT_ENCODED_SIZE_512: usize = 768; +const RANKED_BYTES_PER_RING_ELEMENT_512: usize = RANK_512 * BITS_PER_RING_ELEMENT / 8; +const T_AS_NTT_ENCODED_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; const VECTOR_U_COMPRESSION_FACTOR_512: usize = 10; -// [hax]: hacspec/hacspec-v2#27 stealing error -// block_len::() -const C1_BLOCK_SIZE_512: usize = 320; -// [hax]: hacspec/hacspec-v2#27 stealing error -// serialized_len::() -const C1_SIZE_512: usize = 640; +const C1_BLOCK_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_U_COMPRESSION_FACTOR_512) / 8; +const C1_SIZE_512: usize = C1_BLOCK_SIZE_512 * RANK_512; const VECTOR_V_COMPRESSION_FACTOR_512: usize = 4; -// [hax]: hacspec/hacspec-v2#27 stealing error -// block_len::() -const C2_SIZE_512: usize = 128; -const CPA_PKE_SECRET_KEY_SIZE_512: usize = 768; -pub(crate) const CPA_PKE_PUBLIC_KEY_SIZE_512: usize = 800; -const CPA_PKE_CIPHERTEXT_SIZE_512: usize = 768; +const C2_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_V_COMPRESSION_FACTOR_512) / 8; +const CPA_PKE_SECRET_KEY_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; +pub(crate) const CPA_PKE_PUBLIC_KEY_SIZE_512: usize = T_AS_NTT_ENCODED_SIZE_512 + 32; +const CPA_PKE_CIPHERTEXT_SIZE_512: usize = C1_SIZE_512 + C2_SIZE_512; -pub(crate) const SECRET_KEY_SIZE_512: usize = 1632; +pub(crate) const SECRET_KEY_SIZE_512: usize = CPA_PKE_SECRET_KEY_SIZE_512 + CPA_PKE_PUBLIC_KEY_SIZE_512 + H_DIGEST_SIZE + SHARED_SECRET_SIZE; const ETA1: usize = 3; -const ETA1_RANDOMNESS_SIZE: usize = 192; +const ETA1_RANDOMNESS_SIZE: usize = ETA1 * 64; const ETA2: usize = 2; -const ETA2_RANDOMNESS_SIZE: usize = 128; +const ETA2_RANDOMNESS_SIZE: usize = ETA2 * 64; -const IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = 800; +const IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = SHARED_SECRET_SIZE + CPA_PKE_CIPHERTEXT_SIZE_512; // Kyber 512 types /// An ML-KEM 512 Ciphertext diff --git a/libcrux-ml-kem/src/ntt.rs b/libcrux-ml-kem/src/ntt.rs index bb769cf1a..973a6d945 100644 --- a/libcrux-ml-kem/src/ntt.rs +++ b/libcrux-ml-kem/src/ntt.rs @@ -1,6 +1,6 @@ use crate::{ hax_utils::hax_debug_assert, - polynomial::{get_zeta, PolynomialRingElement, VECTORS_IN_RING_ELEMENT}, + polynomial::{zeta, PolynomialRingElement, VECTORS_IN_RING_ELEMENT}, vector::{montgomery_multiply_fe, Operations}, }; @@ -56,10 +56,10 @@ pub(crate) fn ntt_at_layer_1( 
(Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::ntt_layer_1_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i + 1), - get_zeta(*zeta_i + 2), - get_zeta(*zeta_i + 3), + zeta(*zeta_i), + zeta(*zeta_i + 1), + zeta(*zeta_i + 2), + zeta(*zeta_i + 3), ); *zeta_i += 3; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) @@ -116,8 +116,8 @@ pub(crate) fn ntt_at_layer_2( (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = Vector::ntt_layer_2_step( re.coefficients[round], - get_zeta(*zeta_i), - get_zeta(*zeta_i + 1), + zeta(*zeta_i), + zeta(*zeta_i + 1), ); *zeta_i += 1; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) @@ -173,7 +173,7 @@ pub(crate) fn ntt_at_layer_3( (Spec.Utils.is_i16b_array_opaque (11207+3*3328) (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); re.coefficients[round] = - Vector::ntt_layer_3_step(re.coefficients[round], get_zeta(*zeta_i)); + Vector::ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); hax_lib::fstar!( "reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+4*3328) @@ -243,7 +243,7 @@ pub(crate) fn ntt_at_layer_4_plus( let (x, y) = ntt_layer_int_vec_step( re.coefficients[j], re.coefficients[j + step_vec], - get_zeta(*zeta_i), + zeta(*zeta_i), ); re.coefficients[j] = x; re.coefficients[j + step_vec] = y; diff --git a/libcrux-ml-kem/src/polynomial.rs b/libcrux-ml-kem/src/polynomial.rs index 9460a0cba..cb6f0fe8b 100644 --- a/libcrux-ml-kem/src/polynomial.rs +++ b/libcrux-ml-kem/src/polynomial.rs @@ -15,11 +15,12 @@ pub(crate) const ZETAS_TIMES_MONTGOMERY_R: [i16; 128] = { ] }; +// A function to retrieve zetas so that we can add a post-condition #[inline(always)] #[hax_lib::fstar::verification_status(panic_free)] #[hax_lib::requires(i < 128)] #[hax_lib::ensures(|result| fstar!("Spec.Utils.is_i16b 1664 result"))] -pub fn get_zeta(i: usize) -> i16 { +pub fn zeta(i: usize) -> i16 { ZETAS_TIMES_MONTGOMERY_R[i] } @@ -67,7 +68,6 @@ impl PolynomialRingElement { #[allow(non_snake_case)] pub(crate) fn ZERO() -> Self { Self { - // FIXME: The THIR body of item DefId(0:415 ~ libcrux_ml_kem[9000]::polynomial::{impl#0}::ZERO::{constant#0}) was stolen. coefficients: [Vector::ZERO(); 16], } } @@ -213,13 +213,13 @@ impl PolynomialRingElement { /// /// The NIST FIPS 203 standard can be found at /// . + // TODO: Remove or replace with something that works and is useful for the proof. 
// #[cfg_attr(hax, hax_lib::requires( // hax_lib::forall(|i:usize| // hax_lib::implies(i < COEFFICIENTS_IN_RING_ELEMENT, || // (lhs.coefficients[i] >= 0 && lhs.coefficients[i] < 4096) && // (rhs.coefficients[i].abs() <= FIELD_MODULUS) - // ))))] // #[cfg_attr(hax, hax_lib::ensures(|result| // hax_lib::forall(|i:usize| @@ -228,23 +228,18 @@ impl PolynomialRingElement { // ))))] #[inline(always)] pub(crate) fn ntt_multiply(&self, rhs: &Self) -> Self { - // Using `hax_lib::fstar::verification_status(lax)` works but produces an error while extracting hax_lib::fstar!("admit ()"); - // hax_debug_debug_assert!(lhs - // .coefficients - // .into_iter() - // .all(|coefficient| coefficient >= 0 && coefficient < 4096)); - + let mut out = PolynomialRingElement::ZERO(); for i in 0..VECTORS_IN_RING_ELEMENT { out.coefficients[i] = Vector::ntt_multiply( &self.coefficients[i], &rhs.coefficients[i], - get_zeta(64 + 4 * i), - get_zeta(64 + 4 * i + 1), - get_zeta(64 + 4 * i + 2), - get_zeta(64 + 4 * i + 3), + zeta(64 + 4 * i), + zeta(64 + 4 * i + 1), + zeta(64 + 4 * i + 2), + zeta(64 + 4 * i + 3), ); } diff --git a/libcrux-ml-kem/src/vector/avx2.rs b/libcrux-ml-kem/src/vector/avx2.rs index 9f3035fde..61c7ae159 100644 --- a/libcrux-ml-kem/src/vector/avx2.rs +++ b/libcrux-ml-kem/src/vector/avx2.rs @@ -285,7 +285,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 1 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 1 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 1 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_1(vector: Self) -> [u8; 2] { @@ -293,7 +292,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 2)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. sz 2 ==> Spec.MLKEM.deserialize_post 1 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_1(bytes: &[u8]) -> Self { @@ -303,7 +301,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 4 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 4 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 4 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_4(vector: Self) -> [u8; 8] { @@ -311,7 +308,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 8)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. sz 8 ==> Spec.MLKEM.deserialize_post 4 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_4(bytes: &[u8]) -> Self { @@ -336,7 +332,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 10 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 10 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 10 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_10(vector: Self) -> [u8; 20] { @@ -344,7 +339,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 20)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. 
sz 20 ==> Spec.MLKEM.deserialize_post 10 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_10(bytes: &[u8]) -> Self { @@ -367,7 +361,6 @@ impl Operations for SIMD256Vector { } #[requires(fstar!("Spec.MLKEM.serialize_pre 12 (impl.f_repr $vector)"))] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("Spec.MLKEM.serialize_pre 12 (impl.f_repr $vector) ==> Spec.MLKEM.serialize_post 12 (impl.f_repr $vector) $out"))] #[inline(always)] fn serialize_12(vector: Self) -> [u8; 24] { @@ -375,7 +368,6 @@ impl Operations for SIMD256Vector { } #[requires(bytes.len() == 24)] - // Output name has be `out` https://github.com/hacspec/hax/issues/832 #[ensures(|out| fstar!("sz (Seq.length $bytes) =. sz 24 ==> Spec.MLKEM.deserialize_post 12 $bytes (impl.f_repr $out)"))] #[inline(always)] fn deserialize_12(bytes: &[u8]) -> Self { diff --git a/libcrux-ml-kem/src/vector/avx2/arithmetic.rs b/libcrux-ml-kem/src/vector/avx2/arithmetic.rs index 1032ee28d..8c9f3ae9a 100644 --- a/libcrux-ml-kem/src/vector/avx2/arithmetic.rs +++ b/libcrux-ml-kem/src/vector/avx2/arithmetic.rs @@ -94,11 +94,6 @@ pub(crate) fn shift_right(vector: Vec256) -> Vec256 { result } -// #[inline(always)] -// pub(crate) fn shift_left(vector: Vec256) -> Vec256 { -// mm256_slli_epi16::<{ SHIFT_BY }>(vector) -// } - #[inline(always)] #[cfg_attr(hax, hax_lib::fstar::options("--z3rlimit 100"))] #[hax_lib::requires(fstar!("Spec.Utils.is_i16b_array (pow2 12 - 1) (Libcrux_intrinsics.Avx2_extract.vec256_as_i16x16 $vector)"))] diff --git a/libcrux-ml-kem/src/vector/portable/ntt.rs b/libcrux-ml-kem/src/vector/portable/ntt.rs index 3cfafc9ea..46ef118d5 100644 --- a/libcrux-ml-kem/src/vector/portable/ntt.rs +++ b/libcrux-ml-kem/src/vector/portable/ntt.rs @@ -367,21 +367,6 @@ pub(crate) fn ntt_multiply_binomials( ); } -// #[inline(always)] -// pub(crate) fn ntt_multiply_binomials( -// (a0, a1): (FieldElement, FieldElement), -// (b0, b1): (FieldElement, FieldElement), -// zeta: FieldElementTimesMontgomeryR, -// ) -> (MontgomeryFieldElement, MontgomeryFieldElement) { -// ( -// montgomery_reduce_element( -// (a0 as i32) * (b0 as i32) -// + (montgomery_reduce_element((a1 as i32) * (b1 as i32)) as i32) * (zeta as i32), -// ), -// montgomery_reduce_element((a0 as i32) * (b1 as i32) + (a1 as i32) * (b0 as i32)), -// ) -// } - #[inline(always)] #[hax_lib::fstar::verification_status(panic_free)] #[hax_lib::fstar::options("--z3rlimit 100")] diff --git a/libcrux-ml-kem/src/vector/portable/serialize.rs b/libcrux-ml-kem/src/vector/portable/serialize.rs index 550ed5170..9a6522847 100644 --- a/libcrux-ml-kem/src/vector/portable/serialize.rs +++ b/libcrux-ml-kem/src/vector/portable/serialize.rs @@ -332,35 +332,6 @@ pub(crate) fn serialize_5_int(v: &[i16]) -> (u8, u8, u8, u8, u8) { (r0, r1, r2, r3, r4) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val serialize_5_lemma (inputs: Libcrux_ml_kem.Vector.Portable.Vector_type.t_PortableVector) : Lemma -// (requires (forall i. 
Rust_primitives.bounded (Seq.index inputs.f_elements i) 5)) -// (ensures bit_vec_of_int_t_array (${serialize_5} inputs) 8 == bit_vec_of_int_t_array inputs.f_elements 5) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let serialize_5_lemma inputs = -// serialize_5_bit_vec_lemma inputs.f_elements (); -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${serialize_5} inputs) 8) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs.f_elements 5)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let serialize_5_bit_vec_lemma (v: t_Array i16 (sz 16)) -// (_: squash (forall i. Rust_primitives.bounded (Seq.index v i) 5)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 5 in -// let outputs = bit_vec_of_int_t_array (${serialize_5} ({ f_elements = v })) 8 in -// (forall (i: nat {i < 80}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[inline(always)] pub(crate) fn serialize_5(v: PortableVector) -> [u8; 10] { let r0_4 = serialize_5_int(&v.elements[0..8]); @@ -386,33 +357,6 @@ pub(crate) fn deserialize_5_int(bytes: &[u8]) -> (i16, i16, i16, i16, i16, i16, (v0, v1, v2, v3, v4, v5, v6, v7) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val deserialize_5_lemma (inputs: t_Array u8 (sz 10)) : Lemma -// (ensures bit_vec_of_int_t_array (${deserialize_5} inputs).f_elements 5 == bit_vec_of_int_t_array inputs 8) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let deserialize_5_lemma inputs = -// deserialize_5_bit_vec_lemma inputs; -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${deserialize_5} inputs).f_elements 5) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs 8)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let deserialize_5_bit_vec_lemma (v: t_Array u8 (sz 10)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 8 in -// let outputs = bit_vec_of_int_t_array (${deserialize_5} v).f_elements 5 in -// (forall (i: nat {i < 80}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[hax_lib::requires(fstar!(r#" ${bytes.len() == 10} "#))] @@ -601,35 +545,6 @@ pub(crate) fn serialize_11_int(v: &[i16]) -> (u8, u8, u8, u8, u8, u8, u8, u8, u8 (r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val serialize_11_lemma (inputs: Libcrux_ml_kem.Vector.Portable.Vector_type.t_PortableVector) : Lemma -// (requires (forall i. Rust_primitives.bounded (Seq.index inputs.f_elements i) 11)) -// (ensures bit_vec_of_int_t_array (${serialize_11} inputs) 8 == bit_vec_of_int_t_array inputs.f_elements 11) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let serialize_11_lemma inputs = -// serialize_11_bit_vec_lemma inputs.f_elements (); -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${serialize_11} inputs) 8) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs.f_elements 11)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let serialize_11_bit_vec_lemma (v: t_Array i16 (sz 16)) -// (_: squash (forall i. 
Rust_primitives.bounded (Seq.index v i) 11)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 11 in -// let outputs = bit_vec_of_int_t_array (${serialize_11} ({ f_elements = v })) 8 in -// (forall (i: nat {i < 176}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[inline(always)] pub(crate) fn serialize_11(v: PortableVector) -> [u8; 22] { let r0_10 = serialize_11_int(&v.elements[0..8]); @@ -657,33 +572,6 @@ pub(crate) fn deserialize_11_int(bytes: &[u8]) -> (i16, i16, i16, i16, i16, i16, (r0, r1, r2, r3, r4, r5, r6, r7) } -// #[cfg_attr(hax, hax_lib::fstar::after(interface, " -// val deserialize_11_lemma (inputs: t_Array u8 (sz 22)) : Lemma -// (ensures bit_vec_of_int_t_array (${deserialize_11} inputs).f_elements 11 == bit_vec_of_int_t_array inputs 8) -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--z3rlimit 300\" - -// let deserialize_11_lemma inputs = -// deserialize_11_bit_vec_lemma inputs; -// BitVecEq.bit_vec_equal_intro (bit_vec_of_int_t_array (${deserialize_11} inputs).f_elements 11) -// (BitVecEq.retype (bit_vec_of_int_t_array inputs 8)) - -// #pop-options -// "))] -// #[cfg_attr(hax, hax_lib::fstar::after(" -// #push-options \"--compat_pre_core 2 --z3rlimit 300 --z3refresh\" - -// let deserialize_11_bit_vec_lemma (v: t_Array u8 (sz 22)) -// : squash ( -// let inputs = bit_vec_of_int_t_array v 8 in -// let outputs = bit_vec_of_int_t_array (${deserialize_11} v).f_elements 11 in -// (forall (i: nat {i < 176}). inputs i == outputs i) -// ) = -// _ by (Tactics.GetBit.prove_bit_vector_equality' ()) - -// #pop-options -// "))] #[hax_lib::requires(fstar!(r#" ${bytes.len() == 22} "#))] diff --git a/libcrux-ml-kem/src/vector/traits.rs b/libcrux-ml-kem/src/vector/traits.rs index 62e67a770..193d0edf6 100644 --- a/libcrux-ml-kem/src/vector/traits.rs +++ b/libcrux-ml-kem/src/vector/traits.rs @@ -5,6 +5,8 @@ pub const INVERSE_OF_MODULUS_MOD_MONTGOMERY_R: u32 = 62209; // FIELD_MODULUS^{-1 pub const BARRETT_SHIFT: i32 = 26; pub const BARRETT_R: i32 = 1 << BARRETT_SHIFT; +// We define a trait that allows us to talk about the contents of a vector. +// This is used extensively in pre- and post-conditions to reason about the code. 
#[cfg(hax)] #[hax_lib::attributes] pub trait Repr: Copy + Clone { From 0e587d6e842717408ea9357e00d47e372e505c80 Mon Sep 17 00:00:00 2001 From: Karthikeyan Bhargavan Date: Tue, 3 Dec 2024 19:44:12 +0100 Subject: [PATCH 2/7] assert to help proofs --- libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst b/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst index f598ee0ff..0ea02db6c 100644 --- a/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst +++ b/libcrux-ml-kem/proofs/fstar/spec/Spec.MLKEM.Instances.fst @@ -11,13 +11,14 @@ open Spec.MLKEM let mlkem768_rank : rank = sz 3 -#push-options "--z3rlimit 300" +#set-options "--z3rlimit 350" let mlkem768_generate_keypair (randomness:t_Array u8 (sz 64)): (t_Array u8 (sz 2400) & t_Array u8 (sz 1184)) & bool = ind_cca_generate_keypair mlkem768_rank randomness let mlkem768_encapsulate (public_key: t_Array u8 (sz 1184)) (randomness: t_Array u8 (sz 32)): (t_Array u8 (sz 1088) & t_Array u8 (sz 32)) & bool = + assert (v_CPA_CIPHERTEXT_SIZE mlkem768_rank == sz 1088); ind_cca_encapsulate mlkem768_rank public_key randomness let mlkem768_decapsulate (secret_key: t_Array u8 (sz 2400)) (ciphertext: t_Array u8 (sz 1088)): @@ -32,7 +33,6 @@ let mlkem1024_generate_keypair (randomness:t_Array u8 (sz 64)): (t_Array u8 (sz 3168) & t_Array u8 (sz 1568)) & bool = ind_cca_generate_keypair mlkem1024_rank randomness -#set-options "--z3rlimit 100" let mlkem1024_encapsulate (public_key: t_Array u8 (sz 1568)) (randomness: t_Array u8 (sz 32)): (t_Array u8 (sz 1568) & t_Array u8 (sz 32)) & bool = assert (v_CPA_CIPHERTEXT_SIZE mlkem1024_rank == sz 1568); From cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 18:45:17 +0000 Subject: [PATCH 3/7] fmt --- libcrux-ml-kem/c/code_gen.txt | 10 +- libcrux-ml-kem/c/internal/libcrux_core.h | 10 +- .../c/internal/libcrux_mlkem_avx2.h | 10 +- .../c/internal/libcrux_mlkem_portable.h | 12 +- libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h | 10 +- .../c/internal/libcrux_sha3_internal.h | 78 +- libcrux-ml-kem/c/libcrux_core.c | 10 +- libcrux-ml-kem/c/libcrux_core.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512.h | 70 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_avx2.c | 1385 +++++++++++++---- libcrux-ml-kem/c/libcrux_mlkem_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_portable.c | 455 ++++-- libcrux-ml-kem/c/libcrux_mlkem_portable.h | 10 +- libcrux-ml-kem/c/libcrux_sha3.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_avx2.c | 52 +- libcrux-ml-kem/c/libcrux_sha3_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_internal.h | 16 +- libcrux-ml-kem/c/libcrux_sha3_neon.c | 30 +- libcrux-ml-kem/c/libcrux_sha3_neon.h | 10 +- libcrux-ml-kem/src/invert_ntt.rs | 7 +- 
libcrux-ml-kem/src/mlkem512.rs | 12 +- libcrux-ml-kem/src/ntt.rs | 10 +- libcrux-ml-kem/src/polynomial.rs | 4 +- 37 files changed, 1739 insertions(+), 642 deletions(-) diff --git a/libcrux-ml-kem/c/code_gen.txt b/libcrux-ml-kem/c/code_gen.txt index 420446603..8606206e0 100644 --- a/libcrux-ml-kem/c/code_gen.txt +++ b/libcrux-ml-kem/c/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 -Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 -Karamel: 8c3612018c25889288da6857771be3ad03b75bcd -F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty -Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a +Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f +Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c +Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 +F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc +Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 diff --git a/libcrux-ml-kem/c/internal/libcrux_core.h b/libcrux-ml-kem/c/internal/libcrux_core.h index 69032a33e..fe0dc7d7d 100644 --- a/libcrux-ml-kem/c/internal/libcrux_core.h +++ b/libcrux-ml-kem/c/internal/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_core_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h index 9baf58ca5..48345a968 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h index 7ba532d5e..e89d87311 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 
1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_mlkem_portable_H @@ -23,7 +23,7 @@ extern "C" { #include "internal/libcrux_core.h" #include "internal/libcrux_sha3_internal.h" -int16_t libcrux_ml_kem_polynomial_get_zeta(size_t i); +int16_t libcrux_ml_kem_polynomial_zeta(size_t i); #define LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT \ (LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / \ diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h index 0d99b2edd..78fe0a95b 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h index 6d47ffcbc..92381f50f 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __internal_libcrux_sha3_internal_H @@ -273,8 +273,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)136U) { - consumed = (size_t)136U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)136U) { + consumed = (size_t)136U - /* We have enough data when combining the + internal buffer and the input. */ + self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -380,7 +385,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). 
*/ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -727,8 +734,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)168U) { - consumed = (size_t)168U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)168U) { + consumed = (size_t)168U - /* We have enough data when combining the + internal buffer and the input. */ + self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -834,7 +846,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). */ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -1224,7 +1238,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= out_len) { + if ((size_t)136U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. + https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)136U; @@ -1238,8 +1258,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1248,7 +1271,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. + */ + out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -1343,7 +1370,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= out_len) { + if ((size_t)168U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. 
+ https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)168U; @@ -1357,8 +1390,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1367,7 +1403,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. + */ + out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); diff --git a/libcrux-ml-kem/c/libcrux_core.c b/libcrux-ml-kem/c/libcrux_core.c index 03c9cddb6..de354115a 100644 --- a/libcrux-ml-kem/c/libcrux_core.c +++ b/libcrux-ml-kem/c/libcrux_core.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_core.h" diff --git a/libcrux-ml-kem/c/libcrux_core.h b/libcrux-ml-kem/c/libcrux_core.h index f1e63c7a9..55c5c5d8e 100644 --- a/libcrux-ml-kem/c/libcrux_core.h +++ b/libcrux-ml-kem/c/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024.h b/libcrux-ml-kem/c/libcrux_mlkem1024.h index 6ba68daf6..37334a9b1 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 
5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem1024_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c index 6aa0b5776..778d6fbf3 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem1024_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h index c662e3584..854751c45 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem1024_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c index bc4294748..e463cb267 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem1024_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h index 90211f1e5..430c904d1 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following 
revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem1024_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512.h b/libcrux-ml-kem/c/libcrux_mlkem512.h index d27735aa5..fb7755a5a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem512_H @@ -21,28 +21,52 @@ extern "C" { #include "eurydice_glue.h" #include "libcrux_core.h" -#define LIBCRUX_ML_KEM_MLKEM512_C1_BLOCK_SIZE_512 ((size_t)320U) +#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_U_COMPRESSION_FACTOR_512 ((size_t)10U) + +#define LIBCRUX_ML_KEM_MLKEM512_C1_BLOCK_SIZE_512 \ + (LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_MLKEM512_VECTOR_U_COMPRESSION_FACTOR_512 / (size_t)8U) + +#define LIBCRUX_ML_KEM_MLKEM512_RANK_512 ((size_t)2U) + +#define LIBCRUX_ML_KEM_MLKEM512_C1_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_C1_BLOCK_SIZE_512 * LIBCRUX_ML_KEM_MLKEM512_RANK_512) + +#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_V_COMPRESSION_FACTOR_512 ((size_t)4U) -#define LIBCRUX_ML_KEM_MLKEM512_C1_SIZE_512 ((size_t)640U) +#define LIBCRUX_ML_KEM_MLKEM512_C2_SIZE_512 \ + (LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_MLKEM512_VECTOR_V_COMPRESSION_FACTOR_512 / (size_t)8U) -#define LIBCRUX_ML_KEM_MLKEM512_C2_SIZE_512 ((size_t)128U) +#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_CIPHERTEXT_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_C1_SIZE_512 + LIBCRUX_ML_KEM_MLKEM512_C2_SIZE_512) -#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_CIPHERTEXT_SIZE_512 ((size_t)768U) +#define LIBCRUX_ML_KEM_MLKEM512_T_AS_NTT_ENCODED_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_RANK_512 * \ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_CONSTANTS_BITS_PER_COEFFICIENT / (size_t)8U) -#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_PUBLIC_KEY_SIZE_512 ((size_t)800U) +#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_PUBLIC_KEY_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_T_AS_NTT_ENCODED_SIZE_512 + (size_t)32U) -#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_SECRET_KEY_SIZE_512 ((size_t)768U) +#define LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_SECRET_KEY_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_RANK_512 * \ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT * \ + LIBCRUX_ML_KEM_CONSTANTS_BITS_PER_COEFFICIENT / (size_t)8U) #define LIBCRUX_ML_KEM_MLKEM512_ETA1 ((size_t)3U) -#define LIBCRUX_ML_KEM_MLKEM512_ETA1_RANDOMNESS_SIZE ((size_t)192U) +#define 
LIBCRUX_ML_KEM_MLKEM512_ETA1_RANDOMNESS_SIZE \ + (LIBCRUX_ML_KEM_MLKEM512_ETA1 * (size_t)64U) #define LIBCRUX_ML_KEM_MLKEM512_ETA2 ((size_t)2U) -#define LIBCRUX_ML_KEM_MLKEM512_ETA2_RANDOMNESS_SIZE ((size_t)128U) +#define LIBCRUX_ML_KEM_MLKEM512_ETA2_RANDOMNESS_SIZE \ + (LIBCRUX_ML_KEM_MLKEM512_ETA2 * (size_t)64U) #define LIBCRUX_ML_KEM_MLKEM512_IMPLICIT_REJECTION_HASH_INPUT_SIZE \ - ((size_t)800U) + (LIBCRUX_ML_KEM_CONSTANTS_SHARED_SECRET_SIZE + \ + LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_CIPHERTEXT_SIZE_512) typedef libcrux_ml_kem_types_MlKemCiphertext_1a libcrux_ml_kem_mlkem512_MlKem512Ciphertext; @@ -56,17 +80,15 @@ typedef libcrux_ml_kem_types_MlKemPrivateKey_fa typedef libcrux_ml_kem_types_MlKemPublicKey_52 libcrux_ml_kem_mlkem512_MlKem512PublicKey; -#define LIBCRUX_ML_KEM_MLKEM512_RANKED_BYTES_PER_RING_ELEMENT_512 ((size_t)768U) +#define LIBCRUX_ML_KEM_MLKEM512_RANKED_BYTES_PER_RING_ELEMENT_512 \ + (LIBCRUX_ML_KEM_MLKEM512_RANK_512 * \ + LIBCRUX_ML_KEM_CONSTANTS_BITS_PER_RING_ELEMENT / (size_t)8U) -#define LIBCRUX_ML_KEM_MLKEM512_RANK_512 ((size_t)2U) - -#define LIBCRUX_ML_KEM_MLKEM512_SECRET_KEY_SIZE_512 ((size_t)1632U) - -#define LIBCRUX_ML_KEM_MLKEM512_T_AS_NTT_ENCODED_SIZE_512 ((size_t)768U) - -#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_U_COMPRESSION_FACTOR_512 ((size_t)10U) - -#define LIBCRUX_ML_KEM_MLKEM512_VECTOR_V_COMPRESSION_FACTOR_512 ((size_t)4U) +#define LIBCRUX_ML_KEM_MLKEM512_SECRET_KEY_SIZE_512 \ + (LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_SECRET_KEY_SIZE_512 + \ + LIBCRUX_ML_KEM_MLKEM512_CPA_PKE_PUBLIC_KEY_SIZE_512 + \ + LIBCRUX_ML_KEM_CONSTANTS_H_DIGEST_SIZE + \ + LIBCRUX_ML_KEM_CONSTANTS_SHARED_SECRET_SIZE) #if defined(__cplusplus) } diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c index b45c8295b..3e9fbd0cc 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem512_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h index d5ec40d83..79012290d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem512_avx2_H diff --git 
a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c index 2fc72d307..8639c4603 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem512_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h index 6e3d9755b..faea31c8a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem512_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768.h b/libcrux-ml-kem/c/libcrux_mlkem768.h index bcfb76ff3..474b96082 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem768_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c index fdf226bd8..a7a0f7e7d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 
2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem768_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h index 08c3fa5b7..35608499b 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem768_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c index c59bc0046..2d21b9d89 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_mlkem768_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h index 03f9d22a4..514894426 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem768_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c index 61f343a77..64e5d2462 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 
3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_mlkem_avx2.h" @@ -141,11 +141,16 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); - __m256i v_minus_field_modulus = mm256_sub_epi16(vector, field_modulus); + __m256i v_minus_field_modulus = + mm256_sub_epi16(/* Compute v_i - Q and crate a mask from the sign bit of + each of these quantities. */ + vector, + field_modulus); __m256i sign_mask = mm256_srai_epi16((int32_t)15, v_minus_field_modulus, __m256i); - __m256i conditional_add_field_modulus = - mm256_and_si256(sign_mask, field_modulus); + __m256i conditional_add_field_modulus = mm256_and_si256( + /* If v_i - Q < 0 then add back Q to (v_i - Q). */ sign_mask, + field_modulus); return mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -450,6 +455,7 @@ libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(__m256i vec) { KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { + /* Compute the first term of the product */ __m256i shuffle_with = mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -457,7 +463,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = mm256_shuffle_epi8(lhs, shuffle_with); + __m256i lhs_shuffled = + mm256_shuffle_epi8(/* Prepare the left hand side */ lhs, shuffle_with); __m256i lhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = mm256_castsi256_si128(lhs_shuffled0); @@ -465,7 +472,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = mm256_extracti128_si256((int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = mm256_shuffle_epi8(rhs, shuffle_with); + __m256i rhs_shuffled = + mm256_shuffle_epi8(/* Prepare the right hand side */ rhs, shuffle_with); __m256i rhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = mm256_castsi256_si128(rhs_shuffled0); @@ -473,7 +481,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = mm256_extracti128_si256((int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = mm256_cvtepi16_epi32(rhs_odds); - __m256i left = mm256_mullo_epi32(lhs_evens0, rhs_evens0); + __m256i left = + mm256_mullo_epi32(/* Start operating with them */ lhs_evens0, rhs_evens0); __m256i right = mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(right); @@ -486,7 +495,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = mm256_shuffle_epi8( - rhs, + /* Compute the second term of the product */ rhs, mm256_set_epi8((int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, 
(int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, (int8_t)1, (int8_t)0, (int8_t)3, @@ -500,8 +509,9 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = mm256_slli_epi32((int32_t)16, products_right0, __m256i); - return mm256_blend_epi16((int32_t)170, products_left0, products_right1, - __m256i); + return mm256_blend_epi16((int32_t)170, + /* Combine them into one vector */ products_left0, + products_right1, __m256i); } /** @@ -517,11 +527,44 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = mm256_slli_epi16((int32_t)15, vector, __m256i); - __m128i low_msbs = mm256_castsi256_si128(lsb_to_msb); - __m128i high_msbs = mm256_extracti128_si256((int32_t)1, lsb_to_msb, __m128i); - __m128i msbs = mm_packs_epi16(low_msbs, high_msbs); - int32_t bits_packed = mm_movemask_epi8(msbs); + __m256i lsb_to_msb = mm256_slli_epi16( + (int32_t)15, + /* Suppose |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ + 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least + significant bit in each lane, move it to the most significant position + to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ + d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ + n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ + vector, __m256i); + __m128i low_msbs = mm256_castsi256_si128( + /* Get the first 8 16-bit elements ... */ lsb_to_msb); + __m128i high_msbs = mm256_extracti128_si256( + (int32_t)1, + /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); + __m128i msbs = + mm_packs_epi16(/* ... and then pack them into 8-bit values using signed + saturation. This function packs all the |low_msbs|, and + then the high ones. low_msbs = a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | + e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ + l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ We shifted by 15 above + to take advantage of the signed saturation performed by + mm_packs_epi16: - if the sign bit of the 16-bit element + being packed is 1, the corresponding 8-bit element in + |msbs| will be 0xFF. - if the sign bit of the 16-bit + element being packed is 0, the corresponding 8-bit + element in |msbs| will be 0. Thus, if, for example, a₀ = + 1, e₀ = 1, and p₀ = 1, and every other bit is 0, after + packing into 8 bit value, |msbs| will look like: 0xFF + 0x00 0x00 0x00 | 0xFF 0x00 0x00 0x00 | 0x00 0x00 0x00 + 0x00 | 0x00 0x00 0x00 0xFF */ + low_msbs, + high_msbs); + int32_t bits_packed = + mm_movemask_epi8(/* Now that every element is either 0xFF or 0x00, we just + extract the most significant bit from each element and + collate them into two bytes. 
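Continuing the example above (a₀ = e₀ = p₀ = 1, every other bit 0), mm_movemask_epi8 returns 0x8011, so the two output bytes are 0x11 and 0x80.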
*/ + msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -539,16 +582,39 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { __m256i coefficients = - mm256_set_epi16(b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = mm256_mullo_epi16( - coefficients, - mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, - (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, - (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, - (int16_t)-32768)); - return mm256_srli_epi16((int32_t)15, coefficients_in_msb, __m256i); + mm256_set_epi16(/* We need to take each bit from the 2 bytes of input and + put them into their own 16-bit lane. Ideally, we'd load + the two bytes into the vector, duplicate them, and + right-shift the 0th element by 0 bits, the first + element by 1 bit, the second by 2 bits and so on before + AND-ing with 0x1 to leave only the least signifinicant + bit. But since |_mm256_srlv_epi16| does not exist, so + we have to resort to a workaround. Rather than shifting + each element by a different amount, we'll multiply each + element by a value such that the bit we're interested + in becomes the most significant bit. The coefficients + are loaded as follows: */ + b, + b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = + mm256_mullo_epi16(/* And this vector, when multiplied with the previous + one, ensures that the bit we'd like to keep in each + lane becomes the most significant bit upon + multiplication. */ + coefficients, + mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, + (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, + (int16_t)1 << 8U, (int16_t)1 << 9U, + (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768)); + return mm256_srli_epi16( + (int32_t)15, + /* Now that they're all in the most significant bit position, shift them + down to the least significant bit. */ + coefficients_in_msb, __m256i); } KRML_MUSTINLINE __m256i @@ -561,7 +627,23 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* We need to take each bit from the 2 bytes of input and put them + into their own 16-bit lane. Ideally, we'd load the two bytes into + the vector, duplicate them, and right-shift the 0th element by 0 + bits, the first element by 1 bit, the second by 2 bits and so on + before AND-ing with 0x1 to leave only the least signifinicant bit. + But since |_mm256_srlv_epi16| does not exist, so we have to resort + to a workaround. Rather than shifting each element by a different + amount, we'll multiply each element by a value such that the bit + we're interested in becomes the most significant bit. The + coefficients are loaded as follows: And this vector, when + multiplied with the previous one, ensures that the bit we'd like to + keep in each lane becomes the most significant bit upon + multiplication. 
Now that they're all in the most significant bit + position, shift them down to the least significant bit. */ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -594,23 +676,47 @@ KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); - __m256i adjacent_8_combined = mm256_shuffle_epi8( - adjacent_2_combined, - mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, - (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0)); - __m256i combined = mm256_permutevar8x32_epi32( - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 4U, + /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | + 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be + laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA + 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ + vector); + __m256i adjacent_8_combined = + mm256_shuffle_epi8(/* Recall that |adjacent_2_combined| goes as follows: + 0x00_00_00_BA 0x00_00_00_DC | 0x00_00_00_FE + 0x00_00_00_HG | ... Out of this, we only need the + first byte, the 4th byte, the 8th byte and so on + from the bottom and the top 128 bits. */ + adjacent_2_combined, + mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); + __m256i combined = + mm256_permutevar8x32_epi32(/* |adjacent_8_combined| looks like this: 0: + 0xHG_FE_DC_BA 1: 0x00_00_00_00 | 2: + 0x00_00_00_00 3: 0x00_00_00_00 | 4: + 0xPO_NM_LK_JI .... We put the element at 4 + after the element at 0 ... */ + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, + (int32_t)4, (int32_t)0)); __m128i combined0 = mm256_castsi256_si128(combined); mm_storeu_bytes_si128( - Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); + Eurydice_array_to_slice( + (size_t)16U, + /* ... so that we can read them out in one go. */ serialized, + uint8_t), + combined0); uint8_t ret0[8U]; core_result_Result_15 dst; Eurydice_slice_to_array2( @@ -634,8 +740,23 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = mm256_set_epi16(b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, - b2, b2, b1, b1, b0, b0); + __m256i coefficients = + mm256_set_epi16(/* Every 4 bits from each byte of input should be put into + its own 16-bit lane. Since |_mm256_srlv_epi16| does not + exist, we have to resort to a workaround. 
Rather than + shifting each element by a different amount, we'll + multiply each element by a value such that the bits + we're interested in become the most significant bits + (of an 8-bit value). In this lane, the 4 bits we need + to put are already the most significant bits of + |bytes[7]| (that is, b7). */ + b7, + /* In this lane, the 4 bits we need to put are the least + significant bits, so we need to shift the 4 + least-significant bits of |b7| to the most significant + bits (of an 8-bit value). */ + b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, + b0); __m256i coefficients_in_msb = mm256_mullo_epi16( coefficients, mm256_set_epi16((int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -644,9 +765,12 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); - __m256i coefficients_in_lsb = - mm256_srli_epi16((int32_t)4, coefficients_in_msb, __m256i); - return mm256_and_si256(coefficients_in_lsb, + __m256i coefficients_in_lsb = mm256_srli_epi16( + (int32_t)4, + /* Once the 4-bit coefficients are in the most significant positions (of + an 8-bit value), shift them all down by 4. */ + coefficients_in_msb, __m256i); + return mm256_and_si256(/* Zero the remaining bits. */ coefficients_in_lsb, mm256_set1_epi16(((int16_t)1 << 4U) - (int16_t)1)); } @@ -662,7 +786,23 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* Every 4 bits from each byte of input should be put into its own + 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to + resort to a workaround. Rather than shifting each element by a + different amount, we'll multiply each element by a value such that + the bits we're interested in become the most significant bits (of + an 8-bit value). In this lane, the 4 bits we need to put are + already the most significant bits of |bytes[7]| (that is, b7). In + this lane, the 4 bits we need to put are the least significant + bits, so we need to shift the 4 least-significant bits of |b7| to + the most significant bits (of an 8-bit value). These constants are + chosen to shift the bits of the values that we loaded into + |coefficients|. Once the 4-bit coefficients are in the most + significant positions (of an 8-bit value), shift them all down + by 4. Zero the remaining bits. 
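For example, an input byte 0xAB ends up as 0x000B (its low nibble) in the first 16-bit lane and 0x000A (its high nibble) in the second.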
*/ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -684,32 +824,78 @@ libcrux_ml_kem_vector_avx2_deserialize_4_09(Eurydice_slice bytes) { KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = mm256_madd_epi16( - vector, mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); - __m256i adjacent_4_combined = mm256_sllv_epi32( - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22)); - __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)22, adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - mm256_shuffle_epi32((int32_t)8, adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = mm256_sllv_epi32( - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12)); + __m256i adjacent_2_combined = + mm256_madd_epi16(/* If |vector| is laid out as follows (superscript number + indicates the corresponding bit is duplicated that + many times): 0¹¹a₄a₃a₂a₁a₀ 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ + 0¹¹d₄d₃d₂d₁d₀ | ↩ 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ + 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | ↩ |adjacent_2_combined| + will be laid out as a series of 32-bit integers, as + follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... */ + vector, + mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, + (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, + (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, + (int16_t)1)); + __m256i adjacent_4_combined = + mm256_sllv_epi32(/* Recall that |adjacent_2_combined| is laid out as + follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... This shift results + in: b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | + ↩ f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... */ + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, + (int32_t)22, (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22)); + __m256i adjacent_4_combined0 = mm256_srli_epi64( + (int32_t)22, + /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift + down by 22 bits to remove the least significant 0 bits that aren't part + of the bits we need. */ + adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = mm256_shuffle_epi32( + (int32_t)8, + /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks + like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² + 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to + read out the bytes in one go, we need to shifts the bits in position 2 + to position 1 in each 128-bit lane. 
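The shuffle immediate 8 (0b00001000) does exactly that: within each 128-bit lane it copies 32-bit element 2 into position 1 and element 0 into positions 0, 2 and 3.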
*/ + adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = + mm256_sllv_epi32(/* |adjacent_8_combined|, when viewed as a set of 32-bit + values, now looks like: + 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 0³² 0³² | + ↩ Once again, we line these bits up by shifting the up + values at indices 0 and 5 by 12, viewing the resulting + register as a set of 64-bit values, and then shifting + down the 64-bit values by 12 bits. */ + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = mm256_srli_epi64((int32_t)12, adjacent_8_combined0, __m256i); - __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined1); + __m128i lower_8 = + mm256_castsi256_si128(/* We now have 40 bits starting at position 0 in the + lower 128-bit lane, ... */ + adjacent_8_combined1); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); - __m128i upper_8 = - mm256_extracti128_si256((int32_t)1, adjacent_8_combined1, __m128i); + __m128i upper_8 = mm256_extracti128_si256( + (int32_t)1, + /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ + adjacent_8_combined1, __m128i); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -803,25 +989,67 @@ core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); - __m256i adjacent_4_combined = mm256_sllv_epi32( - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 10U, + /* If |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... + |adjacent_2_combined| will be laid out as a series of 32-bit + integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ + vector); + __m256i adjacent_4_combined = + mm256_sllv_epi32(/* Shifting up the values at the even indices by 12, we + get: b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ ... 
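mm256_sllv_epi32 makes this possible by shifting each 32-bit lane by its own count, here 12 for the even lanes and 0 for the odd ones.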
*/ + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)12, adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = mm256_shuffle_epi8( - adjacent_4_combined0, - mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, - (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, - (int8_t)9, (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0)); - __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined); - __m128i upper_8 = - mm256_extracti128_si256((int32_t)1, adjacent_8_combined, __m128i); + mm256_srli_epi64((int32_t)12, + /* Viewing this as a set of 64-bit integers we get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ ... Shifting down by 12 gives us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ ... */ + adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + mm256_shuffle_epi8(/* |adjacent_4_combined|, when the bottom and top 128 + bit-lanes are grouped into bytes, looks like: + 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ In + each 128-bit lane, we want to put bytes 8, 9, 10, + 11, 12 after bytes 0, 1, 2, 3 to allow for + sequential reading. */ + adjacent_4_combined0, + mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); + __m128i lower_8 = + mm256_castsi256_si128(/* We now have 64 bits starting at position 0 in the + lower 128-bit lane, ... */ + adjacent_8_combined); + __m128i upper_8 = mm256_extracti128_si256( + (int32_t)1, + /* and 64 bits starting at position 0 in the upper 128-bit lane. */ + adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -829,8 +1057,167 @@ libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If + |vector| + is + laid + out + as + follows + (superscript + number + indicates + the + corresponding + bit + is + duplicated + that + many + times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ + | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ + | ↩ + ... 
+ |adjacent_2_combined| + will + be + laid + out + as a + series + of + 32-bit + integers, + as + follows: + 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + .... + Shifting + up + the + values + at + the + even + indices + by + 12, + we + get: + b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + ... + Viewing + this + as a + set + of + 64-bit + integers + we + get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ + ... + Shifting + down + by + 12 + gives + us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ + ... + |adjacent_4_combined|, + when + the + bottom + and + top + 128 + bit-lanes + are + grouped + into + bytes, + looks + like: + 0₇0₆0₅B₄B₃B₂B₁B₀ + | ↩ + 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ + | ↩ + In + each + 128-bit + lane, + we + want + to + put + bytes + 8, + 9, + 10, + 11, + 12 + after + bytes + 0, + 1, + 2, 3 + to + allow + for + sequential + reading. + We + now + have + 64 + bits + starting + at + position + 0 in + the + lower + 128-bit + lane, + ... + and + 64 + bits + starting + at + position + 0 in + the + upper + 128-bit + lane. + */ + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -880,14 +1267,16 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 0U, (int16_t)1 << 2U, (int16_t)1 << 4U, (int16_t)1 << 6U)); __m256i coefficients1 = mm256_srli_epi16((int32_t)6, coefficients0, __m256i); - return mm256_and_si256(coefficients1, - mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); + return mm256_and_si256( + /* Here I can prove this `and` is not useful */ coefficients1, + mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); } KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = - Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( + /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, + (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1053,26 +1442,64 @@ KRML_MUSTINLINE size_t libcrux_ml_kem_vector_avx2_sampling_rejection_sample( __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can + be interpreted as a + sequence of + serialized 12-bit + (i.e. uncompressed) + coefficients. Not + all coefficients + may be less than + FIELD_MODULUS + though. */ + input); __m256i compare_with_field_modulus = - mm256_cmpgt_epi16(field_modulus, potential_coefficients); + mm256_cmpgt_epi16(/* Suppose we view |potential_coefficients| as follows + (grouping 64-bit elements): A B C D | E F G H | .... 
+ and A < 3329, D < 3329 and H < 3329, + |compare_with_field_modulus| will look like: 0xFF 0 0 + 0xFF | 0 0 0 0xFF | ... */ + field_modulus, + potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each + lane is either 0 or 1, we + only need one bit from + each lane in the register + to tell us what + coefficients to keep and + what to throw-away. + Combine all the bits + (there are 16) into two + bytes. */ + compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, + /* Each bit (and its corresponding position) represents an element we + want to sample. We'd like all such elements to be next to each other + starting at index 0, so that they can be read from the vector + easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level + shuffling indices needed to make this happen. For e.g. if good[0] = + 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit + lane to the first. To do this, we need the byte-level shuffle + indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = mm_loadu_si128( - Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); + __m128i lower_shuffles0 = mm_loadu_si128(Eurydice_array_to_slice( + (size_t)16U, + /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, + uint8_t)); __m128i lower_coefficients = mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = mm_shuffle_epi8(lower_coefficients, lower_shuffles0); - mm_storeu_si128(output, lower_coefficients0); + mm_storeu_si128(/* ... then write them out ... */ output, + lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, + /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1425,9 +1852,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_a9_e0(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), + H_a9_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -1914,6 +2345,10 @@ static KRML_MUSTINLINE void sample_from_xof_6c1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
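* Each 168-byte block yields 112 candidate 12-bit values, of which about 112 · 3329/4096 ≈ 91 pass the bound check on average, so the initial 504-byte squeeze (336 candidates for the 256 coefficients needed) is usually already enough.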
+ */ while (true) { if (done) { break; @@ -1972,7 +2407,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -2187,7 +2622,12 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -2239,7 +2679,13 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2248,9 +2694,9 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( for (size_t i = offset_vec; i < offset_vec + step_vec; i++) { size_t j = i; libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = - ntt_layer_int_vec_step_61( - re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + ntt_layer_int_vec_step_61(re->coefficients[j], + re->coefficients[j + step_vec], + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -2272,7 +2718,7 @@ static KRML_MUSTINLINE void ntt_at_layer_3_61( zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_3_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]));); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]));); } /** @@ -2287,9 +2733,8 @@ static KRML_MUSTINLINE void ntt_at_layer_2_61( i, (size_t)0U, (size_t)16U, (size_t)1U, size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_2_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U;); } @@ -2305,11 +2750,10 @@ static KRML_MUSTINLINE void ntt_at_layer_1_61( i, (size_t)0U, (size_t)16U, (size_t)1U, size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_1_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + 
libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U;); } @@ -2327,7 +2771,11 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2342,7 +2790,9 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - ntt_at_layer_7_61(re); + ntt_at_layer_7_61(/* Due to the small coefficient bound, we can skip the first + round of Montgomery reductions. */ + re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)5U); @@ -2449,13 +2899,13 @@ ntt_multiply_ef_61(libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, size_t i0 = i; out.coefficients[i0] = libcrux_ml_kem_vector_avx2_ntt_multiply_09( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); } return out; } @@ -2475,9 +2925,14 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2511,10 +2966,17 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; - __m256i coefficient_normal_form = - to_standard_domain_61(self->coefficients[j]); + __m256i coefficient_normal_form = to_standard_domain_61( + self->coefficients[/* The coefficients are of the form aR^{-1} mod q, + which means calling to_montgomery_domain() on 
them + should return a mod q. */ + j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -2544,6 +3006,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -2619,7 +3083,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( IndCpaPrivateKeyUnpacked_63 *private_key, IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_be(key_generation_seed, hashed); + cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -2649,8 +3116,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( sample_vector_cbd_then_ntt_out_b41(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_ab(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_ab(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -2675,11 +3142,13 @@ serialize_unpacked_secret_key_8c(IndCpaPublicKeyUnpacked_63 *public_key, IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_ed( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_ed( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -2866,11 +3335,15 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_ab(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -2979,10 +3452,10 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_1_61( re->coefficients[round] = 
libcrux_ml_kem_vector_avx2_inv_ntt_layer_1_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U;); } @@ -3000,8 +3473,8 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_2_61( re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_2_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U;); } @@ -3018,7 +3491,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_3_61( re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_3_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]));); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]));); } /** @@ -3047,7 +3520,13 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3060,7 +3539,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = inv_ntt_layer_int_vec_step_reduce_61( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -3078,7 +3557,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -3104,7 +3586,11 @@ static KRML_MUSTINLINE void add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3217,8 +3703,26 @@ add_message_error_reduce_ef_61( __m256i coefficient_normal_form = 
libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( + self->coefficients + [/* FIXME: Eurydice crashes with: Warning 11: in top-level + declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing function cannot be + translated into C*: let mutable ret(Mark.Present,(Mark.AtMost + 2), ): int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. On the following code: + ```rust result.coefficients[i] = + Vector::barrett_reduce(Vector::add( coefficient_normal_form, + &Vector::add(self.coefficients[i], &message.coefficients[i]), + )); ``` */ + i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3266,8 +3770,18 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3275,12 +3789,18 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)10, coefficients_high0, __m256i); @@ -3293,8 +3813,20 @@ compress_ciphertext_coefficient_ef(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3348,8 +3880,18 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3357,12 +3899,18 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)11, coefficients_high0, __m256i); @@ -3375,8 +3923,20 @@ compress_ciphertext_coefficient_c4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3454,8 +4014,18 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3463,12 +4033,18 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)4, coefficients_high0, __m256i); @@ -3481,8 +4057,20 @@ compress_ciphertext_coefficient_d1(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3508,7 +4096,11 @@ static KRML_MUSTINLINE void compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficient = compress_09_d1(to_unsigned_field_modulus_61(re.coefficients[i0])); @@ -3535,8 +4127,18 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); - __m128i coefficients_low = mm256_castsi256_si128(vector); - __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); + __m128i coefficients_low = + mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take + the bottom 128 bits, i.e. the first 8 16-bit + coefficients */ + vector); + __m256i coefficients_low0 = + mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A + coefficients_low[16:31] = B + coefficients_low[32:63] = C and so on ... after + this step: coefficients_low[0:31] = A + coefficients_low[32:63] = B and so on ... */ + coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3544,12 +4146,18 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = - mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); + __m256i compressed_low2 = mm256_srli_epi32( + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. 
the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)5, coefficients_high0, __m256i); @@ -3562,8 +4170,20 @@ compress_ciphertext_coefficient_f4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + compressed_low3, + compressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3589,7 +4209,11 @@ static KRML_MUSTINLINE void compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficients = compress_09_f4(to_unsigned_representative_61(re.coefficients[i0])); @@ -3677,7 +4301,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -3689,6 +4317,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = sample_ring_element_cbd_b41(copy_of_prf_input, domain_separator0); @@ -3697,7 +4326,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_a9_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -3705,9 +4334,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, 
error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -3716,12 +4347,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -3916,7 +4549,8 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)10); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -3924,12 +4558,16 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)10, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -3937,12 +4575,27 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)10, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3997,7 +4650,8 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)11); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4005,12 +4659,16 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)11, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4018,12 +4676,27 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)11, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4146,7 +4819,8 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)4); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4154,12 +4828,16 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)4, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4167,12 +4845,27 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)4, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4222,7 +4915,8 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)5); - __m128i coefficients_low = mm256_castsi256_si128(vector); + __m128i coefficients_low = mm256_castsi256_si128( + /* ---- Compress the first 8 coefficients ---- */ vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4230,12 +4924,16 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = - mm256_srli_epi32((int32_t)5, decompressed_low1, __m256i); + __m256i decompressed_low2 = mm256_srli_epi32( + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = - mm256_extracti128_si256((int32_t)1, vector, __m128i); + __m128i coefficients_high = mm256_extracti128_si256( + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4243,12 +4941,27 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = - mm256_srli_epi32((int32_t)5, decompressed_high1, __m256i); + __m256i decompressed_high2 = mm256_srli_epi32( + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); - return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); + __m256i compressed = + mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, + this function results in: 0: low low low low | 1: + high high high high | 2: low low low low | 3: high + high high high where each |low| and |high| is a + 16-bit element */ + decompressed_low3, + decompressed_high3); + return mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4415,11 +5128,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_2f( IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); + deserialize_then_decompress_u_ed( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -4440,7 +5156,8 @@ with const generics static KRML_MUSTINLINE void decrypt_2f(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - deserialize_secret_key_ab(secret_key, secret_as_ntt); + deserialize_secret_key_ab(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -4555,17 +5272,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_a11( kdf_d8_ae(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_ae(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_ae(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -4760,9 +5477,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_5e( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_a9_ac(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), + H_a9_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly 
+ on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -5239,6 +5960,10 @@ static KRML_MUSTINLINE void sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_78( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. + */ while (true) { if (done) { break; @@ -5297,7 +6022,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -5448,9 +6173,14 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -5481,6 +6211,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_42( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
+ */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -5556,7 +6288,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( IndCpaPrivateKeyUnpacked_39 *private_key, IndCpaPublicKeyUnpacked_39 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_6a(key_generation_seed, hashed); + cpa_keygen_seed_d8_6a(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5586,8 +6321,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( sample_vector_cbd_then_ntt_out_b4(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_42(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_42(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5612,11 +6347,13 @@ serialize_unpacked_secret_key_c9(IndCpaPublicKeyUnpacked_39 *public_key, IndCpaPrivateKeyUnpacked_39 *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_1e( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_78(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_78( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -5803,11 +6540,15 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_39 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_42(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -5899,7 +6640,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -6129,7 +6873,11 @@ static KRML_MUSTINLINE void 
encrypt_unpacked_74( IndCpaPublicKeyUnpacked_39 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -6141,6 +6889,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd uu____3 = sample_ring_element_cbd_b4(copy_of_prf_input, domain_separator0); @@ -6149,7 +6898,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_a9_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -6157,9 +6906,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[4U]; - compute_vector_u_42(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_42(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -6168,12 +6919,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[4U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_c9( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_1e( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -6487,11 +7240,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_37( IndCpaPrivateKeyUnpacked_39 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[4U]; - deserialize_then_decompress_u_1e(ciphertext, u_as_ntt); + deserialize_then_decompress_u_1e( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_78( - Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, - (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1568U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)1408U, 
uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_42(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -6512,7 +7268,8 @@ with const generics static KRML_MUSTINLINE void decrypt_37(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[4U]; - deserialize_secret_key_42(secret_key, secret_as_ntt); + deserialize_secret_key_42(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[4U]; memcpy( @@ -6615,17 +7372,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_a10( kdf_d8_5e(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_5e(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_5e(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_af(ciphertext), Eurydice_array_to_slice((size_t)1568U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -6820,9 +7577,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_4d( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_a9_fd(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), + H_a9_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -7273,6 +8034,10 @@ static KRML_MUSTINLINE void sample_from_xof_6c0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_29( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -7331,7 +8096,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7487,9 +8252,14 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -7520,6 +8290,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_89( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -7595,7 +8367,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( IndCpaPrivateKeyUnpacked_94 *private_key, IndCpaPublicKeyUnpacked_94 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_f8(key_generation_seed, hashed); + cpa_keygen_seed_d8_f8(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -7625,8 +8400,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( sample_vector_cbd_then_ntt_out_b40(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_89(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_89(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -7651,11 +8426,13 @@ serialize_unpacked_secret_key_2d(IndCpaPublicKeyUnpacked_94 *public_key, IndCpaPrivateKeyUnpacked_94 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_ba( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_29(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_29( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -7842,11 +8619,15 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_94 *unpacked_public_key) { - Eurydice_slice uu____0 = - 
Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_89(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7984,7 +8765,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -8176,7 +8960,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( IndCpaPublicKeyUnpacked_94 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -8188,6 +8976,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_40 uu____3 = sample_ring_element_cbd_b40(copy_of_prf_input, domain_separator0); @@ -8196,7 +8985,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_a9_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -8204,9 +8993,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[2U]; - compute_vector_u_89(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_89(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -8215,12 +9006,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; 
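  /* Size check for the two encoding steps below (a derivation for the reader,
     not generated code): with k = 2, d_u = 10 and d_v = 4, as this ML-KEM 512
     instantiation uses, c_1 = Encode_{du}(Compress_q(u,d_u)) takes
     k * 256 * d_u / 8 = 2 * 256 * 10 / 8 = 640 bytes and
     c_2 = Encode_{dv}(Compress_q(v,d_v)) takes 256 * d_v / 8 = 128 bytes,
     so the ciphertext is 640 + 128 = 768 bytes, matching the subslice bounds
     passed to compress_then_serialize_u_2d and
     compress_then_serialize_ring_element_v_ba below. */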
libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[2U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_2d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ba( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -8504,11 +9297,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_4b( IndCpaPrivateKeyUnpacked_94 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[2U]; - deserialize_then_decompress_u_ba(ciphertext, u_as_ntt); + deserialize_then_decompress_u_ba( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_29( - Eurydice_array_to_subslice_from((size_t)768U, ciphertext, - (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)768U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)640U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_89(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -8529,7 +9325,8 @@ with const generics static KRML_MUSTINLINE void decrypt_4b(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[2U]; - deserialize_secret_key_89(secret_key, secret_as_ntt); + deserialize_secret_key_89(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[2U]; memcpy( @@ -8631,15 +9428,15 @@ void libcrux_ml_kem_ind_cca_decapsulate_a1( kdf_d8_4d(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_4d(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_4d(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_d0(ciphertext), Eurydice_array_to_slice((size_t)768U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h index c127a7b25..addfdaf30 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 
1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.c b/libcrux-ml-kem/c/libcrux_mlkem_portable.c index 128049b3b..fddae347c 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_mlkem_portable.h" @@ -66,7 +66,7 @@ static const int16_t ZETAS_TIMES_MONTGOMERY_R[128U] = { (int16_t)-108, (int16_t)-308, (int16_t)996, (int16_t)991, (int16_t)958, (int16_t)-1460, (int16_t)1522, (int16_t)1628}; -int16_t libcrux_ml_kem_polynomial_get_zeta(size_t i) { +int16_t libcrux_ml_kem_polynomial_zeta(size_t i) { return ZETAS_TIMES_MONTGOMERY_R[i]; } @@ -1152,11 +1152,28 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( */ uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = (int16_t)1664 - (int16_t)fe; - int16_t mask = shifted >> 15U; + int16_t shifted = + (int16_t)1664 - + (int16_t) /* The approach used here is inspired by: + https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 + If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ + fe; + int16_t mask = + /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = + -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive + <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so + if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ + shifted + + >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = shifted_positive_in_range >> 15U; + int16_t r0 = + /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the + most significant bit of shifted_positive_in_range will be 1. 
*/ + shifted_positive_in_range + + >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1192,7 +1209,16 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; + uint64_t compressed = + (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits + == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); + hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to + be constant time due to: + https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ + */ + fe + + << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2712,9 +2738,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_60( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_f1_ac(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), + H_f1_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -3204,6 +3234,10 @@ static KRML_MUSTINLINE void sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ff( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -3263,7 +3297,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3461,7 +3495,12 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -3523,7 +3562,13 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3532,9 +3577,9 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( for (size_t i = offset_vec; i < offset_vec + step_vec; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = - ntt_layer_int_vec_step_8c( - re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + ntt_layer_int_vec_step_8c(re->coefficients[j], + re->coefficients[j + step_vec], + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -3557,7 +3602,7 @@ static KRML_MUSTINLINE void ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0;); } @@ -3575,8 +3620,8 @@ static KRML_MUSTINLINE void ntt_at_layer_2_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_2_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U;); } @@ -3594,10 +3639,10 @@ static KRML_MUSTINLINE void ntt_at_layer_1_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_1_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + 
libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U;); } @@ -3615,7 +3660,11 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3632,7 +3681,9 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - ntt_at_layer_7_8c(re); + ntt_at_layer_7_8c(/* Due to the small coefficient bound, we can skip the first + round of Montgomery reductions. */ + re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)5U); @@ -3742,13 +3793,13 @@ ntt_multiply_ef_8c(libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_multiply_0d( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); out.coefficients[i0] = uu____0; } return out; @@ -3771,7 +3822,11 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_d0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3811,10 +3866,18 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector - coefficient_normal_form = to_standard_domain_8c(self->coefficients[j]); + coefficient_normal_form = to_standard_domain_8c( + self->coefficients[/* The coefficients are of the form aR^{-1} mod + q, which means calling to_montgomery_domain() + on them should return a mod q. 
*/ + j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -3846,6 +3909,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_d0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3921,7 +3986,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( IndCpaPrivateKeyUnpacked_af *private_key, IndCpaPublicKeyUnpacked_af *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_03(key_generation_seed, hashed); + cpa_keygen_seed_d8_03(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -3951,8 +4019,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( sample_vector_cbd_then_ntt_out_3b(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_d0(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_d0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -3977,11 +4045,13 @@ serialize_unpacked_secret_key_2f(IndCpaPublicKeyUnpacked_af *public_key, IndCpaPrivateKeyUnpacked_af *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_00( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_ff(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_ff( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -4169,11 +4239,15 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_af *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_d0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4284,10 +4358,10 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_1_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_1_step_0d( re->coefficients[round], 
- libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U;); } @@ -4305,8 +4379,8 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_2_8c( re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_2_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U;); } @@ -4324,7 +4398,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_inv_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0;); } @@ -4360,7 +4434,13 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -4373,7 +4453,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = inv_ntt_layer_int_vec_step_reduce_8c( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -4391,7 +4471,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_d0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -4417,7 +4500,11 @@ static KRML_MUSTINLINE void add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ 
-4543,8 +4630,27 @@ add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d( + self->coefficients[/* FIXME: Eurydice crashes with: Warning 11: in + top-level declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing + function cannot be translated into C*: let + mutable ret(Mark.Present,(Mark.AtMost 2), ): + int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. On the + following code: ```rust result.coefficients[i] + = Vector::barrett_reduce(Vector::add( + coefficient_normal_form, + &Vector::add(self.coefficients[i], + &message.coefficients[i]), )); ``` */ + i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -4757,7 +4863,11 @@ static KRML_MUSTINLINE void compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = compress_0d_d1(to_unsigned_field_modulus_8c(re.coefficients[i0])); @@ -4812,7 +4922,11 @@ static KRML_MUSTINLINE void compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = compress_0d_f4(to_unsigned_representative_8c(re.coefficients[i0])); @@ -4901,7 +5015,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( IndCpaPublicKeyUnpacked_af *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ 
-4913,6 +5031,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd0 uu____3 = sample_ring_element_cbd_3b(copy_of_prf_input, domain_separator0); @@ -4921,7 +5040,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_f1_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4929,9 +5048,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[4U]; - compute_vector_u_d0(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_d0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -4940,12 +5061,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[4U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_2f( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_00( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -5584,11 +5707,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_7d( IndCpaPrivateKeyUnpacked_af *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[4U]; - deserialize_then_decompress_u_00(ciphertext, u_as_ntt); + deserialize_then_decompress_u_00( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_ff( - Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, - (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1568U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_d0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5609,7 +5735,8 @@ with const generics static KRML_MUSTINLINE void decrypt_7d(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[4U]; - deserialize_secret_key_d0(secret_key, secret_as_ntt); + deserialize_secret_key_d0(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in 
Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[4U]; memcpy( @@ -5724,17 +5851,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_621( kdf_d8_60(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_60(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_60(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_af(ciphertext), Eurydice_array_to_slice((size_t)1568U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -5929,9 +6056,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_30( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_f1_fd(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), + H_f1_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -6381,6 +6512,10 @@ static KRML_MUSTINLINE void sample_from_xof_2b0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_64( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. + */ while (true) { if (done) { break; @@ -6440,7 +6575,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6586,7 +6721,11 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_a0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -6621,6 +6760,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_a0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
+ */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6696,7 +6837,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( IndCpaPrivateKeyUnpacked_d4 *private_key, IndCpaPublicKeyUnpacked_d4 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_10(key_generation_seed, hashed); + cpa_keygen_seed_d8_10(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6726,8 +6870,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( sample_vector_cbd_then_ntt_out_3b0(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_a0(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_a0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6752,11 +6896,13 @@ serialize_unpacked_secret_key_6d(IndCpaPublicKeyUnpacked_d4 *public_key, IndCpaPrivateKeyUnpacked_d4 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_86( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_64(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_64( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -6944,11 +7090,15 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_d4 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_a0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7074,7 +7224,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_a0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -7305,7 +7458,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( IndCpaPublicKeyUnpacked_d4 *public_key, 
uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -7318,6 +7475,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_400 uu____3 = sample_ring_element_cbd_3b0(copy_of_prf_input, domain_separator0); @@ -7326,7 +7484,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_f1_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -7334,9 +7492,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[2U]; - compute_vector_u_a0(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_a0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -7345,12 +7505,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[2U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_6d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_86( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -7665,11 +7827,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_d1( IndCpaPrivateKeyUnpacked_d4 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[2U]; - deserialize_then_decompress_u_86(ciphertext, u_as_ntt); + deserialize_then_decompress_u_86( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_64( - Eurydice_array_to_subslice_from((size_t)768U, ciphertext, - (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)768U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)640U, uint8_t, size_t)); 
libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_a0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7690,7 +7855,8 @@ with const generics static KRML_MUSTINLINE void decrypt_d1(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[2U]; - deserialize_secret_key_a0(secret_key, secret_as_ntt); + deserialize_secret_key_a0(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[2U]; memcpy( @@ -7793,17 +7959,17 @@ void libcrux_ml_kem_ind_cca_decapsulate_620( kdf_d8_30(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_30(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_30(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_d0(ciphertext), Eurydice_array_to_slice((size_t)768U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -7998,9 +8164,13 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_f1_e0(Eurydice_array_to_subslice2( - private_key->value, (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), + H_f1_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly + on the types. We need to go to the + `value` directly. */ + private_key->value, + (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, + uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -8456,6 +8626,10 @@ static KRML_MUSTINLINE void sample_from_xof_2b1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -8515,7 +8689,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8650,7 +8824,11 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -8685,6 +8863,8 @@ static KRML_MUSTINLINE void compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8760,7 +8940,10 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( IndCpaPrivateKeyUnpacked_a0 *private_key, IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_9c(key_generation_seed, hashed); + cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for + ML-KEM */ + key_generation_seed, + hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8790,8 +8973,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( sample_vector_cbd_then_ntt_out_3b1(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_1b(public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_1b(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, + public_key->A, private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8816,11 +8999,13 @@ serialize_unpacked_secret_key_43(IndCpaPublicKeyUnpacked_a0 *public_key, IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_6c( - public_key->t_as_ntt, + /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); + serialize_secret_key_89( + /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, + secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -9008,11 +9193,15 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); 
deserialize_ring_elements_reduced_1b(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, + (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -9106,7 +9295,10 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -9299,7 +9491,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, + prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -9312,6 +9508,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = sample_ring_element_cbd_3b1(copy_of_prf_input, domain_separator0); @@ -9320,7 +9517,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; PRF_f1_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -9328,9 +9525,11 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); + compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, + r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -9339,12 +9538,14 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); 
compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -9629,11 +9830,14 @@ static KRML_MUSTINLINE void decrypt_unpacked_42( IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); + deserialize_then_decompress_u_6c( + /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9654,7 +9858,8 @@ with const generics static KRML_MUSTINLINE void decrypt_42(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - deserialize_secret_key_1b(secret_key, secret_as_ntt); + deserialize_secret_key_1b(/* sˆ := Decode_12(sk) */ secret_key, + secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -9756,15 +9961,15 @@ void libcrux_ml_kem_ind_cca_decapsulate_62( kdf_d8_d6(Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - kdf_d8_d6(shared_secret0, shared_secret1); uint8_t shared_secret[32U]; + kdf_d8_d6(shared_secret0, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/libcrux_mlkem_portable.h index 33fff6338..012f00992 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef 
__libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/libcrux_sha3.h b/libcrux-ml-kem/c/libcrux_sha3.h index 3101a818f..16a61b7e6 100644 --- a/libcrux-ml-kem/c/libcrux_sha3.h +++ b/libcrux-ml-kem/c/libcrux_sha3.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.c b/libcrux-ml-kem/c/libcrux_sha3_avx2.c index 4e234ddec..23fa30cd5 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.c +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "internal/libcrux_sha3_avx2.h" @@ -77,7 +77,8 @@ static KRML_MUSTINLINE __m256i and_not_xor_ef(__m256i a, __m256i b, __m256i c) { } static KRML_MUSTINLINE __m256i _veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = mm256_set1_epi64x((int64_t)c); + __m256i c0 = mm256_set1_epi64x( + (int64_t) /* Casting here is required, doesn't change the value. 
*/ c); return mm256_xor_si256(a, c0); } @@ -1430,13 +1431,13 @@ static KRML_MUSTINLINE void store_block_5b(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = - mm256_permute2x128_si256((int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = mm256_permute2x128_si256( + (int32_t)32, + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], @@ -1747,7 +1748,16 @@ void libcrux_sha3_avx2_x4_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = { + /* XXX: These functions could alternatively implement the same with the + portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, + 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, + 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, + 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); + keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, + 0x1fu8>([input3], [out3]); } */ + input0, + input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; keccak_fb(buf0, buf); } @@ -1962,13 +1972,13 @@ static KRML_MUSTINLINE void store_block_3a(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = - mm256_permute2x128_si256((int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = mm256_permute2x128_si256( + (int32_t)32, + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/libcrux_sha3_avx2.h index 7a6e0c8cb..645f80b34 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_avx2_H diff --git 
a/libcrux-ml-kem/c/libcrux_sha3_internal.h b/libcrux-ml-kem/c/libcrux_sha3_internal.h index 7c140d2b8..74eeb47a3 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_internal_H @@ -1811,6 +1811,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -2159,6 +2160,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -2507,6 +2509,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -2695,6 +2698,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2813,6 +2817,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -3161,6 +3166,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.c 
b/libcrux-ml-kem/c/libcrux_sha3_neon.c index c16b77594..5e4416bcd 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.c +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #include "libcrux_sha3_neon.h" @@ -62,6 +62,7 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { + /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -72,6 +73,9 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, */ KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let s0 = KeccakState::new(); let s1 = + * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -83,6 +87,10 @@ libcrux_sha3_neon_x2_incremental_init(void) { KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -96,6 +104,10 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_first_three_blocks(&mut s0, out0); + * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -109,6 +121,10 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_next_block(&mut s0, out0); + * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -132,6 +148,10 @@ libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_five_blocks( KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + 
/* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.h b/libcrux-ml-kem/c/libcrux_sha3_neon.h index 2f179ee38..6e264c84f 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.h +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 */ #ifndef __libcrux_sha3_neon_H diff --git a/libcrux-ml-kem/src/invert_ntt.rs b/libcrux-ml-kem/src/invert_ntt.rs index 7f9506731..87bc90fed 100644 --- a/libcrux-ml-kem/src/invert_ntt.rs +++ b/libcrux-ml-kem/src/invert_ntt.rs @@ -102,11 +102,8 @@ pub(crate) fn invert_ntt_at_layer_2( hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque 3328 (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); - re.coefficients[round] = Vector::inv_ntt_layer_2_step( - re.coefficients[round], - zeta(*zeta_i), - zeta(*zeta_i - 1), - ); + re.coefficients[round] = + Vector::inv_ntt_layer_2_step(re.coefficients[round], zeta(*zeta_i), zeta(*zeta_i - 1)); *zeta_i -= 1; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque 3328 diff --git a/libcrux-ml-kem/src/mlkem512.rs b/libcrux-ml-kem/src/mlkem512.rs index 1af827529..b9b33596d 100644 --- a/libcrux-ml-kem/src/mlkem512.rs +++ b/libcrux-ml-kem/src/mlkem512.rs @@ -4,17 +4,21 @@ use super::{constants::*, ind_cca::*, types::*, *}; // Kyber 512 parameters const RANK_512: usize = 2; const RANKED_BYTES_PER_RING_ELEMENT_512: usize = RANK_512 * BITS_PER_RING_ELEMENT / 8; -const T_AS_NTT_ENCODED_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; +const T_AS_NTT_ENCODED_SIZE_512: usize = + (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; const VECTOR_U_COMPRESSION_FACTOR_512: usize = 10; -const C1_BLOCK_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_U_COMPRESSION_FACTOR_512) / 8; +const C1_BLOCK_SIZE_512: usize = + (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_U_COMPRESSION_FACTOR_512) / 8; const C1_SIZE_512: usize = C1_BLOCK_SIZE_512 * RANK_512; const VECTOR_V_COMPRESSION_FACTOR_512: usize = 4; const C2_SIZE_512: usize = (COEFFICIENTS_IN_RING_ELEMENT * VECTOR_V_COMPRESSION_FACTOR_512) / 8; -const CPA_PKE_SECRET_KEY_SIZE_512: usize = (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; +const CPA_PKE_SECRET_KEY_SIZE_512: usize = + (RANK_512 * COEFFICIENTS_IN_RING_ELEMENT * BITS_PER_COEFFICIENT) / 8; pub(crate) const CPA_PKE_PUBLIC_KEY_SIZE_512: usize = T_AS_NTT_ENCODED_SIZE_512 + 32; const CPA_PKE_CIPHERTEXT_SIZE_512: usize = C1_SIZE_512 + C2_SIZE_512; -pub(crate) const SECRET_KEY_SIZE_512: usize = 
CPA_PKE_SECRET_KEY_SIZE_512 + CPA_PKE_PUBLIC_KEY_SIZE_512 + H_DIGEST_SIZE + SHARED_SECRET_SIZE; +pub(crate) const SECRET_KEY_SIZE_512: usize = + CPA_PKE_SECRET_KEY_SIZE_512 + CPA_PKE_PUBLIC_KEY_SIZE_512 + H_DIGEST_SIZE + SHARED_SECRET_SIZE; const ETA1: usize = 3; const ETA1_RANDOMNESS_SIZE: usize = ETA1 * 64; diff --git a/libcrux-ml-kem/src/ntt.rs b/libcrux-ml-kem/src/ntt.rs index 973a6d945..fa08e35e5 100644 --- a/libcrux-ml-kem/src/ntt.rs +++ b/libcrux-ml-kem/src/ntt.rs @@ -114,11 +114,8 @@ pub(crate) fn ntt_at_layer_2( hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+4*3328) (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); - re.coefficients[round] = Vector::ntt_layer_2_step( - re.coefficients[round], - zeta(*zeta_i), - zeta(*zeta_i + 1), - ); + re.coefficients[round] = + Vector::ntt_layer_2_step(re.coefficients[round], zeta(*zeta_i), zeta(*zeta_i + 1)); *zeta_i += 1; hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+5*3328) @@ -172,8 +169,7 @@ pub(crate) fn ntt_at_layer_3( hax_lib::fstar!("reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+3*3328) (Libcrux_ml_kem.Vector.Traits.f_to_i16_array (re.f_coefficients.[ round ])))"); - re.coefficients[round] = - Vector::ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); + re.coefficients[round] = Vector::ntt_layer_3_step(re.coefficients[round], zeta(*zeta_i)); hax_lib::fstar!( "reveal_opaque (`%Spec.Utils.is_i16b_array_opaque) (Spec.Utils.is_i16b_array_opaque (11207+4*3328) diff --git a/libcrux-ml-kem/src/polynomial.rs b/libcrux-ml-kem/src/polynomial.rs index cb6f0fe8b..5bad1d43a 100644 --- a/libcrux-ml-kem/src/polynomial.rs +++ b/libcrux-ml-kem/src/polynomial.rs @@ -213,7 +213,7 @@ impl PolynomialRingElement { /// /// The NIST FIPS 203 standard can be found at /// . - + // TODO: Remove or replace with something that works and is useful for the proof. 
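For orientation (a hedged scalar sketch, not the extracted code): in the NTT domain each adjacent pair of coefficients is a degree-one polynomial, and ntt_multiply multiplies such pairs modulo X^2 - zeta for the appropriate power of zeta, so the base case is

    fn base_case_multiply(a: (i64, i64), b: (i64, i64), zeta: i64) -> (i64, i64) {
        const Q: i64 = 3329;
        // (a0 + a1*X) * (b0 + b1*X) mod (X^2 - zeta); inputs assumed reduced mod Q,
        // with plain % Q standing in for the Montgomery reductions used by the backends.
        let c0 = (a.0 * b.0 + a.1 * b.1 * zeta) % Q;
        let c1 = (a.0 * b.1 + a.1 * b.0) % Q;
        (c0, c1)
    }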
// #[cfg_attr(hax, hax_lib::requires( // hax_lib::forall(|i:usize| @@ -229,7 +229,7 @@ impl PolynomialRingElement { #[inline(always)] pub(crate) fn ntt_multiply(&self, rhs: &Self) -> Self { hax_lib::fstar!("admit ()"); - + let mut out = PolynomialRingElement::ZERO(); for i in 0..VECTORS_IN_RING_ELEMENT { From fbef3649fa222b800fc7dcc349855bcd7de48e36 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 18:47:29 +0000 Subject: [PATCH 4/7] c code refresh --- libcrux-ml-kem/cg/code_gen.txt | 10 +- libcrux-ml-kem/cg/libcrux_core.h | 10 +- libcrux-ml-kem/cg/libcrux_ct_ops.h | 10 +- libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h | 1378 ++++++++++++++--- libcrux-ml-kem/cg/libcrux_mlkem768_portable.h | 302 +++- libcrux-ml-kem/cg/libcrux_sha3_avx2.h | 28 +- libcrux-ml-kem/cg/libcrux_sha3_portable.h | 104 +- 7 files changed, 1463 insertions(+), 379 deletions(-) diff --git a/libcrux-ml-kem/cg/code_gen.txt b/libcrux-ml-kem/cg/code_gen.txt index 420446603..7e79f022e 100644 --- a/libcrux-ml-kem/cg/code_gen.txt +++ b/libcrux-ml-kem/cg/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 -Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 -Karamel: 8c3612018c25889288da6857771be3ad03b75bcd -F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty -Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a +Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f +Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c +Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 +F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc +Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 diff --git a/libcrux-ml-kem/cg/libcrux_core.h b/libcrux-ml-kem/cg/libcrux_core.h index b5a34d0e2..ca8a53171 100644 --- a/libcrux-ml-kem/cg/libcrux_core.h +++ b/libcrux-ml-kem/cg/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/cg/libcrux_ct_ops.h b/libcrux-ml-kem/cg/libcrux_ct_ops.h index ddf47bd96..5f693d09c 100644 --- a/libcrux-ml-kem/cg/libcrux_ct_ops.h +++ b/libcrux-ml-kem/cg/libcrux_ct_ops.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_ct_ops_H diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h index aa0858642..bb50d3eaf 100644 --- 
a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_mlkem768_avx2_H @@ -171,11 +171,16 @@ libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i v_minus_field_modulus = - libcrux_intrinsics_avx2_mm256_sub_epi16(vector, field_modulus); + libcrux_intrinsics_avx2_mm256_sub_epi16(/* Compute v_i - Q and crate a + mask from the sign bit of each + of these quantities. */ + vector, field_modulus); __m256i sign_mask = libcrux_intrinsics_avx2_mm256_srai_epi16( (int32_t)15, v_minus_field_modulus, __m256i); __m256i conditional_add_field_modulus = - libcrux_intrinsics_avx2_mm256_and_si256(sign_mask, field_modulus); + libcrux_intrinsics_avx2_mm256_and_si256(/* If v_i - Q < 0 then add back Q + to (v_i - Q). */ + sign_mask, field_modulus); return libcrux_intrinsics_avx2_mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -557,6 +562,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { + /* Compute the first term of the product */ __m256i shuffle_with = libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -564,8 +570,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(lhs, shuffle_with); + __m256i lhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + /* Prepare the left hand side */ lhs, shuffle_with); __m256i lhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = @@ -574,8 +580,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(rhs, shuffle_with); + __m256i rhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + /* Prepare the right hand side */ rhs, shuffle_with); __m256i rhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = @@ -584,8 +590,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, 
rhs_shuffled0, __m128i); __m256i rhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(rhs_odds); - __m256i left = - libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_evens0, rhs_evens0); + __m256i left = libcrux_intrinsics_avx2_mm256_mullo_epi32( + /* Start operating with them */ lhs_evens0, rhs_evens0); __m256i right = libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = @@ -600,7 +606,7 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - rhs, + /* Compute the second term of the product */ rhs, libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, @@ -615,8 +621,10 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)16, products_right0, __m256i); - return libcrux_intrinsics_avx2_mm256_blend_epi16((int32_t)170, products_left0, - products_right1, __m256i); + return libcrux_intrinsics_avx2_mm256_blend_epi16( + (int32_t)170, + /* Combine them into one vector */ products_left0, products_right1, + __m256i); } /** @@ -634,13 +642,60 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = - libcrux_intrinsics_avx2_mm256_slli_epi16((int32_t)15, vector, __m256i); - __m128i low_msbs = libcrux_intrinsics_avx2_mm256_castsi256_si128(lsb_to_msb); + __m256i lsb_to_msb = libcrux_intrinsics_avx2_mm256_slli_epi16( + (int32_t)15, + /* Suppose |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ + 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least + significant bit in each lane, move it to the most significant position + to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ + d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ + n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ + vector, __m256i); + __m128i low_msbs = + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* Get the first 8 16-bit + elements ... */ + lsb_to_msb); __m128i high_msbs = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, lsb_to_msb, __m128i); - __m128i msbs = libcrux_intrinsics_avx2_mm_packs_epi16(low_msbs, high_msbs); - int32_t bits_packed = libcrux_intrinsics_avx2_mm_movemask_epi8(msbs); + (int32_t)1, + /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); + __m128i msbs = + libcrux_intrinsics_avx2_mm_packs_epi16(/* ... and then pack them into + 8-bit values using signed + saturation. This function packs + all the |low_msbs|, and then the + high ones. low_msbs = a₀0¹⁵ + b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ + g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ + j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ + o₀0¹⁵ p₀0¹⁵ We shifted by 15 + above to take advantage of the + signed saturation performed by + mm_packs_epi16: - if the sign + bit of the 16-bit element being + packed is 1, the corresponding + 8-bit element in |msbs| will be + 0xFF. - if the sign bit of the + 16-bit element being packed is + 0, the corresponding 8-bit + element in |msbs| will be 0. 
+ Thus, if, for example, a₀ = 1, + e₀ = 1, and p₀ = 1, and every + other bit is 0, after packing + into 8 bit value, |msbs| will + look like: 0xFF 0x00 0x00 0x00 | + 0xFF 0x00 0x00 0x00 | 0x00 0x00 + 0x00 0x00 | 0x00 0x00 0x00 0xFF + */ + low_msbs, high_msbs); + int32_t bits_packed = + libcrux_intrinsics_avx2_mm_movemask_epi8(/* Now that every element is + either 0xFF or 0x00, we just + extract the most significant + bit from each element and + collate them into two bytes. + */ + msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -659,18 +714,63 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { - __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( - b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( - coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, - (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, - (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, - (int16_t)-32768)); - return libcrux_intrinsics_avx2_mm256_srli_epi16((int32_t)15, - coefficients_in_msb, __m256i); + __m256i coefficients = + libcrux_intrinsics_avx2_mm256_set_epi16(/* We need to take each bit from + the 2 bytes of input and put + them into their own 16-bit + lane. Ideally, we'd load the + two bytes into the vector, + duplicate them, and right-shift + the 0th element by 0 bits, the + first element by 1 bit, the + second by 2 bits and so on + before AND-ing with 0x1 to + leave only the least + signifinicant bit. But since + |_mm256_srlv_epi16| does not + exist, so we have to resort to + a workaround. Rather than + shifting each element by a + different amount, we'll + multiply each element by a + value such that the bit we're + interested in becomes the most + significant bit. The + coefficients are loaded as + follows: */ + b, b, b, b, b, b, b, b, a, a, a, + a, a, a, a, a); + __m256i coefficients_in_msb = + libcrux_intrinsics_avx2_mm256_mullo_epi16(/* And this vector, when + multiplied with the previous + one, ensures that the bit + we'd like to keep in each + lane becomes the most + significant bit upon + multiplication. */ + coefficients, + libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 8U, + (int16_t)1 << 9U, + (int16_t)1 << 10U, + (int16_t)1 << 11U, + (int16_t)1 << 12U, + (int16_t)1 << 13U, + (int16_t)1 << 14U, + (int16_t)-32768, + (int16_t)1 << 8U, + (int16_t)1 << 9U, + (int16_t)1 << 10U, + (int16_t)1 << 11U, + (int16_t)1 << 12U, + (int16_t)1 << 13U, + (int16_t)1 << 14U, + (int16_t)-32768)); + return libcrux_intrinsics_avx2_mm256_srli_epi16( + (int32_t)15, + /* Now that they're all in the most significant bit position, shift them + down to the least significant bit. 
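A scalar rendering of this trick may help (a sketch only, not the libcrux API): multiplying lane i by 1 << (15 - i) is what moves bit i of the input byte into the sign position, so a logical right shift by 15 leaves exactly that bit.

    fn deserialize_1_scalar(a: u8, b: u8) -> [u16; 16] {
        let mut out = [0u16; 16];
        for i in 0..8 {
            // (x << (15 - i)) puts bit i of x in the top position; >> 15 extracts it.
            out[i] = ((a as u16) << (15 - i)) >> 15;
            out[8 + i] = ((b as u16) << (15 - i)) >> 15;
        }
        out
    }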
*/ + coefficients_in_msb, __m256i); } KRML_ATTRIBUTE_TARGET("avx2") @@ -685,7 +785,23 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* We need to take each bit from the 2 bytes of input and put them + into their own 16-bit lane. Ideally, we'd load the two bytes into + the vector, duplicate them, and right-shift the 0th element by 0 + bits, the first element by 1 bit, the second by 2 bits and so on + before AND-ing with 0x1 to leave only the least signifinicant bit. + But since |_mm256_srlv_epi16| does not exist, so we have to resort + to a workaround. Rather than shifting each element by a different + amount, we'll multiply each element by a value such that the bit + we're interested in becomes the most significant bit. The + coefficients are loaded as follows: And this vector, when + multiplied with the previous one, ensures that the bit we'd like to + keep in each lane becomes the most significant bit upon + multiplication. Now that they're all in the most significant bit + position, shift them down to the least significant bit. */ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -721,23 +837,70 @@ static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); - __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); - __m256i combined = libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32( - adjacent_8_combined, libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 4U, + /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | + 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be + laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA + 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ + vector); + __m256i adjacent_8_combined = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* Recall that + |adjacent_2_combined| goes + as follows: 0x00_00_00_BA + 0x00_00_00_DC | + 0x00_00_00_FE 0x00_00_00_HG + | ... Out of this, we only + need the first byte, the 4th + byte, the 8th byte and so on + from the bottom and the top + 128 bits. 
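The pair-combining and byte shuffle above amount to plain nibble packing; a scalar sketch (assuming, as the layout suggests, that the even-indexed coefficient ends up in the low nibble):

    fn serialize_4_scalar(v: [u16; 16]) -> [u8; 8] {
        let mut out = [0u8; 8];
        for i in 0..8 {
            // coefficient 2i -> low nibble, coefficient 2i+1 -> high nibble
            out[i] = (v[2 * i] & 0x0f) as u8 | (((v[2 * i + 1] & 0x0f) as u8) << 4);
        }
        out
    }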
*/ + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0)); + __m256i combined = + libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32(/* |adjacent_8_combined| + looks like this: 0: + 0xHG_FE_DC_BA 1: + 0x00_00_00_00 | 2: + 0x00_00_00_00 3: + 0x00_00_00_00 | 4: + 0xPO_NM_LK_JI .... + We put the element + at 4 after the + element at 0 ... */ + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)0, + (int32_t)4, + (int32_t)0)); __m128i combined0 = libcrux_intrinsics_avx2_mm256_castsi256_si128(combined); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( - Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); + Eurydice_array_to_slice( + (size_t)16U, + /* ... so that we can read them out in one go. */ serialized, + uint8_t), + combined0); uint8_t ret0[8U]; Result_15 dst; Eurydice_slice_to_array2( @@ -763,8 +926,33 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( - b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, b0); + __m256i coefficients = + libcrux_intrinsics_avx2_mm256_set_epi16(/* Every 4 bits from each byte of + input should be put into its + own 16-bit lane. Since + |_mm256_srlv_epi16| does not + exist, we have to resort to a + workaround. Rather than + shifting each element by a + different amount, we'll + multiply each element by a + value such that the bits we're + interested in become the most + significant bits (of an 8-bit + value). In this lane, the 4 + bits we need to put are already + the most significant bits of + |bytes[7]| (that is, b7). */ + b7, + /* In this lane, the 4 bits we + need to put are the least + significant bits, so we need to + shift the 4 least-significant + bits of |b7| to the most + significant bits (of an 8-bit + value). */ + b7, b6, b6, b5, b5, b4, b4, b3, + b3, b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -774,10 +962,14 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); __m256i coefficients_in_lsb = libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)4, coefficients_in_msb, __m256i); + (int32_t)4, + /* Once the 4-bit coefficients are in the most significant positions (of + an 8-bit value), shift them all down by 4. */ + coefficients_in_msb, __m256i); return libcrux_intrinsics_avx2_mm256_and_si256( - coefficients_in_lsb, libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 4U) - (int16_t)1)); + /* Zero the remaining bits. 
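Read back the other way this is ordinary nibble extraction; a scalar sketch under the same assumed layout, not the libcrux API:

    fn deserialize_4_scalar(bytes: [u8; 8]) -> [u16; 16] {
        let mut out = [0u16; 16];
        for i in 0..8 {
            out[2 * i] = (bytes[i] & 0x0f) as u16;     // low nibble
            out[2 * i + 1] = (bytes[i] >> 4) as u16;   // high nibble
        }
        out
    }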
*/ coefficients_in_lsb, + libcrux_intrinsics_avx2_mm256_set1_epi16(((int16_t)1 << 4U) - + (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") @@ -794,7 +986,23 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index( + bytes, + /* Every 4 bits from each byte of input should be put into its own + 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to + resort to a workaround. Rather than shifting each element by a + different amount, we'll multiply each element by a value such that + the bits we're interested in become the most significant bits (of + an 8-bit value). In this lane, the 4 bits we need to put are + already the most significant bits of |bytes[7]| (that is, b7). In + this lane, the 4 bits we need to put are the least significant + bits, so we need to shift the 4 least-significant bits of |b7| to + the most significant bits (of an 8-bit value). These constants are + chosen to shift the bits of the values that we loaded into + |coefficients|. Once the 4-bit coefficients are in the most + significant positions (of an 8-bit value), shift them all down + by 4. Zero the remaining bits. */ + (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -818,35 +1026,106 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = libcrux_intrinsics_avx2_mm256_madd_epi16( - vector, libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); - __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, (int32_t)0, - (int32_t)22, (int32_t)0, (int32_t)22)); + __m256i adjacent_2_combined = + libcrux_intrinsics_avx2_mm256_madd_epi16(/* If |vector| is laid out as + follows (superscript number + indicates the corresponding + bit is duplicated that many + times): 0¹¹a₄a₃a₂a₁a₀ + 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ + 0¹¹d₄d₃d₂d₁d₀ | ↩ + 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ + 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | + ↩ |adjacent_2_combined| will + be laid out as a series of + 32-bit integers, as follows: + 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ + 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... */ + vector, + libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, + (int16_t)1)); + __m256i adjacent_4_combined = + libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Recall that + |adjacent_2_combined| is laid + out as follows: + 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ + 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... 
This shift results in: + b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ + f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ + .... */ + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)22, adjacent_4_combined, __m256i); + (int32_t)22, + /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: + 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ + 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift + down by 22 bits to remove the least significant 0 bits that aren't part + of the bits we need. */ + adjacent_4_combined, __m256i); __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi32( - (int32_t)8, adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = libcrux_intrinsics_avx2_mm256_sllv_epi32( - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)0, (int32_t)0, (int32_t)12)); + (int32_t)8, + /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks + like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² + 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to + read out the bytes in one go, we need to shifts the bits in position 2 + to position 1 in each 128-bit lane. */ + adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = + libcrux_intrinsics_avx2_mm256_sllv_epi32(/* |adjacent_8_combined|, when + viewed as a set of 32-bit + values, now looks like: + 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ + 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ + 0³² 0³² | ↩ Once again, we + line these bits up by shifting + the up values at indices 0 and + 5 by 12, viewing the resulting + register as a set of 64-bit + values, and then shifting down + the 64-bit values by 12 bits. + */ + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = libcrux_intrinsics_avx2_mm256_srli_epi64( (int32_t)12, adjacent_8_combined0, __m256i); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined1); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 40 bits + starting at position 0 in + the lower 128-bit lane, + ... */ + adjacent_8_combined1); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, adjacent_8_combined1, __m128i); + (int32_t)1, + /* ... 
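All of this lane shuffling computes the usual little-endian 5-bit packing; a scalar sketch (for orientation only) that should produce the same 10 bytes from 16 coefficients:

    fn serialize_5_scalar(v: [u16; 16]) -> [u8; 10] {
        let mut out = [0u8; 10];
        for i in 0..2 {
            // Pack 8 coefficients (40 bits), coefficient 0 in the lowest bits.
            let mut bits: u64 = 0;
            for j in 0..8 {
                bits |= ((v[8 * i + j] & 0x1f) as u64) << (5 * j);
            }
            for j in 0..5 {
                out[5 * i + j] = ((bits >> (8 * j)) & 0xff) as u8;
            }
        }
        out
    }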
and the second 40 bits at position 0 in the upper 128-bit lane */ + adjacent_8_combined1, __m128i); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -952,27 +1231,87 @@ static inline core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); - __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( + 10U, + /* If |vector| is laid out as follows (superscript number indicates + the corresponding bit is duplicated that many times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... + |adjacent_2_combined| will be laid out as a series of 32-bit + integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ + vector); + __m256i adjacent_4_combined = + libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Shifting up the values at the + even indices by 12, we get: + b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ ... */ + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)12, adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - adjacent_4_combined0, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, - (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); + (int32_t)12, + /* Viewing this as a set of 64-bit integers we get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ ... Shifting down by 12 gives us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ ... */ + adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* |adjacent_4_combined|, when + the bottom and top 128 + bit-lanes are grouped into + bytes, looks like: + 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ + 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ + In each 128-bit lane, we + want to put bytes 8, 9, 10, + 11, 12 after bytes 0, 1, 2, + 3 to allow for sequential + reading. 
*/ + adjacent_4_combined0, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, + (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0)); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 64 bits + starting at position 0 in + the lower 128-bit lane, + ... */ + adjacent_8_combined); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, adjacent_8_combined, __m128i); + (int32_t)1, + /* and 64 bits starting at position 0 in the upper 128-bit lane. */ + adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -981,8 +1320,167 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If + |vector| + is + laid + out + as + follows + (superscript + number + indicates + the + corresponding + bit + is + duplicated + that + many + times): + 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ + 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ + | ↩ + 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ + 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ + | ↩ + ... + |adjacent_2_combined| + will + be + laid + out + as a + series + of + 32-bit + integers, + as + follows: + 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + .... + Shifting + up + the + values + at + the + even + indices + by + 12, + we + get: + b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ + | ↩ + f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ + | ↩ + ... + Viewing + this + as a + set + of + 64-bit + integers + we + get: + 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² + | ↩ + 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² + | ↩ + ... + Shifting + down + by + 12 + gives + us: + 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ + | ↩ + 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ + | ↩ + ... + |adjacent_4_combined|, + when + the + bottom + and + top + 128 + bit-lanes + are + grouped + into + bytes, + looks + like: + 0₇0₆0₅B₄B₃B₂B₁B₀ + | ↩ + 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ + | ↩ + In + each + 128-bit + lane, + we + want + to + put + bytes + 8, + 9, + 10, + 11, + 12 + after + bytes + 0, + 1, + 2, 3 + to + allow + for + sequential + reading. + We + now + have + 64 + bits + starting + at + position + 0 in + the + lower + 128-bit + lane, + ... + and + 64 + bits + starting + at + position + 0 in + the + upper + 128-bit + lane. 
+ */ + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1038,16 +1536,20 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 6U)); __m256i coefficients1 = libcrux_intrinsics_avx2_mm256_srli_epi16( (int32_t)6, coefficients0, __m256i); - return libcrux_intrinsics_avx2_mm256_and_si256( - coefficients1, libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 10U) - (int16_t)1)); + return libcrux_intrinsics_avx2_mm256_and_si256(/* Here I can prove this `and` + is not useful */ + coefficients1, + libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 10U) - + (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = - Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( + /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, + (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1233,28 +1735,70 @@ libcrux_ml_kem_vector_avx2_sampling_rejection_sample(Eurydice_slice input, __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can + be interpreted as a + sequence of + serialized 12-bit + (i.e. uncompressed) + coefficients. Not + all coefficients + may be less than + FIELD_MODULUS + though. */ + input); __m256i compare_with_field_modulus = - libcrux_intrinsics_avx2_mm256_cmpgt_epi16(field_modulus, + libcrux_intrinsics_avx2_mm256_cmpgt_epi16(/* Suppose we view + |potential_coefficients| as + follows (grouping 64-bit + elements): A B C D | E F G H + | .... and A < 3329, D < 3329 + and H < 3329, + |compare_with_field_modulus| + will look like: 0xFF 0 0 0xFF + | 0 0 0 0xFF | ... */ + field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each + lane is either 0 or 1, we + only need one bit from + each lane in the register + to tell us what + coefficients to keep and + what to throw-away. + Combine all the bits + (there are 16) into two + bytes. */ + compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, + /* Each bit (and its corresponding position) represents an element we + want to sample. We'd like all such elements to be next to each other + starting at index 0, so that they can be read from the vector + easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level + shuffling indices needed to make this happen. For e.g. if good[0] = + 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit + lane to the first. To do this, we need the byte-level shuffle + indices to be 2 3 X X X X ... 
*/ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = libcrux_intrinsics_avx2_mm_loadu_si128( - Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); + __m128i lower_shuffles0 = + libcrux_intrinsics_avx2_mm_loadu_si128(Eurydice_array_to_slice( + (size_t)16U, + /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, + uint8_t)); __m128i lower_coefficients = libcrux_intrinsics_avx2_mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = libcrux_intrinsics_avx2_mm_shuffle_epi8( lower_coefficients, lower_shuffles0); - libcrux_intrinsics_avx2_mm_storeu_si128(output, lower_coefficients0); + libcrux_intrinsics_avx2_mm_storeu_si128( + /* ... then write them out ... */ output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, + /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1435,7 +1979,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)10); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1445,11 +1991,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, decompressed_low1, __m256i); + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1459,13 +2009,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, decompressed_high1, __m256i); + (int32_t)10, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -1531,7 +2097,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)11); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1541,11 +2109,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, decompressed_low1, __m256i); + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1555,13 +2127,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, decompressed_high1, __m256i); + (int32_t)11, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -1665,7 +2253,13 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -1676,7 +2270,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = libcrux_ml_kem_ntt_ntt_layer_int_vec_step_61( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -1699,8 +2293,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_3_61( size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_3_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); } } @@ -1718,8 +2311,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_2_61( size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_2_step_09( - re->coefficients[round], libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U; } } @@ -1738,10 +2331,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_1_61( size_t round = i; zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_ntt_layer_1_step_09( - re->coefficients[round], libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + 
libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U; } } @@ -1761,7 +2354,11 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -1858,7 +2455,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)4); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1868,11 +2467,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, decompressed_low1, __m256i); + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1882,13 +2485,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, decompressed_high1, __m256i); + (int32_t)4, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -1949,7 +2568,9 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)5); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- */ + vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1959,11 +2580,15 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, decompressed_low1, __m256i); + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. */ + decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1973,13 +2598,29 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, decompressed_high1, __m256i); + (int32_t)5, + /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of + support for const generic expressions. 
*/ + decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - decompressed_low3, decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + decompressed_low3, + decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -2062,13 +2703,13 @@ libcrux_ml_kem_polynomial_ntt_multiply_ef_61( size_t i0 = i; out.coefficients[i0] = libcrux_ml_kem_vector_avx2_ntt_multiply_09( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); } return out; } @@ -2089,9 +2730,14 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, self->coefficients, __m256i), - __m256i); + i < + Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, + /* The semicolon and parentheses at the end of + loop are a workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2114,11 +2760,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_1_step_09( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U; } } @@ -2138,9 +2783,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_2_step_09( - re->coefficients[round], - 
libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U; } } @@ -2161,7 +2805,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61( re->coefficients[round] = libcrux_ml_kem_vector_avx2_inv_ntt_layer_3_step_09( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); } } @@ -2196,7 +2840,13 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2209,7 +2859,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( libcrux_ml_kem_vector_avx2_SIMD256Vector_x2 uu____0 = libcrux_ml_kem_invert_ntt_inv_ntt_layer_int_vec_step_reduce_61( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); __m256i x = uu____0.fst; __m256i y = uu____0.snd; re->coefficients[j] = x; @@ -2228,7 +2878,10 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61(&zeta_i, re, (size_t)3U); @@ -2423,11 +3076,16 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_2f( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(/* u := + Decompress_q(Decode_{d_u}(c), + d_u) */ + ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = libcrux_ml_kem_matrix_compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -2450,7 +3108,8 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_2f( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 
secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab( + /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -2999,6 +3658,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. + */ while (true) { if (done) { break; @@ -3065,7 +3728,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3087,12 +3750,15 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_fa( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -3362,7 +4028,12 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -3383,7 +4054,10 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_61(re); + libcrux_ml_kem_ntt_ntt_at_layer_7_61(/* Due to the small coefficient bound, we + can skip the first round of Montgomery + reductions. 
*/ + re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -3594,7 +4268,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3717,8 +4395,26 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( + self->coefficients + [/* FIXME: Eurydice crashes with: Warning 11: in top-level + declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing function cannot be + translated into C*: let mutable ret(Mark.Present,(Mark.AtMost + 2), ): int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. On the following code: + ```rust result.coefficients[i] = + Vector::barrett_reduce(Vector::add( coefficient_normal_form, + &Vector::add(self.coefficients[i], &message.coefficients[i]), + )); ``` */ + i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3776,9 +4472,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. + the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... 
*/ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -3787,11 +4497,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -3805,10 +4521,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -3872,9 +4601,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. + the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... 
*/ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -3883,11 +4626,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -3901,10 +4650,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4020,9 +4782,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. + the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... 
*/ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4031,11 +4807,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4049,10 +4831,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. */ + compressed, __m256i); } /** @@ -4083,7 +4878,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficient = libcrux_ml_kem_vector_avx2_compress_09_d1( libcrux_ml_kem_serialize_to_unsigned_field_modulus_61( @@ -4115,9 +4914,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 + coefficients ---- Take + the bottom 128 bits, i.e. 
+ the first 8 16-bit + coefficients */ + vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] + = A + coefficients_low[16:31] = + B coefficients_low[32:63] + = C and so on ... after + this step: + coefficients_low[0:31] = A + coefficients_low[32:63] = + B and so on ... */ + coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4126,11 +4939,17 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, compressed_low1, __m256i); + (int32_t)3, + /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we + just need to shift right by 35 - 32 = 3 more. */ + compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, vector, __m128i); + (int32_t)1, + /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, + i.e. the next 8 16-bit coefficients */ + vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4144,10 +4963,23 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( - compressed_low3, compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, - compressed, __m256i); + __m256i compressed = + libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping + each set of 64-bits, this + function results in: 0: low + low low low | 1: high high + high high | 2: low low low + low | 3: high high high high + where each |low| and |high| + is a 16-bit element */ + compressed_low3, + compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( + (int32_t)216, + /* To be in the right order, we need to move the |low|s above in position + 2 to position 1 and the |high|s in position 1 to position 2, and leave + the rest unchanged. 
*/ + compressed, __m256i); } /** @@ -4178,7 +5010,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; __m256i coefficients = libcrux_ml_kem_vector_avx2_compress_09_f4( libcrux_ml_kem_vector_traits_to_unsigned_representative_61( @@ -4270,7 +5106,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -4283,6 +5122,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_b4( copy_of_prf_input, domain_separator0); @@ -4291,7 +5131,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_avx2_PRF_a9_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4299,10 +5139,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_sampling_sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_ab(public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ + public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_61( @@ -4312,12 +5154,14 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_8c( 
uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -4460,17 +5304,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_a1( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_d8_ae(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_d8_ae(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -4810,11 +5654,18 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_61( - self->coefficients[j]); + self->coefficients[/* The coefficients are of the form aR^{-1} mod + q, which means calling to_montgomery_domain() + on them should return a mod q. */ + j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -4845,6 +5696,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
+ */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_61(); t_as_ntt[i0] = uu____0; @@ -4925,7 +5778,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -4958,8 +5813,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5095,12 +5950,18 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_8c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_ed( - public_key->t_as_ntt, - Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_ed(/* pk := (Encode_12(tˆ + mod^{+}q) || ρ) */ + public_key->t_as_ntt, + Eurydice_array_to_slice( + (size_t)32U, + public_key->seed_for_A, + uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(/* sk := Encode_12(sˆ mod^{+}q) + */ + private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -5442,17 +6303,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_a10( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_33_ae(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_33_ae(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -5772,7 +6633,9 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, 
libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_be(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5805,8 +6668,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5971,7 +6834,10 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_avx2_H_a9_e0( - Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(/* Eurydice can't access values directly on + the types. We need to go to the `value` + directly. */ + private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -6931,6 +7797,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_b3( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -6998,7 +7868,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_b3( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7021,12 +7891,15 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_bf( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7061,7 +7934,10 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_e2( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, + /* XXX: We need to copy_from_slice here because karamel can't handle the + assignment cf. https://github.com/FStarLang/karamel/pull/491 */ + key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_f6); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h index 519b51565..7a9446452 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_mlkem768_portable_H @@ -87,7 +87,7 @@ static const int16_t libcrux_ml_kem_polynomial_ZETAS_TIMES_MONTGOMERY_R[128U] = (int16_t)-108, (int16_t)-308, (int16_t)996, (int16_t)991, (int16_t)958, (int16_t)-1460, (int16_t)1522, (int16_t)1628}; -static KRML_MUSTINLINE int16_t libcrux_ml_kem_polynomial_get_zeta(size_t i) { +static KRML_MUSTINLINE int16_t libcrux_ml_kem_polynomial_zeta(size_t i) { return libcrux_ml_kem_polynomial_ZETAS_TIMES_MONTGOMERY_R[i]; } @@ -1235,11 +1235,28 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( static inline uint8_t 
libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = (int16_t)1664 - (int16_t)fe; - int16_t mask = shifted >> 15U; + int16_t shifted = + (int16_t)1664 - + (int16_t) /* The approach used here is inspired by: + https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 + If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ + fe; + int16_t mask = + /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = + -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive + <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so + if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ + shifted + + >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = shifted_positive_in_range >> 15U; + int16_t r0 = + /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the + most significant bit of shifted_positive_in_range will be 1. */ + shifted_positive_in_range + + >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1276,7 +1293,16 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( static inline int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; + uint64_t compressed = + (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits + == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); + hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to + be constant time due to: + https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ + */ + fe + + << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2878,7 +2904,13 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2889,7 +2921,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = libcrux_ml_kem_ntt_ntt_layer_int_vec_step_8c( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -2913,7 +2945,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0; } } @@ -2932,9 +2964,8 @@ static KRML_MUSTINLINE void 
libcrux_ml_kem_ntt_ntt_at_layer_2_8c( zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_2_step_0d( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U)); zeta_i[0U] = zeta_i[0U] + (size_t)1U; } } @@ -2953,11 +2984,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_1_8c( zeta_i[0U] = zeta_i[0U] + (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_ntt_layer_1_step_0d( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] + (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] + (size_t)3U)); zeta_i[0U] = zeta_i[0U] + (size_t)3U; } } @@ -2976,7 +3006,11 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3222,13 +3256,13 @@ libcrux_ml_kem_polynomial_ntt_multiply_ef_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_ntt_multiply_0d( &self->coefficients[i0], &rhs->coefficients[i0], - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta((size_t)64U + (size_t)4U * i0 + - (size_t)3U)); + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)1U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)2U), + libcrux_ml_kem_polynomial_zeta((size_t)64U + (size_t)4U * i0 + + (size_t)3U)); out.coefficients[i0] = uu____0; } return out; @@ -3251,7 +3285,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, self->coefficients, + (size_t)16U, + /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3277,11 +3315,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_1_step_0d( - re->coefficients[round], - 
libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)2U), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)3U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)2U), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)3U)); zeta_i[0U] = zeta_i[0U] - (size_t)3U; } } @@ -3300,9 +3337,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c( zeta_i[0U] = zeta_i[0U] - (size_t)1U; re->coefficients[round] = libcrux_ml_kem_vector_portable_inv_ntt_layer_2_step_0d( - re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U]), - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U] - (size_t)1U)); + re->coefficients[round], libcrux_ml_kem_polynomial_zeta(zeta_i[0U]), + libcrux_ml_kem_polynomial_zeta(zeta_i[0U] - (size_t)1U)); zeta_i[0U] = zeta_i[0U] - (size_t)1U; } } @@ -3322,7 +3358,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_inv_ntt_layer_3_step_0d( re->coefficients[round], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); re->coefficients[round] = uu____0; } } @@ -3360,7 +3396,13 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { + for (size_t i0 = (size_t)0U; + i0 < (size_t)128U >> + (uint32_t) /* The semicolon and parentheses at the end of loop are a + workaround for the following bug + https://github.com/hacspec/hax/issues/720 */ + layer; + i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3373,7 +3415,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( libcrux_ml_kem_vector_portable_vector_type_PortableVector_x2 uu____0 = libcrux_ml_kem_invert_ntt_inv_ntt_layer_int_vec_step_reduce_8c( re->coefficients[j], re->coefficients[j + step_vec], - libcrux_ml_kem_polynomial_get_zeta(zeta_i[0U])); + libcrux_ml_kem_polynomial_zeta(zeta_i[0U])); libcrux_ml_kem_vector_portable_vector_type_PortableVector x = uu____0.fst; libcrux_ml_kem_vector_portable_vector_type_PortableVector y = uu____0.snd; re->coefficients[j] = x; @@ -3391,7 +3433,10 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; + /* We only ever call this function after matrix/vector multiplication */ + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT + + / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c(&zeta_i, re, (size_t)3U); @@ -3595,11 +3640,16 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_42( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - 
libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(/* u := + Decompress_q(Decode_{d_u}(c), + d_u) */ + ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, - (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from( + (size_t)1088U, + /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ + ciphertext, (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = libcrux_ml_kem_matrix_compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3621,7 +3671,8 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_42( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b( + /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -4156,6 +4207,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); + /* Requiring more than 5 blocks to sample a ring element should be very + * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid + * failing here, we squeeze more blocks out of the state until we have enough. 
+ */ while (true) { if (done) { break; @@ -4222,7 +4277,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (transpose) { + if (/* A[i][j] = A_transpose[j][i] */ transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -4244,12 +4299,15 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_3f( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = - Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = Eurydice_slice_subslice_to( + /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_1b( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 + do for j from 0 to k − 1 do AˆT[i][j] := + Parse(XOF(ρ, i, j)) end for end for */ + public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4497,7 +4555,12 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; i < step; i++) { + for (size_t i = (size_t)0U; + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + step; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -4519,7 +4582,10 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_8c(re); + libcrux_ml_kem_ntt_ntt_at_layer_7_8c(/* Due to the small coefficient bound, we + can skip the first round of Montgomery + reductions. 
*/ + re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4726,7 +4792,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4858,8 +4928,28 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d( + self->coefficients + [/* FIXME: Eurydice crashes with: Warning 11: in + top-level declaration + libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: + this expression is not Low*; the enclosing + function cannot be translated into C*: let + mutable ret(Mark.Present,(Mark.AtMost 2), ): + int16_t[16size_t] = $any in + libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add + ((@9: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] + &(((@8: + libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) + @0; @0 Warning 11 is fatal, exiting. 
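The FIXME comment here (it continues into the next hunk and quotes the original Rust) records why the source binds the inner `add` to a temporary instead of nesting the two calls; the extracted C around it follows exactly that shape. A rough sketch of the workaround, with an informal `Vector` trait standing in for the real `Operations` trait (not the libcrux signatures):

```rust
// Illustrative only: `Vector` loosely mirrors the Operations trait.
trait Vector: Copy {
    fn add(lhs: Self, rhs: &Self) -> Self;
    fn barrett_reduce(v: Self) -> Self;
}

fn add_message_error_reduce<V: Vector>(coefficient_normal_form: V, s: V, m: V) -> V {
    // Instead of V::add(coefficient_normal_form, &V::add(s, &m)) in a single
    // expression, bind the inner sum first so Eurydice/KaRaMeL can extract it:
    let tmp = V::add(s, &m);
    let tmp = V::add(coefficient_normal_form, &tmp);
    V::barrett_reduce(tmp)
}
```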
On the + following code: ```rust result.coefficients[i] + = Vector::barrett_reduce(Vector::add( + coefficient_normal_form, + &Vector::add(self.coefficients[i], + &message.coefficients[i]), )); ``` */ + i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -5116,7 +5206,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = libcrux_ml_kem_vector_portable_compress_0d_d1( @@ -5176,7 +5270,11 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = libcrux_ml_kem_vector_portable_compress_0d_f4( @@ -5268,7 +5366,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := + CBD{η1}(PRF(r, N)) N := N + 1 end + for rˆ := NTT(r) */ + randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5281,6 +5382,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; + /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_3b( copy_of_prf_input, domain_separator0); @@ -5289,7 +5391,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = domain_separator; + prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_portable_PRF_f1_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5297,10 +5399,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_sampling_sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - 
libcrux_ml_kem_matrix_compute_vector_u_1b(public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ + public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; + /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_8c( @@ -5310,12 +5414,14 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; + /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); + /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5456,17 +5562,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_62( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_d8_d6(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_d8_d6(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -5741,12 +5847,20 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { + i < + /* The semicolon and parentheses at the end of loop are a workaround for + the following bug https://github.com/hacspec/hax/issues/720 */ + LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; + i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_8c( - self->coefficients[j]); + self->coefficients[/* The coefficients are of the form aR^{-1} + mod q, which means calling + to_montgomery_domain() on them should + return a mod q. 
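Pulling together the spec annotations scattered through the encrypt hunk above (ML-KEM 768 parameters: k = 3, η₁ = η₂ = 2, d_u = 10, d_v = 4, so c₁ is 960 bytes and c₂ is 128 bytes, matching the offsets in the code), the ciphertext is assembled as follows; the PRF counter N is incremented after every call:

```latex
% IND-CPA encryption steps as annotated in the hunks above
\[
\begin{aligned}
r_i &\leftarrow \mathrm{CBD}_{\eta_1}(\mathrm{PRF}(r, N)), \quad
e_{1,i} \leftarrow \mathrm{CBD}_{\eta_2}(\mathrm{PRF}(r, N)), \quad
e_2 \leftarrow \mathrm{CBD}_{\eta_2}(\mathrm{PRF}(r, N)) \\
u &= \mathrm{NTT}^{-1}(\hat{A}^{T} \circ \hat{r}) + e_1 \\
v &= \mathrm{NTT}^{-1}(\hat{t}^{T} \circ \hat{r}) + e_2 + \mathrm{Decompress}_q(\mathrm{Decode}_1(m), 1) \\
c &= \mathrm{Encode}_{d_u}(\mathrm{Compress}_q(u, d_u)) \,\|\, \mathrm{Encode}_{d_v}(\mathrm{Compress}_q(v, d_v))
\end{aligned}
\]
```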
*/ + j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -5778,6 +5892,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; + /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. + */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_8c(); t_as_ntt[i0] = uu____0; @@ -5857,7 +5973,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5890,8 +6008,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6023,12 +6141,18 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_43( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_6c( - public_key->t_as_ntt, - Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_6c(/* pk := (Encode_12(tˆ + mod^{+}q) || ρ) */ + public_key->t_as_ntt, + Eurydice_array_to_slice( + (size_t)32U, + public_key->seed_for_A, + uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_89(private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_89(/* sk := Encode_12(sˆ mod^{+}q) + */ + private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6340,17 +6464,17 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cca_decapsulate_620( Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret0, uint8_t), ciphertext, implicit_rejection_shared_secret); - uint8_t shared_secret1[32U]; - libcrux_ml_kem_variant_kdf_33_d6(shared_secret0, ciphertext, shared_secret1); uint8_t shared_secret[32U]; + libcrux_ml_kem_variant_kdf_33_d6(shared_secret0, ciphertext, shared_secret); + uint8_t ret0[32U]; libcrux_ml_kem_constant_time_ops_compare_ciphertexts_select_shared_secret_in_constant_time( libcrux_ml_kem_types_as_ref_43_80(ciphertext), Eurydice_array_to_slice((size_t)1088U, expected_ciphertext, uint8_t), - Eurydice_array_to_slice((size_t)32U, shared_secret1, uint8_t), + 
Eurydice_array_to_slice((size_t)32U, shared_secret, uint8_t), Eurydice_array_to_slice((size_t)32U, implicit_rejection_shared_secret, uint8_t), - shared_secret); - memcpy(ret, shared_secret, (size_t)32U * sizeof(uint8_t)); + ret0); + memcpy(ret, ret0, (size_t)32U * sizeof(uint8_t)); } /** @@ -6609,7 +6733,9 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) + := G(d || K) for ML-KEM */ + key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6642,8 +6768,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, - error_as_ntt); + /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6781,7 +6907,10 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_portable_H_f1_e0( - Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(/* Eurydice can't access values directly on + the types. We need to go to the `value` + directly. */ + private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7599,7 +7728,10 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_df( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, + /* XXX: We need to copy_from_slice here because karamel can't handle the + assignment cf. 
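The `validate_private_key_only` hunk above checks the ML-KEM secret-key invariant H(pk) = h, where pk is the public key embedded in the secret key; the offsets 384·k = 1152 and 768·k + 32 = 2336 visible in the subslice correspond to k = 3. A minimal sketch of that check for ML-KEM 768 sizes, with `sha3_256` standing in for any SHA3-256 implementation (an assumption, not the libcrux hash interface):

```rust
// Illustrative sketch for ML-KEM 768 sizes only.
fn validate_private_key(sk: &[u8; 2400], sha3_256: impl Fn(&[u8]) -> [u8; 32]) -> bool {
    let pk = &sk[1152..2336];          // Encode_12(s) is sk[..1152]; pk follows
    let stored_hash = &sk[2336..2368]; // H(pk) as stored at key generation
    // The final 32 bytes of sk are the implicit-rejection value z.
    sha3_256(pk).as_slice() == stored_hash
}
```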
https://github.com/FStarLang/karamel/pull/491 */ + key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_1d); libcrux_ml_kem_polynomial_PolynomialRingElement_1d ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h index a77bfdbea..5955882fa 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_sha3_avx2_H @@ -104,7 +104,9 @@ libcrux_sha3_simd_avx2_and_not_xor_ef(__m256i a, __m256i b, __m256i c) { KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_sha3_simd_avx2__veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x((int64_t)c); + __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x( + (int64_t) /* Casting here is required, doesn't change the value. */ + c); return libcrux_intrinsics_avx2_mm256_xor_si256(a, c0); } @@ -1699,7 +1701,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_5b( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], @@ -2034,7 +2036,15 @@ static KRML_MUSTINLINE void libcrux_sha3_avx2_x4_shake256( Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = { + /* XXX: These functions could alternatively implement the same with the + portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, + 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, + 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, + 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); + keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, + 0x1fu8>([input3], [out3]); } */ + input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; libcrux_sha3_generic_keccak_keccak_fb(buf0, buf); } @@ -2274,7 +2284,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_3a( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], diff --git a/libcrux-ml-kem/cg/libcrux_sha3_portable.h 
b/libcrux-ml-kem/cg/libcrux_sha3_portable.h index d85d8e543..211cf1919 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_portable.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 - * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 - * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd - * F*: 5643e656b989aca7629723653a2570c7df6252b9-dirty - * Libcrux: 3e54f3c659bef6ee815d197ee5c74dd40c75186a + * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f + * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c + * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 + * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc + * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 */ #ifndef __libcrux_sha3_portable_H @@ -1654,6 +1654,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } @@ -2012,6 +2013,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -2140,6 +2142,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2746,6 +2749,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -3104,6 +3108,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -3399,6 +3404,7 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; + /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -3496,6 +3502,7 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice 
input1, Eurydice_slice out0, Eurydice_slice out1) { + /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3510,6 +3517,9 @@ typedef struct libcrux_sha3_neon_x2_incremental_KeccakState_s { */ static KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let s0 = KeccakState::new(); let s1 = + * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3522,6 +3532,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3535,6 +3549,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_first_three_blocks(&mut s0, out0); + * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3548,6 +3566,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_squeeze_next_block(&mut s0, out0); + * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3572,6 +3594,10 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { + /* XXX: These functions could alternatively implement the same with the + * portable implementation { let [mut s0, mut s1] = s; + * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, + * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3733,8 +3759,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)136U) { - consumed = (size_t)136U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)136U) { + consumed = (size_t)136U - /* We have enough data when combining the + internal buffer and the input. 
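The buffering rule annotated in this `fill_buffer` hunk is: when data is already buffered and the new input would complete a full rate-sized block, consume exactly enough input bytes to fill the buffer, otherwise consume nothing at this step. A sketch of just that rule for the 136-byte SHAKE-256 rate (the struct and method names are illustrative):

```rust
// Illustrative sketch of the buffering rule, not the libcrux state machine.
const RATE: usize = 136;

struct Buffer {
    buf: [u8; RATE],
    buf_len: usize,
}

impl Buffer {
    /// Returns how many input bytes were consumed into the internal buffer.
    fn fill_buffer(&mut self, input: &[u8]) -> usize {
        let mut consumed = 0;
        // There's something buffered internally to consume, and the combined
        // data is enough for a full block.
        if self.buf_len > 0 && self.buf_len + input.len() >= RATE {
            consumed = RATE - self.buf_len;
            self.buf[self.buf_len..].copy_from_slice(&input[..consumed]);
            self.buf_len = RATE;
        }
        consumed
    }
}
```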
*/ + self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -3840,7 +3871,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). */ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4187,8 +4220,13 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if (self->buf_len + input_len >= (size_t)168U) { - consumed = (size_t)168U - self->buf_len; + if ( + /* There's something buffered internally to consume. */ self->buf_len + + input_len >= + (size_t)168U) { + consumed = (size_t)168U - /* We have enough data when combining the + internal buffer and the input. */ + self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -4294,7 +4332,9 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if (input_remainder_len > (size_t)0U) { + if ( + /* ... buffer the rest if there's not enough input (left). */ + input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4684,7 +4724,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= out_len) { + if ((size_t)136U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. + https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)136U; @@ -4698,8 +4744,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4708,7 +4757,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. 
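The squeeze hunks above split the requested output into a first block of min(RATE, out_len) bytes (spelled out with an explicit comparison because Eurydice does not extract `core::cmp::min`), then whole blocks, then a trailing partial block. A sketch of just that arithmetic for the 136-byte rate; the function name is hypothetical:

```rust
// Illustrative sketch of the output-splitting arithmetic in the squeeze path.
const RATE: usize = 136;

fn split_squeeze_lengths(out_len: usize) -> (usize, usize, usize) {
    // min(RATE, out_len), written without core::cmp::min as in the C above.
    let first = if RATE >= out_len { out_len } else { RATE };
    // Number of complete RATE-sized blocks; the extracted loop runs over
    // 1..blocks to squeeze the full blocks after the first one.
    let full_blocks = out_len / RATE;
    // Offset where the trailing partial block starts.
    let last_offset = out_len - out_len % RATE;
    (first, full_blocks, last_offset)
}
```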
+ */ + out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -4803,7 +4856,13 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= out_len) { + if ((size_t)168U >= + /* Squeeze out one to start with. XXX: Eurydice does not extract + `core::cmp::min`, so we do this instead. (cf. + https://github.com/AeneasVerif/eurydice/issues/49) */ + out_len + + ) { mid = out_len; } else { mid = (size_t)168U; @@ -4817,8 +4876,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, - .end = blocks}), + (CLITERAL(core_ops_range_Range_08){ + .start = (size_t)1U, + .end = /* If we got asked for more than one block, squeeze out + more. */ + blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4827,7 +4889,11 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we + always have full + blocks to write out. + */ + out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); From 98f9a92172d7a531ad6fa41fd018056fdbd60851 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 19:17:37 +0000 Subject: [PATCH 5/7] c code --- libcrux-ml-kem/c/code_gen.txt | 10 +- libcrux-ml-kem/c/internal/libcrux_core.h | 10 +- .../c/internal/libcrux_mlkem_avx2.h | 10 +- .../c/internal/libcrux_mlkem_portable.h | 10 +- libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h | 10 +- .../c/internal/libcrux_sha3_internal.h | 78 +- libcrux-ml-kem/c/libcrux_core.c | 10 +- libcrux-ml-kem/c/libcrux_core.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem1024_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem512_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.c | 10 +- libcrux-ml-kem/c/libcrux_mlkem768_portable.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_avx2.c | 1303 ++++------------ libcrux-ml-kem/c/libcrux_mlkem_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_mlkem_portable.c | 373 ++--- libcrux-ml-kem/c/libcrux_mlkem_portable.h | 10 +- libcrux-ml-kem/c/libcrux_sha3.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_avx2.c | 52 +- libcrux-ml-kem/c/libcrux_sha3_avx2.h | 10 +- libcrux-ml-kem/c/libcrux_sha3_internal.h | 16 +- libcrux-ml-kem/c/libcrux_sha3_neon.c | 30 +- libcrux-ml-kem/c/libcrux_sha3_neon.h | 10 +- libcrux-ml-kem/cg/code_gen.txt | 10 +- 
libcrux-ml-kem/cg/libcrux_core.h | 10 +- libcrux-ml-kem/cg/libcrux_ct_ops.h | 10 +- libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h | 1309 +++-------------- libcrux-ml-kem/cg/libcrux_mlkem768_portable.h | 230 +-- libcrux-ml-kem/cg/libcrux_sha3_avx2.h | 28 +- libcrux-ml-kem/cg/libcrux_sha3_portable.h | 104 +- 40 files changed, 826 insertions(+), 2997 deletions(-) diff --git a/libcrux-ml-kem/c/code_gen.txt b/libcrux-ml-kem/c/code_gen.txt index 8606206e0..54242b657 100644 --- a/libcrux-ml-kem/c/code_gen.txt +++ b/libcrux-ml-kem/c/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f -Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c -Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 -F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc -Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 +Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 +Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 +Karamel: 8c3612018c25889288da6857771be3ad03b75bcd +F*: 5643e656b989aca7629723653a2570c7df6252b9 +Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 diff --git a/libcrux-ml-kem/c/internal/libcrux_core.h b/libcrux-ml-kem/c/internal/libcrux_core.h index fe0dc7d7d..fe89acd19 100644 --- a/libcrux-ml-kem/c/internal/libcrux_core.h +++ b/libcrux-ml-kem/c/internal/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_core_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h index 48345a968..466ef3ba0 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h index e89d87311..f108fb1a3 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 
2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h index 78fe0a95b..67b2d4675 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h index 92381f50f..342c481f4 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_sha3_internal_H @@ -273,13 +273,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)136U) { - consumed = (size_t)136U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)136U) { + consumed = (size_t)136U - self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -385,9 +380,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). 
*/ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -734,13 +727,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)168U) { - consumed = (size_t)168U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)168U) { + consumed = (size_t)168U - self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -846,9 +834,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -1238,13 +1224,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)136U >= out_len) { mid = out_len; } else { mid = (size_t)136U; @@ -1258,11 +1238,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1271,11 +1248,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -1370,13 +1343,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. 
- https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)168U >= out_len) { mid = out_len; } else { mid = (size_t)168U; @@ -1390,11 +1357,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1403,11 +1367,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); diff --git a/libcrux-ml-kem/c/libcrux_core.c b/libcrux-ml-kem/c/libcrux_core.c index de354115a..e69d41843 100644 --- a/libcrux-ml-kem/c/libcrux_core.c +++ b/libcrux-ml-kem/c/libcrux_core.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_core.h" diff --git a/libcrux-ml-kem/c/libcrux_core.h b/libcrux-ml-kem/c/libcrux_core.h index 55c5c5d8e..9097eceda 100644 --- a/libcrux-ml-kem/c/libcrux_core.h +++ b/libcrux-ml-kem/c/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024.h b/libcrux-ml-kem/c/libcrux_mlkem1024.h index 37334a9b1..041b2ec09 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 
2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c index 778d6fbf3..5fec937b0 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem1024_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h index 854751c45..96971f755 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c index e463cb267..c63594eaa 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem1024_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h index 430c904d1..f951149be 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 
3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512.h b/libcrux-ml-kem/c/libcrux_mlkem512.h index fb7755a5a..0e850ae5d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c index 3e9fbd0cc..7971b5c4f 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem512_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h index 79012290d..3c4030f73 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c index 8639c4603..b8f6fd756 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c @@ -4,11 +4,11 @@ * 
SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem512_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h index faea31c8a..7766250f2 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768.h b/libcrux-ml-kem/c/libcrux_mlkem768.h index 474b96082..f2c7db21a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c index a7a0f7e7d..d30955e8a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem768_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h index 35608499b..ea29365da 100644 --- 
a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c index 2d21b9d89..1cdebda61 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem768_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h index 514894426..6c512c865 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c index 64e5d2462..7cd2d548f 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_mlkem_avx2.h" 
@@ -141,16 +141,11 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); - __m256i v_minus_field_modulus = - mm256_sub_epi16(/* Compute v_i - Q and crate a mask from the sign bit of - each of these quantities. */ - vector, - field_modulus); + __m256i v_minus_field_modulus = mm256_sub_epi16(vector, field_modulus); __m256i sign_mask = mm256_srai_epi16((int32_t)15, v_minus_field_modulus, __m256i); - __m256i conditional_add_field_modulus = mm256_and_si256( - /* If v_i - Q < 0 then add back Q to (v_i - Q). */ sign_mask, - field_modulus); + __m256i conditional_add_field_modulus = + mm256_and_si256(sign_mask, field_modulus); return mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -455,7 +450,6 @@ libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(__m256i vec) { KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { - /* Compute the first term of the product */ __m256i shuffle_with = mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -463,8 +457,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = - mm256_shuffle_epi8(/* Prepare the left hand side */ lhs, shuffle_with); + __m256i lhs_shuffled = mm256_shuffle_epi8(lhs, shuffle_with); __m256i lhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = mm256_castsi256_si128(lhs_shuffled0); @@ -472,8 +465,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = mm256_extracti128_si256((int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = - mm256_shuffle_epi8(/* Prepare the right hand side */ rhs, shuffle_with); + __m256i rhs_shuffled = mm256_shuffle_epi8(rhs, shuffle_with); __m256i rhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = mm256_castsi256_si128(rhs_shuffled0); @@ -481,8 +473,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = mm256_extracti128_si256((int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = mm256_cvtepi16_epi32(rhs_odds); - __m256i left = - mm256_mullo_epi32(/* Start operating with them */ lhs_evens0, rhs_evens0); + __m256i left = mm256_mullo_epi32(lhs_evens0, rhs_evens0); __m256i right = mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(right); @@ -495,7 +486,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = mm256_shuffle_epi8( - /* Compute the second term of the product */ rhs, + rhs, mm256_set_epi8((int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, (int8_t)1, (int8_t)0, (int8_t)3, @@ -509,9 +500,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = mm256_slli_epi32((int32_t)16, 
products_right0, __m256i); - return mm256_blend_epi16((int32_t)170, - /* Combine them into one vector */ products_left0, - products_right1, __m256i); + return mm256_blend_epi16((int32_t)170, products_left0, products_right1, + __m256i); } /** @@ -527,44 +517,11 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = mm256_slli_epi16( - (int32_t)15, - /* Suppose |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ - 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least - significant bit in each lane, move it to the most significant position - to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ - d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ - n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ - vector, __m256i); - __m128i low_msbs = mm256_castsi256_si128( - /* Get the first 8 16-bit elements ... */ lsb_to_msb); - __m128i high_msbs = mm256_extracti128_si256( - (int32_t)1, - /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); - __m128i msbs = - mm_packs_epi16(/* ... and then pack them into 8-bit values using signed - saturation. This function packs all the |low_msbs|, and - then the high ones. low_msbs = a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | - e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ - l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ We shifted by 15 above - to take advantage of the signed saturation performed by - mm_packs_epi16: - if the sign bit of the 16-bit element - being packed is 1, the corresponding 8-bit element in - |msbs| will be 0xFF. - if the sign bit of the 16-bit - element being packed is 0, the corresponding 8-bit - element in |msbs| will be 0. Thus, if, for example, a₀ = - 1, e₀ = 1, and p₀ = 1, and every other bit is 0, after - packing into 8 bit value, |msbs| will look like: 0xFF - 0x00 0x00 0x00 | 0xFF 0x00 0x00 0x00 | 0x00 0x00 0x00 - 0x00 | 0x00 0x00 0x00 0xFF */ - low_msbs, - high_msbs); - int32_t bits_packed = - mm_movemask_epi8(/* Now that every element is either 0xFF or 0x00, we just - extract the most significant bit from each element and - collate them into two bytes. */ - msbs); + __m256i lsb_to_msb = mm256_slli_epi16((int32_t)15, vector, __m256i); + __m128i low_msbs = mm256_castsi256_si128(lsb_to_msb); + __m128i high_msbs = mm256_extracti128_si256((int32_t)1, lsb_to_msb, __m128i); + __m128i msbs = mm_packs_epi16(low_msbs, high_msbs); + int32_t bits_packed = mm_movemask_epi8(msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -582,39 +539,16 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { __m256i coefficients = - mm256_set_epi16(/* We need to take each bit from the 2 bytes of input and - put them into their own 16-bit lane. Ideally, we'd load - the two bytes into the vector, duplicate them, and - right-shift the 0th element by 0 bits, the first - element by 1 bit, the second by 2 bits and so on before - AND-ing with 0x1 to leave only the least signifinicant - bit. But since |_mm256_srlv_epi16| does not exist, so - we have to resort to a workaround. Rather than shifting - each element by a different amount, we'll multiply each - element by a value such that the bit we're interested - in becomes the most significant bit. 
The coefficients - are loaded as follows: */ - b, - b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = - mm256_mullo_epi16(/* And this vector, when multiplied with the previous - one, ensures that the bit we'd like to keep in each - lane becomes the most significant bit upon - multiplication. */ - coefficients, - mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, - (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, - (int16_t)1 << 8U, (int16_t)1 << 9U, - (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768)); - return mm256_srli_epi16( - (int32_t)15, - /* Now that they're all in the most significant bit position, shift them - down to the least significant bit. */ - coefficients_in_msb, __m256i); + mm256_set_epi16(b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = mm256_mullo_epi16( + coefficients, + mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, + (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, + (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, + (int16_t)-32768)); + return mm256_srli_epi16((int32_t)15, coefficients_in_msb, __m256i); } KRML_MUSTINLINE __m256i @@ -627,23 +561,7 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index( - bytes, - /* We need to take each bit from the 2 bytes of input and put them - into their own 16-bit lane. Ideally, we'd load the two bytes into - the vector, duplicate them, and right-shift the 0th element by 0 - bits, the first element by 1 bit, the second by 2 bits and so on - before AND-ing with 0x1 to leave only the least signifinicant bit. - But since |_mm256_srlv_epi16| does not exist, so we have to resort - to a workaround. Rather than shifting each element by a different - amount, we'll multiply each element by a value such that the bit - we're interested in becomes the most significant bit. The - coefficients are loaded as follows: And this vector, when - multiplied with the previous one, ensures that the bit we'd like to - keep in each lane becomes the most significant bit upon - multiplication. Now that they're all in the most significant bit - position, shift them down to the least significant bit. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -676,47 +594,23 @@ KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 4U, - /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | - 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be - laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA - 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ - vector); - __m256i adjacent_8_combined = - mm256_shuffle_epi8(/* Recall that |adjacent_2_combined| goes as follows: - 0x00_00_00_BA 0x00_00_00_DC | 0x00_00_00_FE - 0x00_00_00_HG | ... 
Out of this, we only need the - first byte, the 4th byte, the 8th byte and so on - from the bottom and the top 128 bits. */ - adjacent_2_combined, - mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); - __m256i combined = - mm256_permutevar8x32_epi32(/* |adjacent_8_combined| looks like this: 0: - 0xHG_FE_DC_BA 1: 0x00_00_00_00 | 2: - 0x00_00_00_00 3: 0x00_00_00_00 | 4: - 0xPO_NM_LK_JI .... We put the element at 4 - after the element at 0 ... */ - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, - (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); + __m256i adjacent_8_combined = mm256_shuffle_epi8( + adjacent_2_combined, + mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, + (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0)); + __m256i combined = mm256_permutevar8x32_epi32( + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); __m128i combined0 = mm256_castsi256_si128(combined); mm_storeu_bytes_si128( - Eurydice_array_to_slice( - (size_t)16U, - /* ... so that we can read them out in one go. */ serialized, - uint8_t), - combined0); + Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); uint8_t ret0[8U]; core_result_Result_15 dst; Eurydice_slice_to_array2( @@ -740,23 +634,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = - mm256_set_epi16(/* Every 4 bits from each byte of input should be put into - its own 16-bit lane. Since |_mm256_srlv_epi16| does not - exist, we have to resort to a workaround. Rather than - shifting each element by a different amount, we'll - multiply each element by a value such that the bits - we're interested in become the most significant bits - (of an 8-bit value). In this lane, the 4 bits we need - to put are already the most significant bits of - |bytes[7]| (that is, b7). */ - b7, - /* In this lane, the 4 bits we need to put are the least - significant bits, so we need to shift the 4 - least-significant bits of |b7| to the most significant - bits (of an 8-bit value). 
*/ - b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, - b0); + __m256i coefficients = mm256_set_epi16(b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, + b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = mm256_mullo_epi16( coefficients, mm256_set_epi16((int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -765,12 +644,9 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); - __m256i coefficients_in_lsb = mm256_srli_epi16( - (int32_t)4, - /* Once the 4-bit coefficients are in the most significant positions (of - an 8-bit value), shift them all down by 4. */ - coefficients_in_msb, __m256i); - return mm256_and_si256(/* Zero the remaining bits. */ coefficients_in_lsb, + __m256i coefficients_in_lsb = + mm256_srli_epi16((int32_t)4, coefficients_in_msb, __m256i); + return mm256_and_si256(coefficients_in_lsb, mm256_set1_epi16(((int16_t)1 << 4U) - (int16_t)1)); } @@ -786,23 +662,7 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index( - bytes, - /* Every 4 bits from each byte of input should be put into its own - 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to - resort to a workaround. Rather than shifting each element by a - different amount, we'll multiply each element by a value such that - the bits we're interested in become the most significant bits (of - an 8-bit value). In this lane, the 4 bits we need to put are - already the most significant bits of |bytes[7]| (that is, b7). In - this lane, the 4 bits we need to put are the least significant - bits, so we need to shift the 4 least-significant bits of |b7| to - the most significant bits (of an 8-bit value). These constants are - chosen to shift the bits of the values that we loaded into - |coefficients|. Once the 4-bit coefficients are in the most - significant positions (of an 8-bit value), shift them all down - by 4. Zero the remaining bits. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -824,78 +684,32 @@ libcrux_ml_kem_vector_avx2_deserialize_4_09(Eurydice_slice bytes) { KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = - mm256_madd_epi16(/* If |vector| is laid out as follows (superscript number - indicates the corresponding bit is duplicated that - many times): 0¹¹a₄a₃a₂a₁a₀ 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ - 0¹¹d₄d₃d₂d₁d₀ | ↩ 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ - 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | ↩ |adjacent_2_combined| - will be laid out as a series of 32-bit integers, as - follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... 
*/ - vector, - mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1)); - __m256i adjacent_4_combined = - mm256_sllv_epi32(/* Recall that |adjacent_2_combined| is laid out as - follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... This shift results - in: b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | - ↩ f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, - (int32_t)22, (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22)); - __m256i adjacent_4_combined0 = mm256_srli_epi64( - (int32_t)22, - /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift - down by 22 bits to remove the least significant 0 bits that aren't part - of the bits we need. */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = mm256_shuffle_epi32( - (int32_t)8, - /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks - like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² - 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to - read out the bytes in one go, we need to shifts the bits in position 2 - to position 1 in each 128-bit lane. */ - adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = - mm256_sllv_epi32(/* |adjacent_8_combined|, when viewed as a set of 32-bit - values, now looks like: - 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 0³² 0³² | - ↩ Once again, we line these bits up by shifting the up - values at indices 0 and 5 by 12, viewing the resulting - register as a set of 64-bit values, and then shifting - down the 64-bit values by 12 bits. */ - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12)); + __m256i adjacent_2_combined = mm256_madd_epi16( + vector, mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); + __m256i adjacent_4_combined = mm256_sllv_epi32( + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22)); + __m256i adjacent_4_combined0 = + mm256_srli_epi64((int32_t)22, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + mm256_shuffle_epi32((int32_t)8, adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = mm256_sllv_epi32( + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = mm256_srli_epi64((int32_t)12, adjacent_8_combined0, __m256i); - __m128i lower_8 = - mm256_castsi256_si128(/* We now have 40 bits starting at position 0 in the - lower 128-bit lane, ... 
*/ - adjacent_8_combined1); + __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined1); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); - __m128i upper_8 = mm256_extracti128_si256( - (int32_t)1, - /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ - adjacent_8_combined1, __m128i); + __m128i upper_8 = + mm256_extracti128_si256((int32_t)1, adjacent_8_combined1, __m128i); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -989,67 +803,25 @@ core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 10U, - /* If |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... - |adjacent_2_combined| will be laid out as a series of 32-bit - integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ - vector); - __m256i adjacent_4_combined = - mm256_sllv_epi32(/* Shifting up the values at the even indices by 12, we - get: b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ ... */ - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); + __m256i adjacent_4_combined = mm256_sllv_epi32( + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)12, - /* Viewing this as a set of 64-bit integers we get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ ... Shifting down by 12 gives us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ ... */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - mm256_shuffle_epi8(/* |adjacent_4_combined|, when the bottom and top 128 - bit-lanes are grouped into bytes, looks like: - 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ In - each 128-bit lane, we want to put bytes 8, 9, 10, - 11, 12 after bytes 0, 1, 2, 3 to allow for - sequential reading. 
*/ - adjacent_4_combined0, - mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); - __m128i lower_8 = - mm256_castsi256_si128(/* We now have 64 bits starting at position 0 in the - lower 128-bit lane, ... */ - adjacent_8_combined); - __m128i upper_8 = mm256_extracti128_si256( - (int32_t)1, - /* and 64 bits starting at position 0 in the upper 128-bit lane. */ - adjacent_8_combined, __m128i); + mm256_srli_epi64((int32_t)12, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = mm256_shuffle_epi8( + adjacent_4_combined0, + mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, + (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, + (int8_t)9, (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0)); + __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined); + __m128i upper_8 = + mm256_extracti128_si256((int32_t)1, adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -1057,167 +829,8 @@ libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If - |vector| - is - laid - out - as - follows - (superscript - number - indicates - the - corresponding - bit - is - duplicated - that - many - times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ - | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ - | ↩ - ... - |adjacent_2_combined| - will - be - laid - out - as a - series - of - 32-bit - integers, - as - follows: - 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - .... - Shifting - up - the - values - at - the - even - indices - by - 12, - we - get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - ... - Viewing - this - as a - set - of - 64-bit - integers - we - get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ - ... - Shifting - down - by - 12 - gives - us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ - ... 
- |adjacent_4_combined|, - when - the - bottom - and - top - 128 - bit-lanes - are - grouped - into - bytes, - looks - like: - 0₇0₆0₅B₄B₃B₂B₁B₀ - | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ - | ↩ - In - each - 128-bit - lane, - we - want - to - put - bytes - 8, - 9, - 10, - 11, - 12 - after - bytes - 0, - 1, - 2, 3 - to - allow - for - sequential - reading. - We - now - have - 64 - bits - starting - at - position - 0 in - the - lower - 128-bit - lane, - ... - and - 64 - bits - starting - at - position - 0 in - the - upper - 128-bit - lane. - */ - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1267,16 +880,14 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 0U, (int16_t)1 << 2U, (int16_t)1 << 4U, (int16_t)1 << 6U)); __m256i coefficients1 = mm256_srli_epi16((int32_t)6, coefficients0, __m256i); - return mm256_and_si256( - /* Here I can prove this `and` is not useful */ coefficients1, - mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); + return mm256_and_si256(coefficients1, + mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); } KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( - /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, - (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = + Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1442,64 +1053,26 @@ KRML_MUSTINLINE size_t libcrux_ml_kem_vector_avx2_sampling_rejection_sample( __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can - be interpreted as a - sequence of - serialized 12-bit - (i.e. uncompressed) - coefficients. Not - all coefficients - may be less than - FIELD_MODULUS - though. */ - input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); __m256i compare_with_field_modulus = - mm256_cmpgt_epi16(/* Suppose we view |potential_coefficients| as follows - (grouping 64-bit elements): A B C D | E F G H | .... - and A < 3329, D < 3329 and H < 3329, - |compare_with_field_modulus| will look like: 0xFF 0 0 - 0xFF | 0 0 0 0xFF | ... */ - field_modulus, - potential_coefficients); + mm256_cmpgt_epi16(field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each - lane is either 0 or 1, we - only need one bit from - each lane in the register - to tell us what - coefficients to keep and - what to throw-away. - Combine all the bits - (there are 16) into two - bytes. */ - compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, - /* Each bit (and its corresponding position) represents an element we - want to sample. We'd like all such elements to be next to each other - starting at index 0, so that they can be read from the vector - easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level - shuffling indices needed to make this happen. For e.g. 
if good[0] = - 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit - lane to the first. To do this, we need the byte-level shuffle - indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = mm_loadu_si128(Eurydice_array_to_slice( - (size_t)16U, - /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, - uint8_t)); + __m128i lower_shuffles0 = mm_loadu_si128( + Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); __m128i lower_coefficients = mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = mm_shuffle_epi8(lower_coefficients, lower_shuffles0); - mm_storeu_si128(/* ... then write them out ... */ output, - lower_coefficients0); + mm_storeu_si128(output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, - /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1852,13 +1425,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_a9_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, - uint8_t), + H_a9_e0(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -2345,10 +1914,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -2407,7 +1972,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -2622,12 +2187,7 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -2679,13 +2239,7 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2771,11 +2325,7 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2790,9 +2340,7 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - ntt_at_layer_7_61(/* Due to the small coefficient bound, we can skip the first - round of Montgomery reductions. 
*/ - re); + ntt_at_layer_7_61(re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)5U); @@ -2925,14 +2473,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2966,17 +2509,10 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; - __m256i coefficient_normal_form = to_standard_domain_61( - self->coefficients[/* The coefficients are of the form aR^{-1} mod q, - which means calling to_montgomery_domain() on them - should return a mod q. */ - j]); + __m256i coefficient_normal_form = + to_standard_domain_61(self->coefficients[j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -3006,8 +2542,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3083,10 +2617,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( IndCpaPrivateKeyUnpacked_63 *private_key, IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -3116,8 +2647,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( sample_vector_cbd_then_ntt_out_b41(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_ab(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_ab(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -3142,13 +2673,11 @@ serialize_unpacked_secret_key_8c(IndCpaPublicKeyUnpacked_63 *public_key, IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_ed( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_ed( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -3335,15 +2864,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_ab(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -3520,13 +3045,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = 
zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3557,10 +3076,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -3586,11 +3102,7 @@ static KRML_MUSTINLINE void add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3703,26 +3215,8 @@ add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in top-level - declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing function cannot be - translated into C*: let mutable ret(Mark.Present,(Mark.AtMost - 2), ): int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the following code: - ```rust result.coefficients[i] = - Vector::barrett_reduce(Vector::add( coefficient_normal_form, - &Vector::add(self.coefficients[i], &message.coefficients[i]), - )); ``` */ - i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3770,18 +3264,8 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... 
*/ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3789,18 +3273,12 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)10, coefficients_high0, __m256i); @@ -3813,20 +3291,8 @@ compress_ciphertext_coefficient_ef(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -3880,18 +3346,8 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3899,18 +3355,12 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. 
*/ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)11, coefficients_high0, __m256i); @@ -3923,20 +3373,8 @@ compress_ciphertext_coefficient_c4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4014,18 +3452,8 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -4033,18 +3461,12 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. 
the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)4, coefficients_high0, __m256i); @@ -4057,20 +3479,8 @@ compress_ciphertext_coefficient_d1(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4096,11 +3506,7 @@ static KRML_MUSTINLINE void compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficient = compress_09_d1(to_unsigned_field_modulus_61(re.coefficients[i0])); @@ -4127,18 +3533,8 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -4146,18 +3542,12 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. 
the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)5, coefficients_high0, __m256i); @@ -4170,20 +3560,8 @@ compress_ciphertext_coefficient_f4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4209,11 +3587,7 @@ static KRML_MUSTINLINE void compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficients = compress_09_f4(to_unsigned_representative_61(re.coefficients[i0])); @@ -4301,11 +3675,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -4317,7 +3687,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = sample_ring_element_cbd_b41(copy_of_prf_input, domain_separator0); @@ -4326,7 +3695,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4334,11 +3703,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ 
public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -4347,14 +3714,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -4549,8 +3914,7 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)10); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4558,16 +3922,12 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)10, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4575,27 +3935,12 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)10, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4650,8 +3995,7 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)11); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4659,16 +4003,12 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)11, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4676,27 +4016,12 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)11, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4819,8 +4144,7 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)4); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4828,16 +4152,12 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)4, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4845,27 +4165,12 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)4, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4915,8 +4220,7 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)5); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4924,16 +4228,12 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)5, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4941,27 +4241,12 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)5, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -5128,14 +4413,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_2f( IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - deserialize_then_decompress_u_ed( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5156,8 +4438,7 @@ with const generics static KRML_MUSTINLINE void decrypt_2f(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - deserialize_secret_key_ab(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_ab(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -5477,13 +4758,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_5e( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_a9_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, - uint8_t), + H_a9_ac(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -5960,10 +5237,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_78( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -6022,7 +5295,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6173,14 +5446,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -6211,8 +5479,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_42( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6288,10 +5554,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( IndCpaPrivateKeyUnpacked_39 *private_key, IndCpaPublicKeyUnpacked_39 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_6a(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_6a(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6321,8 +5584,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( sample_vector_cbd_then_ntt_out_b4(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_42(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_42(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6347,13 +5610,11 @@ serialize_unpacked_secret_key_c9(IndCpaPublicKeyUnpacked_39 *public_key, IndCpaPrivateKeyUnpacked_39 *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_1e( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_78( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_78(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -6540,15 +5801,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_39 *unpacked_public_key) { - Eurydice_slice uu____0 = 
Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_42(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -6640,10 +5897,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -6873,11 +6127,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( IndCpaPublicKeyUnpacked_39 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -6889,7 +6139,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd uu____3 = sample_ring_element_cbd_b4(copy_of_prf_input, domain_separator0); @@ -6898,7 +6147,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -6906,11 +6155,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[4U]; - compute_vector_u_42(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_42(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -6919,14 +6166,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; 
libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[4U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_c9( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_1e( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -7240,14 +6485,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_37( IndCpaPrivateKeyUnpacked_39 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[4U]; - deserialize_then_decompress_u_1e( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_1e(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_78( - Eurydice_array_to_subslice_from( - (size_t)1568U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, + (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_42(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7268,8 +6510,7 @@ with const generics static KRML_MUSTINLINE void decrypt_37(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[4U]; - deserialize_secret_key_42(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_42(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[4U]; memcpy( @@ -7577,13 +6818,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_4d( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_a9_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, - uint8_t), + H_a9_fd(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -8034,10 +7271,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_29( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -8096,7 +7329,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8252,14 +7485,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -8290,8 +7518,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_89( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8367,10 +7593,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( IndCpaPrivateKeyUnpacked_94 *private_key, IndCpaPublicKeyUnpacked_94 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_f8(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_f8(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8400,8 +7623,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( sample_vector_cbd_then_ntt_out_b40(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_89(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_89(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8426,13 +7649,11 @@ serialize_unpacked_secret_key_2d(IndCpaPublicKeyUnpacked_94 *public_key, IndCpaPrivateKeyUnpacked_94 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_ba( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_29( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_29(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -8619,15 +7840,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_94 *unpacked_public_key) { - Eurydice_slice uu____0 = 
Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_89(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -8765,10 +7982,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -8960,11 +8174,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( IndCpaPublicKeyUnpacked_94 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -8976,7 +8186,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_40 uu____3 = sample_ring_element_cbd_b40(copy_of_prf_input, domain_separator0); @@ -8985,7 +8194,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -8993,11 +8202,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[2U]; - compute_vector_u_89(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_89(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -9006,14 +8213,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; 
libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[2U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_2d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ba( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -9297,14 +8502,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_4b( IndCpaPrivateKeyUnpacked_94 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[2U]; - deserialize_then_decompress_u_ba( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_ba(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_29( - Eurydice_array_to_subslice_from( - (size_t)768U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)768U, ciphertext, + (size_t)640U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_89(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9325,8 +8527,7 @@ with const generics static KRML_MUSTINLINE void decrypt_4b(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[2U]; - deserialize_secret_key_89(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_89(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[2U]; memcpy( diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h index addfdaf30..95dad8cf8 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.c b/libcrux-ml-kem/c/libcrux_mlkem_portable.c index fddae347c..1d3a317a8 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * 
Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_mlkem_portable.h" @@ -1152,28 +1152,11 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( */ uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = - (int16_t)1664 - - (int16_t) /* The approach used here is inspired by: - https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 - If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ - fe; - int16_t mask = - /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = - -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive - <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so - if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ - shifted - - >> 15U; + int16_t shifted = (int16_t)1664 - (int16_t)fe; + int16_t mask = shifted >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = - /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the - most significant bit of shifted_positive_in_range will be 1. */ - shifted_positive_in_range - - >> 15U; + int16_t r0 = shifted_positive_in_range >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1209,16 +1192,7 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = - (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits - == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); - hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to - be constant time due to: - https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ - */ - fe - - << (uint32_t)coefficient_bits; + uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2738,13 +2712,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_60( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_f1_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, - uint8_t), + H_f1_ac(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -3234,10 +3204,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ff( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -3297,7 +3263,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3495,12 +3461,7 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -3562,13 +3523,7 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3660,11 +3615,7 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3681,9 +3632,7 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - ntt_at_layer_7_8c(/* Due to the small coefficient bound, we can skip the first - round of Montgomery reductions. 
*/ - re); + ntt_at_layer_7_8c(re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)5U); @@ -3822,11 +3771,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_d0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3866,18 +3811,10 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector - coefficient_normal_form = to_standard_domain_8c( - self->coefficients[/* The coefficients are of the form aR^{-1} mod - q, which means calling to_montgomery_domain() - on them should return a mod q. */ - j]); + coefficient_normal_form = to_standard_domain_8c(self->coefficients[j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -3909,8 +3846,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_d0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3986,10 +3921,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( IndCpaPrivateKeyUnpacked_af *private_key, IndCpaPublicKeyUnpacked_af *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_03(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_03(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -4019,8 +3951,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( sample_vector_cbd_then_ntt_out_3b(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_d0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_d0(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -4045,13 +3977,11 @@ serialize_unpacked_secret_key_2f(IndCpaPublicKeyUnpacked_af *public_key, IndCpaPrivateKeyUnpacked_af *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_00( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_ff( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_ff(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -4239,15 +4169,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_af *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_d0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4434,13 +4360,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ 
-4471,10 +4391,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_d0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -4500,11 +4417,7 @@ static KRML_MUSTINLINE void add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4630,27 +4543,8 @@ add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d( - self->coefficients[/* FIXME: Eurydice crashes with: Warning 11: in - top-level declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing - function cannot be translated into C*: let - mutable ret(Mark.Present,(Mark.AtMost 2), ): - int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. 
On the - following code: ```rust result.coefficients[i] - = Vector::barrett_reduce(Vector::add( - coefficient_normal_form, - &Vector::add(self.coefficients[i], - &message.coefficients[i]), )); ``` */ - i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -4863,11 +4757,7 @@ static KRML_MUSTINLINE void compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = compress_0d_d1(to_unsigned_field_modulus_8c(re.coefficients[i0])); @@ -4922,11 +4812,7 @@ static KRML_MUSTINLINE void compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = compress_0d_f4(to_unsigned_representative_8c(re.coefficients[i0])); @@ -5015,11 +4901,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( IndCpaPublicKeyUnpacked_af *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5031,7 +4913,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd0 uu____3 = sample_ring_element_cbd_3b(copy_of_prf_input, domain_separator0); @@ -5040,7 +4921,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5048,11 +4929,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[4U]; - compute_vector_u_d0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, 
error_1, u); + compute_vector_u_d0(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -5061,14 +4940,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[4U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_2f( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_00( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -5707,14 +5584,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_7d( IndCpaPrivateKeyUnpacked_af *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[4U]; - deserialize_then_decompress_u_00( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_00(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_ff( - Eurydice_array_to_subslice_from( - (size_t)1568U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, + (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_d0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5735,8 +5609,7 @@ with const generics static KRML_MUSTINLINE void decrypt_7d(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[4U]; - deserialize_secret_key_d0(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_d0(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[4U]; memcpy( @@ -6056,13 +5929,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_30( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_f1_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. 
*/ - private_key->value, - (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, - uint8_t), + H_f1_fd(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -6512,10 +6381,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_64( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -6575,7 +6440,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6721,11 +6586,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_a0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -6760,8 +6621,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_a0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6837,10 +6696,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( IndCpaPrivateKeyUnpacked_d4 *private_key, IndCpaPublicKeyUnpacked_d4 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_10(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_10(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6870,8 +6726,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( sample_vector_cbd_then_ntt_out_3b0(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_a0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_a0(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6896,13 +6752,11 @@ serialize_unpacked_secret_key_6d(IndCpaPublicKeyUnpacked_d4 *public_key, IndCpaPrivateKeyUnpacked_d4 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_86( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_64( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_64(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -7090,15 +6944,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_d4 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_a0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7224,10 +7074,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_a0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -7458,11 +7305,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( IndCpaPublicKeyUnpacked_d4 *public_key, 
uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -7475,7 +7318,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_400 uu____3 = sample_ring_element_cbd_3b0(copy_of_prf_input, domain_separator0); @@ -7484,7 +7326,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -7492,11 +7334,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[2U]; - compute_vector_u_a0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_a0(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -7505,14 +7345,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[2U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_6d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_86( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -7827,14 +7665,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_d1( IndCpaPrivateKeyUnpacked_d4 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[2U]; - deserialize_then_decompress_u_86( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_86(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_64( - Eurydice_array_to_subslice_from( - (size_t)768U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)768U, ciphertext, + (size_t)640U, uint8_t, size_t)); 
libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_a0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7855,8 +7690,7 @@ with const generics static KRML_MUSTINLINE void decrypt_d1(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[2U]; - deserialize_secret_key_a0(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_a0(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[2U]; memcpy( @@ -8164,13 +7998,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_f1_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, - uint8_t), + H_f1_e0(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -8626,10 +8456,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -8689,7 +8515,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8824,11 +8650,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -8863,8 +8685,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8940,10 +8760,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( IndCpaPrivateKeyUnpacked_a0 *private_key, IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8973,8 +8790,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( sample_vector_cbd_then_ntt_out_3b1(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_1b(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_1b(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8999,13 +8816,11 @@ serialize_unpacked_secret_key_43(IndCpaPublicKeyUnpacked_a0 *public_key, IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_6c( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_89( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -9193,15 +9008,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_1b(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -9295,10 +9106,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -9491,11 +9299,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( IndCpaPublicKeyUnpacked_a0 
*public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -9508,7 +9312,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = sample_ring_element_cbd_3b1(copy_of_prf_input, domain_separator0); @@ -9517,7 +9320,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -9525,11 +9328,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -9538,14 +9339,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -9830,14 +9629,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_42( IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - deserialize_then_decompress_u_6c( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); 
libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9858,8 +9654,7 @@ with const generics static KRML_MUSTINLINE void decrypt_42(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - deserialize_secret_key_1b(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_1b(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/libcrux_mlkem_portable.h index 012f00992..ccb5a6654 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/libcrux_sha3.h b/libcrux-ml-kem/c/libcrux_sha3.h index 16a61b7e6..393be1f15 100644 --- a/libcrux-ml-kem/c/libcrux_sha3.h +++ b/libcrux-ml-kem/c/libcrux_sha3.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.c b/libcrux-ml-kem/c/libcrux_sha3_avx2.c index 23fa30cd5..3274dc64a 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.c +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_sha3_avx2.h" @@ -77,8 +77,7 @@ static KRML_MUSTINLINE __m256i and_not_xor_ef(__m256i a, __m256i b, __m256i c) { } static KRML_MUSTINLINE __m256i _veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = mm256_set1_epi64x( - (int64_t) /* 
Casting here is required, doesn't change the value. */ c); + __m256i c0 = mm256_set1_epi64x((int64_t)c); return mm256_xor_si256(a, c0); } @@ -1431,13 +1430,13 @@ static KRML_MUSTINLINE void store_block_5b(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = mm256_permute2x128_si256( - (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = + mm256_permute2x128_si256((int32_t)32, + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], @@ -1748,16 +1747,7 @@ void libcrux_sha3_avx2_x4_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = { - /* XXX: These functions could alternatively implement the same with the - portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, - 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, - 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, - 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); - keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, - 0x1fu8>([input3], [out3]); } */ - input0, - input1, input2, input3}; + Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; keccak_fb(buf0, buf); } @@ -1972,13 +1962,13 @@ static KRML_MUSTINLINE void store_block_3a(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = mm256_permute2x128_si256( - (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = + mm256_permute2x128_si256((int32_t)32, + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/libcrux_sha3_avx2.h index 645f80b34..eaa8d8c25 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: 
fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_internal.h b/libcrux-ml-kem/c/libcrux_sha3_internal.h index 74eeb47a3..c68ee5802 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_internal_H @@ -1811,7 +1811,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -2160,7 +2159,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -2509,7 +2507,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -2698,7 +2695,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2817,7 +2813,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -3166,7 +3161,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } 
diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.c b/libcrux-ml-kem/c/libcrux_sha3_neon.c index 5e4416bcd..8c9edc379 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.c +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_sha3_neon.h" @@ -62,7 +62,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { - /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -73,9 +72,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, */ KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let s0 = KeccakState::new(); let s1 = - * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -87,10 +83,6 @@ libcrux_sha3_neon_x2_incremental_init(void) { KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -104,10 +96,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_first_three_blocks(&mut s0, out0); - * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -121,10 +109,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_next_block(&mut s0, out0); - * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -148,10 +132,6 @@ libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_five_blocks( KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, 
Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.h b/libcrux-ml-kem/c/libcrux_sha3_neon.h index 6e264c84f..c51c09cc5 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.h +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_neon_H diff --git a/libcrux-ml-kem/cg/code_gen.txt b/libcrux-ml-kem/cg/code_gen.txt index 7e79f022e..54242b657 100644 --- a/libcrux-ml-kem/cg/code_gen.txt +++ b/libcrux-ml-kem/cg/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f -Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c -Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 -F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc -Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 +Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 +Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 +Karamel: 8c3612018c25889288da6857771be3ad03b75bcd +F*: 5643e656b989aca7629723653a2570c7df6252b9 +Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 diff --git a/libcrux-ml-kem/cg/libcrux_core.h b/libcrux-ml-kem/cg/libcrux_core.h index ca8a53171..b8e2354f8 100644 --- a/libcrux-ml-kem/cg/libcrux_core.h +++ b/libcrux-ml-kem/cg/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/cg/libcrux_ct_ops.h b/libcrux-ml-kem/cg/libcrux_ct_ops.h index 5f693d09c..cf4a616ac 100644 --- a/libcrux-ml-kem/cg/libcrux_ct_ops.h +++ b/libcrux-ml-kem/cg/libcrux_ct_ops.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: 
e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_ct_ops_H diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h index bb50d3eaf..f6933bc18 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_avx2_H @@ -171,16 +171,11 @@ libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i v_minus_field_modulus = - libcrux_intrinsics_avx2_mm256_sub_epi16(/* Compute v_i - Q and crate a - mask from the sign bit of each - of these quantities. */ - vector, field_modulus); + libcrux_intrinsics_avx2_mm256_sub_epi16(vector, field_modulus); __m256i sign_mask = libcrux_intrinsics_avx2_mm256_srai_epi16( (int32_t)15, v_minus_field_modulus, __m256i); __m256i conditional_add_field_modulus = - libcrux_intrinsics_avx2_mm256_and_si256(/* If v_i - Q < 0 then add back Q - to (v_i - Q). 
*/ - sign_mask, field_modulus); + libcrux_intrinsics_avx2_mm256_and_si256(sign_mask, field_modulus); return libcrux_intrinsics_avx2_mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -562,7 +557,6 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { - /* Compute the first term of the product */ __m256i shuffle_with = libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -570,8 +564,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Prepare the left hand side */ lhs, shuffle_with); + __m256i lhs_shuffled = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(lhs, shuffle_with); __m256i lhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = @@ -580,8 +574,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Prepare the right hand side */ rhs, shuffle_with); + __m256i rhs_shuffled = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(rhs, shuffle_with); __m256i rhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = @@ -590,8 +584,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(rhs_odds); - __m256i left = libcrux_intrinsics_avx2_mm256_mullo_epi32( - /* Start operating with them */ lhs_evens0, rhs_evens0); + __m256i left = + libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_evens0, rhs_evens0); __m256i right = libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = @@ -606,7 +600,7 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Compute the second term of the product */ rhs, + rhs, libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, @@ -621,10 +615,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)16, products_right0, __m256i); - return libcrux_intrinsics_avx2_mm256_blend_epi16( - (int32_t)170, - /* Combine them into one vector */ products_left0, products_right1, - __m256i); + return libcrux_intrinsics_avx2_mm256_blend_epi16((int32_t)170, products_left0, + products_right1, __m256i); } /** @@ -642,60 +634,13 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( 
KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = libcrux_intrinsics_avx2_mm256_slli_epi16( - (int32_t)15, - /* Suppose |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ - 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least - significant bit in each lane, move it to the most significant position - to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ - d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ - n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ - vector, __m256i); - __m128i low_msbs = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* Get the first 8 16-bit - elements ... */ - lsb_to_msb); + __m256i lsb_to_msb = + libcrux_intrinsics_avx2_mm256_slli_epi16((int32_t)15, vector, __m256i); + __m128i low_msbs = libcrux_intrinsics_avx2_mm256_castsi256_si128(lsb_to_msb); __m128i high_msbs = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); - __m128i msbs = - libcrux_intrinsics_avx2_mm_packs_epi16(/* ... and then pack them into - 8-bit values using signed - saturation. This function packs - all the |low_msbs|, and then the - high ones. low_msbs = a₀0¹⁵ - b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ - g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ - j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ - o₀0¹⁵ p₀0¹⁵ We shifted by 15 - above to take advantage of the - signed saturation performed by - mm_packs_epi16: - if the sign - bit of the 16-bit element being - packed is 1, the corresponding - 8-bit element in |msbs| will be - 0xFF. - if the sign bit of the - 16-bit element being packed is - 0, the corresponding 8-bit - element in |msbs| will be 0. - Thus, if, for example, a₀ = 1, - e₀ = 1, and p₀ = 1, and every - other bit is 0, after packing - into 8 bit value, |msbs| will - look like: 0xFF 0x00 0x00 0x00 | - 0xFF 0x00 0x00 0x00 | 0x00 0x00 - 0x00 0x00 | 0x00 0x00 0x00 0xFF - */ - low_msbs, high_msbs); - int32_t bits_packed = - libcrux_intrinsics_avx2_mm_movemask_epi8(/* Now that every element is - either 0xFF or 0x00, we just - extract the most significant - bit from each element and - collate them into two bytes. - */ - msbs); + (int32_t)1, lsb_to_msb, __m128i); + __m128i msbs = libcrux_intrinsics_avx2_mm_packs_epi16(low_msbs, high_msbs); + int32_t bits_packed = libcrux_intrinsics_avx2_mm_movemask_epi8(msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -714,63 +659,18 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { - __m256i coefficients = - libcrux_intrinsics_avx2_mm256_set_epi16(/* We need to take each bit from - the 2 bytes of input and put - them into their own 16-bit - lane. Ideally, we'd load the - two bytes into the vector, - duplicate them, and right-shift - the 0th element by 0 bits, the - first element by 1 bit, the - second by 2 bits and so on - before AND-ing with 0x1 to - leave only the least - signifinicant bit. But since - |_mm256_srlv_epi16| does not - exist, so we have to resort to - a workaround. Rather than - shifting each element by a - different amount, we'll - multiply each element by a - value such that the bit we're - interested in becomes the most - significant bit. 
The - coefficients are loaded as - follows: */ - b, b, b, b, b, b, b, b, a, a, a, - a, a, a, a, a); - __m256i coefficients_in_msb = - libcrux_intrinsics_avx2_mm256_mullo_epi16(/* And this vector, when - multiplied with the previous - one, ensures that the bit - we'd like to keep in each - lane becomes the most - significant bit upon - multiplication. */ - coefficients, - libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 8U, - (int16_t)1 << 9U, - (int16_t)1 << 10U, - (int16_t)1 << 11U, - (int16_t)1 << 12U, - (int16_t)1 << 13U, - (int16_t)1 << 14U, - (int16_t)-32768, - (int16_t)1 << 8U, - (int16_t)1 << 9U, - (int16_t)1 << 10U, - (int16_t)1 << 11U, - (int16_t)1 << 12U, - (int16_t)1 << 13U, - (int16_t)1 << 14U, - (int16_t)-32768)); - return libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)15, - /* Now that they're all in the most significant bit position, shift them - down to the least significant bit. */ - coefficients_in_msb, __m256i); + __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( + b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( + coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, + (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, + (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, + (int16_t)-32768)); + return libcrux_intrinsics_avx2_mm256_srli_epi16((int32_t)15, + coefficients_in_msb, __m256i); } KRML_ATTRIBUTE_TARGET("avx2") @@ -785,23 +685,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index( - bytes, - /* We need to take each bit from the 2 bytes of input and put them - into their own 16-bit lane. Ideally, we'd load the two bytes into - the vector, duplicate them, and right-shift the 0th element by 0 - bits, the first element by 1 bit, the second by 2 bits and so on - before AND-ing with 0x1 to leave only the least signifinicant bit. - But since |_mm256_srlv_epi16| does not exist, so we have to resort - to a workaround. Rather than shifting each element by a different - amount, we'll multiply each element by a value such that the bit - we're interested in becomes the most significant bit. The - coefficients are loaded as follows: And this vector, when - multiplied with the previous one, ensures that the bit we'd like to - keep in each lane becomes the most significant bit upon - multiplication. Now that they're all in the most significant bit - position, shift them down to the least significant bit. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -837,70 +721,23 @@ static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 4U, - /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | - 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be - laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA - 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... 
*/ - vector); - __m256i adjacent_8_combined = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* Recall that - |adjacent_2_combined| goes - as follows: 0x00_00_00_BA - 0x00_00_00_DC | - 0x00_00_00_FE 0x00_00_00_HG - | ... Out of this, we only - need the first byte, the 4th - byte, the 8th byte and so on - from the bottom and the top - 128 bits. */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0)); - __m256i combined = - libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32(/* |adjacent_8_combined| - looks like this: 0: - 0xHG_FE_DC_BA 1: - 0x00_00_00_00 | 2: - 0x00_00_00_00 3: - 0x00_00_00_00 | 4: - 0xPO_NM_LK_JI .... - We put the element - at 4 after the - element at 0 ... */ - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)4, - (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); + __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); + __m256i combined = libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32( + adjacent_8_combined, libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); __m128i combined0 = libcrux_intrinsics_avx2_mm256_castsi256_si128(combined); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( - Eurydice_array_to_slice( - (size_t)16U, - /* ... so that we can read them out in one go. */ serialized, - uint8_t), - combined0); + Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); uint8_t ret0[8U]; Result_15 dst; Eurydice_slice_to_array2( @@ -926,33 +763,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = - libcrux_intrinsics_avx2_mm256_set_epi16(/* Every 4 bits from each byte of - input should be put into its - own 16-bit lane. Since - |_mm256_srlv_epi16| does not - exist, we have to resort to a - workaround. Rather than - shifting each element by a - different amount, we'll - multiply each element by a - value such that the bits we're - interested in become the most - significant bits (of an 8-bit - value). In this lane, the 4 - bits we need to put are already - the most significant bits of - |bytes[7]| (that is, b7). */ - b7, - /* In this lane, the 4 bits we - need to put are the least - significant bits, so we need to - shift the 4 least-significant - bits of |b7| to the most - significant bits (of an 8-bit - value). 
*/ - b7, b6, b6, b5, b5, b4, b4, b3, - b3, b2, b2, b1, b1, b0, b0); + __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( + b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -962,14 +774,10 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); __m256i coefficients_in_lsb = libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)4, - /* Once the 4-bit coefficients are in the most significant positions (of - an 8-bit value), shift them all down by 4. */ - coefficients_in_msb, __m256i); + (int32_t)4, coefficients_in_msb, __m256i); return libcrux_intrinsics_avx2_mm256_and_si256( - /* Zero the remaining bits. */ coefficients_in_lsb, - libcrux_intrinsics_avx2_mm256_set1_epi16(((int16_t)1 << 4U) - - (int16_t)1)); + coefficients_in_lsb, libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 4U) - (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") @@ -986,23 +794,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index( - bytes, - /* Every 4 bits from each byte of input should be put into its own - 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to - resort to a workaround. Rather than shifting each element by a - different amount, we'll multiply each element by a value such that - the bits we're interested in become the most significant bits (of - an 8-bit value). In this lane, the 4 bits we need to put are - already the most significant bits of |bytes[7]| (that is, b7). In - this lane, the 4 bits we need to put are the least significant - bits, so we need to shift the 4 least-significant bits of |b7| to - the most significant bits (of an 8-bit value). These constants are - chosen to shift the bits of the values that we loaded into - |coefficients|. Once the 4-bit coefficients are in the most - significant positions (of an 8-bit value), shift them all down - by 4. Zero the remaining bits. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -1026,106 +818,35 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = - libcrux_intrinsics_avx2_mm256_madd_epi16(/* If |vector| is laid out as - follows (superscript number - indicates the corresponding - bit is duplicated that many - times): 0¹¹a₄a₃a₂a₁a₀ - 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ - 0¹¹d₄d₃d₂d₁d₀ | ↩ - 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ - 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | - ↩ |adjacent_2_combined| will - be laid out as a series of - 32-bit integers, as follows: - 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... 
*/ - vector, - libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, - (int16_t)1)); - __m256i adjacent_4_combined = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Recall that - |adjacent_2_combined| is laid - out as follows: - 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... This shift results in: - b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22)); + __m256i adjacent_2_combined = libcrux_intrinsics_avx2_mm256_madd_epi16( + vector, libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); + __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, (int32_t)0, + (int32_t)22, (int32_t)0, (int32_t)22)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)22, - /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift - down by 22 bits to remove the least significant 0 bits that aren't part - of the bits we need. */ - adjacent_4_combined, __m256i); + (int32_t)22, adjacent_4_combined, __m256i); __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi32( - (int32_t)8, - /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks - like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² - 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to - read out the bytes in one go, we need to shifts the bits in position 2 - to position 1 in each 128-bit lane. */ - adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* |adjacent_8_combined|, when - viewed as a set of 32-bit - values, now looks like: - 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0³² 0³² | ↩ Once again, we - line these bits up by shifting - the up values at indices 0 and - 5 by 12, viewing the resulting - register as a set of 64-bit - values, and then shifting down - the 64-bit values by 12 bits. 
- */ - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12)); + (int32_t)8, adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = libcrux_intrinsics_avx2_mm256_srli_epi64( (int32_t)12, adjacent_8_combined0, __m256i); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 40 bits - starting at position 0 in - the lower 128-bit lane, - ... */ - adjacent_8_combined1); + libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined1); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ - adjacent_8_combined1, __m128i); + (int32_t)1, adjacent_8_combined1, __m128i); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -1231,87 +952,27 @@ static inline core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 10U, - /* If |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... - |adjacent_2_combined| will be laid out as a series of 32-bit - integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ - vector); - __m256i adjacent_4_combined = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Shifting up the values at the - even indices by 12, we get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ ... */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); + __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)12, - /* Viewing this as a set of 64-bit integers we get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ ... 
Shifting down by 12 gives us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ ... */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* |adjacent_4_combined|, when - the bottom and top 128 - bit-lanes are grouped into - bytes, looks like: - 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ - In each 128-bit lane, we - want to put bytes 8, 9, 10, - 11, 12 after bytes 0, 1, 2, - 3 to allow for sequential - reading. */ - adjacent_4_combined0, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0)); + (int32_t)12, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + adjacent_4_combined0, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, + (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 64 bits - starting at position 0 in - the lower 128-bit lane, - ... */ - adjacent_8_combined); + libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* and 64 bits starting at position 0 in the upper 128-bit lane. */ - adjacent_8_combined, __m128i); + (int32_t)1, adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -1320,167 +981,8 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If - |vector| - is - laid - out - as - follows - (superscript - number - indicates - the - corresponding - bit - is - duplicated - that - many - times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ - | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ - | ↩ - ... - |adjacent_2_combined| - will - be - laid - out - as a - series - of - 32-bit - integers, - as - follows: - 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - .... - Shifting - up - the - values - at - the - even - indices - by - 12, - we - get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - ... 
- Viewing - this - as a - set - of - 64-bit - integers - we - get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ - ... - Shifting - down - by - 12 - gives - us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ - ... - |adjacent_4_combined|, - when - the - bottom - and - top - 128 - bit-lanes - are - grouped - into - bytes, - looks - like: - 0₇0₆0₅B₄B₃B₂B₁B₀ - | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ - | ↩ - In - each - 128-bit - lane, - we - want - to - put - bytes - 8, - 9, - 10, - 11, - 12 - after - bytes - 0, - 1, - 2, 3 - to - allow - for - sequential - reading. - We - now - have - 64 - bits - starting - at - position - 0 in - the - lower - 128-bit - lane, - ... - and - 64 - bits - starting - at - position - 0 in - the - upper - 128-bit - lane. - */ - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1536,20 +1038,16 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 6U)); __m256i coefficients1 = libcrux_intrinsics_avx2_mm256_srli_epi16( (int32_t)6, coefficients0, __m256i); - return libcrux_intrinsics_avx2_mm256_and_si256(/* Here I can prove this `and` - is not useful */ - coefficients1, - libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 10U) - - (int16_t)1)); + return libcrux_intrinsics_avx2_mm256_and_si256( + coefficients1, libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 10U) - (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( - /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, - (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = + Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1735,70 +1233,28 @@ libcrux_ml_kem_vector_avx2_sampling_rejection_sample(Eurydice_slice input, __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can - be interpreted as a - sequence of - serialized 12-bit - (i.e. uncompressed) - coefficients. Not - all coefficients - may be less than - FIELD_MODULUS - though. */ - input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); __m256i compare_with_field_modulus = - libcrux_intrinsics_avx2_mm256_cmpgt_epi16(/* Suppose we view - |potential_coefficients| as - follows (grouping 64-bit - elements): A B C D | E F G H - | .... and A < 3329, D < 3329 - and H < 3329, - |compare_with_field_modulus| - will look like: 0xFF 0 0 0xFF - | 0 0 0 0xFF | ... 
*/ - field_modulus, + libcrux_intrinsics_avx2_mm256_cmpgt_epi16(field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each - lane is either 0 or 1, we - only need one bit from - each lane in the register - to tell us what - coefficients to keep and - what to throw-away. - Combine all the bits - (there are 16) into two - bytes. */ - compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, - /* Each bit (and its corresponding position) represents an element we - want to sample. We'd like all such elements to be next to each other - starting at index 0, so that they can be read from the vector - easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level - shuffling indices needed to make this happen. For e.g. if good[0] = - 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit - lane to the first. To do this, we need the byte-level shuffle - indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = - libcrux_intrinsics_avx2_mm_loadu_si128(Eurydice_array_to_slice( - (size_t)16U, - /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, - uint8_t)); + __m128i lower_shuffles0 = libcrux_intrinsics_avx2_mm_loadu_si128( + Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); __m128i lower_coefficients = libcrux_intrinsics_avx2_mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = libcrux_intrinsics_avx2_mm_shuffle_epi8( lower_coefficients, lower_shuffles0); - libcrux_intrinsics_avx2_mm_storeu_si128( - /* ... then write them out ... */ output, lower_coefficients0); + libcrux_intrinsics_avx2_mm_storeu_si128(output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, - /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1979,9 +1435,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)10); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1991,15 +1445,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_low1, __m256i); + (int32_t)10, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2009,29 +1459,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)10, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2097,9 +1531,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)11); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2109,15 +1541,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. 
*/ - decompressed_low1, __m256i); + (int32_t)11, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2127,29 +1555,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)11, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2253,13 +1665,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2354,11 +1760,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2455,9 +1857,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)4); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress 
the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2467,15 +1867,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)4, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2485,29 +1881,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)4, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. 
*/ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2568,9 +1948,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)5); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2580,15 +1958,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)5, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2598,29 +1972,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)5, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. 
*/ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2730,14 +2088,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2840,13 +2193,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2878,10 +2225,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61(&zeta_i, re, (size_t)3U); @@ -3076,16 +2420,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_2f( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(/* u := - Decompress_q(Decode_{d_u}(c), - d_u) */ - ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = libcrux_ml_kem_matrix_compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3108,8 +2447,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_2f( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab( - /* sˆ := Decode_12(sk) */ 
secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -3658,10 +2996,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -3728,7 +3062,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3750,15 +3084,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_fa( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4028,12 +3359,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -4054,10 +3380,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_61(/* Due to the small coefficient bound, we - can skip the first round of Montgomery - reductions. 
*/ - re); + libcrux_ml_kem_ntt_ntt_at_layer_7_61(re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4268,11 +3591,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -4395,26 +3714,8 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in top-level - declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing function cannot be - translated into C*: let mutable ret(Mark.Present,(Mark.AtMost - 2), ): int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the following code: - ```rust result.coefficients[i] = - Vector::barrett_reduce(Vector::add( coefficient_normal_form, - &Vector::add(self.coefficients[i], &message.coefficients[i]), - )); ``` */ - i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -4472,23 +3773,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... 
*/ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4497,17 +3784,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4521,23 +3802,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4601,23 +3869,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... 
*/ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4626,17 +3880,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4650,23 +3898,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4782,23 +4017,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... 
*/ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4807,17 +4028,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4831,23 +4046,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4878,11 +4080,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficient = libcrux_ml_kem_vector_avx2_compress_09_d1( libcrux_ml_kem_serialize_to_unsigned_field_modulus_61( @@ -4914,23 +4112,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. 
- the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... */ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4939,17 +4123,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4963,23 +4141,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. 
*/ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -5010,11 +4175,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficients = libcrux_ml_kem_vector_avx2_compress_09_f4( libcrux_ml_kem_vector_traits_to_unsigned_representative_61( @@ -5106,10 +4267,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5122,7 +4280,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_b4( copy_of_prf_input, domain_separator0); @@ -5131,7 +4288,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_avx2_PRF_a9_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5139,12 +4296,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_sampling_sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ - public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_61( @@ -5154,14 +4309,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 
uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5654,18 +4807,11 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_61( - self->coefficients[/* The coefficients are of the form aR^{-1} mod - q, which means calling to_montgomery_domain() - on them should return a mod q. */ - j]); + self->coefficients[j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -5696,8 +4842,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_61(); t_as_ntt[i0] = uu____0; @@ -5778,9 +4922,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5813,8 +4955,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5950,18 +5092,12 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_8c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_ed(/* pk := (Encode_12(tˆ - mod^{+}q) || ρ) */ - public_key->t_as_ntt, - Eurydice_array_to_slice( - (size_t)32U, - public_key->seed_for_A, - uint8_t), - 
public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_ed( + public_key->t_as_ntt, + Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(/* sk := Encode_12(sˆ mod^{+}q) - */ - private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6633,9 +5769,7 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6668,8 +5802,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6834,10 +5968,7 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_avx2_H_a9_e0( - Eurydice_array_to_subslice2(/* Eurydice can't access values directly on - the types. We need to go to the `value` - directly. */ - private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7797,10 +6928,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_b3( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -7868,7 +6995,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_b3( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7891,15 +7018,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_bf( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7934,10 +7058,7 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_e2( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, - /* XXX: We need to copy_from_slice here because karamel can't handle the - assignment cf. https://github.com/FStarLang/karamel/pull/491 */ - key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_f6); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h index 7a9446452..8f0de6a3e 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_portable_H @@ -1235,28 +1235,11 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( static inline uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = - (int16_t)1664 - - (int16_t) /* The approach used here is inspired by: - https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 - If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ - fe; - int16_t mask = - /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = - -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive - 
<= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so - if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ - shifted - - >> 15U; + int16_t shifted = (int16_t)1664 - (int16_t)fe; + int16_t mask = shifted >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = - /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the - most significant bit of shifted_positive_in_range will be 1. */ - shifted_positive_in_range - - >> 15U; + int16_t r0 = shifted_positive_in_range >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1293,16 +1276,7 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( static inline int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = - (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits - == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); - hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to - be constant time due to: - https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ - */ - fe - - << (uint32_t)coefficient_bits; + uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2904,13 +2878,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3006,11 +2974,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3285,11 +3249,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3396,13 +3356,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and 
parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3433,10 +3387,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c(&zeta_i, re, (size_t)3U); @@ -3640,16 +3591,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_42( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(/* u := - Decompress_q(Decode_{d_u}(c), - d_u) */ - ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = libcrux_ml_kem_matrix_compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3671,8 +3617,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_42( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b( - /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -4207,10 +4152,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. 
- */ while (true) { if (done) { break; @@ -4277,7 +4218,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -4299,15 +4240,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_3f( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_1b( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4555,12 +4493,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -4582,10 +4515,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_8c(/* Due to the small coefficient bound, we - can skip the first round of Montgomery - reductions. 
*/ - re); + libcrux_ml_kem_ntt_ntt_at_layer_7_8c(re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4792,11 +4722,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4928,28 +4854,8 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in - top-level declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing - function cannot be translated into C*: let - mutable ret(Mark.Present,(Mark.AtMost 2), ): - int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. 
On the - following code: ```rust result.coefficients[i] - = Vector::barrett_reduce(Vector::add( - coefficient_normal_form, - &Vector::add(self.coefficients[i], - &message.coefficients[i]), )); ``` */ - i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -5206,11 +5112,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = libcrux_ml_kem_vector_portable_compress_0d_d1( @@ -5270,11 +5172,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = libcrux_ml_kem_vector_portable_compress_0d_f4( @@ -5366,10 +5264,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5382,7 +5277,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_3b( copy_of_prf_input, domain_separator0); @@ -5391,7 +5285,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_portable_PRF_f1_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5399,12 +5293,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_sampling_sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, 
prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ - public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_8c( @@ -5414,14 +5306,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5847,20 +5737,12 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_8c( - self->coefficients[/* The coefficients are of the form aR^{-1} - mod q, which means calling - to_montgomery_domain() on them should - return a mod q. */ - j]); + self->coefficients[j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -5892,8 +5774,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. 
- */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_8c(); t_as_ntt[i0] = uu____0; @@ -5973,9 +5853,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6008,8 +5886,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6141,18 +6019,12 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_43( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_6c(/* pk := (Encode_12(tˆ - mod^{+}q) || ρ) */ - public_key->t_as_ntt, - Eurydice_array_to_slice( - (size_t)32U, - public_key->seed_for_A, - uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_6c( + public_key->t_as_ntt, + Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_89(/* sk := Encode_12(sˆ mod^{+}q) - */ - private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6733,9 +6605,7 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6768,8 +6638,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6907,10 +6777,7 @@ static KRML_MUSTINLINE bool 
libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_portable_H_f1_e0( - Eurydice_array_to_subslice2(/* Eurydice can't access values directly on - the types. We need to go to the `value` - directly. */ - private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7728,10 +7595,7 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_df( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, - /* XXX: We need to copy_from_slice here because karamel can't handle the - assignment cf. https://github.com/FStarLang/karamel/pull/491 */ - key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_1d); libcrux_ml_kem_polynomial_PolynomialRingElement_1d ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h index 5955882fa..7a519bf7c 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_avx2_H @@ -104,9 +104,7 @@ libcrux_sha3_simd_avx2_and_not_xor_ef(__m256i a, __m256i b, __m256i c) { KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_sha3_simd_avx2__veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x( - (int64_t) /* Casting here is required, doesn't change the value. 
*/ - c); + __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x((int64_t)c); return libcrux_intrinsics_avx2_mm256_xor_si256(a, c0); } @@ -1701,7 +1699,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_5b( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], @@ -2036,15 +2034,7 @@ static KRML_MUSTINLINE void libcrux_sha3_avx2_x4_shake256( Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = { - /* XXX: These functions could alternatively implement the same with the - portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, - 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, - 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, - 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); - keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, - 0x1fu8>([input3], [out3]); } */ - input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; libcrux_sha3_generic_keccak_keccak_fb(buf0, buf); } @@ -2284,7 +2274,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_3a( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], diff --git a/libcrux-ml-kem/cg/libcrux_sha3_portable.h b/libcrux-ml-kem/cg/libcrux_sha3_portable.h index 211cf1919..a606f5f71 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_portable.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_portable_H @@ -1654,7 +1654,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } @@ -2013,7 +2012,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, 
out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -2142,7 +2140,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2749,7 +2746,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -3108,7 +3104,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -3404,7 +3399,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -3502,7 +3496,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { - /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3517,9 +3510,6 @@ typedef struct libcrux_sha3_neon_x2_incremental_KeccakState_s { */ static KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let s0 = KeccakState::new(); let s1 = - * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3532,10 +3522,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3549,10 +3535,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * 
shake128_squeeze_first_three_blocks(&mut s0, out0); - * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3566,10 +3548,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_next_block(&mut s0, out0); - * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3594,10 +3572,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3759,13 +3733,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)136U) { - consumed = (size_t)136U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)136U) { + consumed = (size_t)136U - self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -3871,9 +3840,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4220,13 +4187,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)168U) { - consumed = (size_t)168U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)168U) { + consumed = (size_t)168U - self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -4332,9 +4294,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). 
*/ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4724,13 +4684,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)136U >= out_len) { mid = out_len; } else { mid = (size_t)136U; @@ -4744,11 +4698,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4757,11 +4708,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -4856,13 +4803,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)168U >= out_len) { mid = out_len; } else { mid = (size_t)168U; @@ -4876,11 +4817,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4889,11 +4827,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. 
- */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); From 294c5806abf3c530d0425cba7382c05203f2ffe2 Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 22:16:16 +0000 Subject: [PATCH 6/7] fstar --- Cargo.lock | 12 ++-- fstar-helpers/fstar-bitvec/BitVecEq.fsti | 4 +- .../extraction/Libcrux_ml_kem.Ind_cca.fst | 2 +- .../extraction/Libcrux_ml_kem.Mlkem1024.fsti | 59 +++++++++++++------ .../extraction/Libcrux_ml_kem.Mlkem512.fsti | 59 +++++++++++++------ .../extraction/Libcrux_ml_kem.Mlkem768.fsti | 59 +++++++++++++------ ...crux_ml_kem.Vector.Portable.Arithmetic.fst | 2 +- .../proofs/fstar/spec/Spec.Utils.fst | 5 +- 8 files changed, 140 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 363acf1a7..94f450b74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -697,8 +697,8 @@ dependencies = [ [[package]] name = "hax-lib" -version = "0.1.0-alpha.1" -source = "git+https://github.com/hacspec/hax/#1c5e17c9ceee5adede0f4ea7f68bb3d8337f33a0" +version = "0.1.0-rc.1" +source = "git+https://github.com/hacspec/hax/#de59826b832befc82905286d052c8a961c31f3cd" dependencies = [ "hax-lib-macros", "num-bigint", @@ -707,8 +707,8 @@ dependencies = [ [[package]] name = "hax-lib-macros" -version = "0.1.0-alpha.1" -source = "git+https://github.com/hacspec/hax/#1c5e17c9ceee5adede0f4ea7f68bb3d8337f33a0" +version = "0.1.0-rc.1" +source = "git+https://github.com/hacspec/hax/#de59826b832befc82905286d052c8a961c31f3cd" dependencies = [ "hax-lib-macros-types", "paste", @@ -720,8 +720,8 @@ dependencies = [ [[package]] name = "hax-lib-macros-types" -version = "0.1.0-alpha.1" -source = "git+https://github.com/hacspec/hax/#1c5e17c9ceee5adede0f4ea7f68bb3d8337f33a0" +version = "0.1.0-rc.1" +source = "git+https://github.com/hacspec/hax/#de59826b832befc82905286d052c8a961c31f3cd" dependencies = [ "proc-macro2", "quote", diff --git a/fstar-helpers/fstar-bitvec/BitVecEq.fsti b/fstar-helpers/fstar-bitvec/BitVecEq.fsti index c370f28bf..6792f2b29 100644 --- a/fstar-helpers/fstar-bitvec/BitVecEq.fsti +++ b/fstar-helpers/fstar-bitvec/BitVecEq.fsti @@ -1,5 +1,5 @@ module BitVecEq -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" open Core open FStar.Mul open MkSeq @@ -72,7 +72,7 @@ let int_t_array_bitwise_eq // else get_bit_nat (pow2 (bits n) + v x) (v nth)) // with get_bit_intro #n x nth -#push-options "--fuel 0 --ifuel 0 --z3rlimit 80" +#push-options "--fuel 0 --ifuel 0 --z3rlimit 150" /// Rewrite a `bit_vec_of_int_t_array (Seq.slice arr ...)` into a `bit_vec_sub ...` let int_t_seq_slice_to_bv_sub_lemma #t #n (arr: t_Array (int_t t) n) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst index a6ffee609..ee9e56c50 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Ind_cca.fst @@ -235,7 +235,7 @@ let serialize_kem_secret_key #pop-options -#push-options "--z3rlimit 300" +#push-options "--z3rlimit 300 --ext context_pruning --split_queries always" let encapsulate (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: diff --git 
a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti index b31f845fc..007e5c86f 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem1024.fsti @@ -3,39 +3,64 @@ module Libcrux_ml_kem.Mlkem1024 open Core open FStar.Mul -let v_C1_BLOCK_SIZE_1024_: usize = sz 352 +let v_ETA1: usize = sz 2 -let v_C1_SIZE_1024_: usize = sz 1408 +let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 -let v_C2_SIZE_1024_: usize = sz 160 +let v_ETA2: usize = sz 2 -let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = sz 1568 +let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 -let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = sz 1568 +let v_RANK_1024_: usize = sz 4 -let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = sz 1536 +let v_CPA_PKE_SECRET_KEY_SIZE_1024_: usize = + ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA1: usize = sz 2 +let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = + (v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 -let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 +let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = + ((v_RANK_1024_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA2: usize = sz 2 +let v_CPA_PKE_PUBLIC_KEY_SIZE_1024_: usize = v_T_AS_NTT_ENCODED_SIZE_1024_ +! sz 32 -let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 +let v_SECRET_KEY_SIZE_1024_: usize = + ((v_CPA_PKE_SECRET_KEY_SIZE_1024_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_1024_ <: usize) +! + Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE + <: + usize) +! + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1600 +let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 -let v_RANKED_BYTES_PER_RING_ELEMENT_1024_: usize = sz 1536 +let v_C1_BLOCK_SIZE_1024_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_1024_ + <: + usize) /! + sz 8 -let v_RANK_1024_: usize = sz 4 +let v_C1_SIZE_1024_: usize = v_C1_BLOCK_SIZE_1024_ *! v_RANK_1024_ -let v_SECRET_KEY_SIZE_1024_: usize = sz 3168 +let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 -let v_T_AS_NTT_ENCODED_SIZE_1024_: usize = sz 1536 +let v_C2_SIZE_1024_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_1024_ + <: + usize) /! + sz 8 -let v_VECTOR_U_COMPRESSION_FACTOR_1024_: usize = sz 11 +let v_CPA_PKE_CIPHERTEXT_SIZE_1024_: usize = v_C1_SIZE_1024_ +! v_C2_SIZE_1024_ -let v_VECTOR_V_COMPRESSION_FACTOR_1024_: usize = sz 5 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_1024_ /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti index 28d905063..94590e2ee 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem512.fsti @@ -3,39 +3,64 @@ module Libcrux_ml_kem.Mlkem512 open Core open FStar.Mul -let v_C1_BLOCK_SIZE_512_: usize = sz 320 +let v_ETA1: usize = sz 3 -let v_C1_SIZE_512_: usize = sz 640 +let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! sz 64 -let v_C2_SIZE_512_: usize = sz 128 +let v_ETA2: usize = sz 2 -let v_CPA_PKE_CIPHERTEXT_SIZE_512_: usize = sz 768 +let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 -let v_CPA_PKE_PUBLIC_KEY_SIZE_512_: usize = sz 800 +let v_RANK_512_: usize = sz 2 -let v_CPA_PKE_SECRET_KEY_SIZE_512_: usize = sz 768 +let v_CPA_PKE_SECRET_KEY_SIZE_512_: usize = + ((v_RANK_512_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA1: usize = sz 3 +let v_RANKED_BYTES_PER_RING_ELEMENT_512_: usize = + (v_RANK_512_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 -let v_ETA1_RANDOMNESS_SIZE: usize = sz 192 +let v_T_AS_NTT_ENCODED_SIZE_512_: usize = + ((v_RANK_512_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA2: usize = sz 2 +let v_CPA_PKE_PUBLIC_KEY_SIZE_512_: usize = v_T_AS_NTT_ENCODED_SIZE_512_ +! sz 32 -let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 +let v_SECRET_KEY_SIZE_512_: usize = + ((v_CPA_PKE_SECRET_KEY_SIZE_512_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_512_ <: usize) +! + Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE + <: + usize) +! + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 800 +let v_VECTOR_U_COMPRESSION_FACTOR_512_: usize = sz 10 -let v_RANKED_BYTES_PER_RING_ELEMENT_512_: usize = sz 768 +let v_C1_BLOCK_SIZE_512_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_512_ + <: + usize) /! + sz 8 -let v_RANK_512_: usize = sz 2 +let v_C1_SIZE_512_: usize = v_C1_BLOCK_SIZE_512_ *! v_RANK_512_ -let v_SECRET_KEY_SIZE_512_: usize = sz 1632 +let v_VECTOR_V_COMPRESSION_FACTOR_512_: usize = sz 4 -let v_T_AS_NTT_ENCODED_SIZE_512_: usize = sz 768 +let v_C2_SIZE_512_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_512_ + <: + usize) /! + sz 8 -let v_VECTOR_U_COMPRESSION_FACTOR_512_: usize = sz 10 +let v_CPA_PKE_CIPHERTEXT_SIZE_512_: usize = v_C1_SIZE_512_ +! v_C2_SIZE_512_ -let v_VECTOR_V_COMPRESSION_FACTOR_512_: usize = sz 4 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_512_ /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti index 928e6a233..d1d7c217f 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Mlkem768.fsti @@ -3,39 +3,64 @@ module Libcrux_ml_kem.Mlkem768 open Core open FStar.Mul -let v_C1_BLOCK_SIZE_768_: usize = sz 320 +let v_ETA1: usize = sz 2 -let v_C1_SIZE_768_: usize = sz 960 +let v_ETA1_RANDOMNESS_SIZE: usize = v_ETA1 *! 
sz 64 -let v_C2_SIZE_768_: usize = sz 128 +let v_ETA2: usize = sz 2 -let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = sz 1088 +let v_ETA2_RANDOMNESS_SIZE: usize = v_ETA2 *! sz 64 -let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = sz 1184 +let v_RANK_768_: usize = sz 3 -let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = sz 1152 +let v_CPA_PKE_SECRET_KEY_SIZE_768_: usize = + ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA1: usize = sz 2 +let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = + (v_RANK_768_ *! Libcrux_ml_kem.Constants.v_BITS_PER_RING_ELEMENT <: usize) /! sz 8 -let v_ETA1_RANDOMNESS_SIZE: usize = sz 128 +let v_T_AS_NTT_ENCODED_SIZE_768_: usize = + ((v_RANK_768_ *! Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT <: usize) *! + Libcrux_ml_kem.Constants.v_BITS_PER_COEFFICIENT + <: + usize) /! + sz 8 -let v_ETA2: usize = sz 2 +let v_CPA_PKE_PUBLIC_KEY_SIZE_768_: usize = v_T_AS_NTT_ENCODED_SIZE_768_ +! sz 32 -let v_ETA2_RANDOMNESS_SIZE: usize = sz 128 +let v_SECRET_KEY_SIZE_768_: usize = + ((v_CPA_PKE_SECRET_KEY_SIZE_768_ +! v_CPA_PKE_PUBLIC_KEY_SIZE_768_ <: usize) +! + Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE + <: + usize) +! + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE -let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = sz 1120 +let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 -let v_RANKED_BYTES_PER_RING_ELEMENT_768_: usize = sz 1152 +let v_C1_BLOCK_SIZE_768_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_U_COMPRESSION_FACTOR_768_ + <: + usize) /! + sz 8 -let v_RANK_768_: usize = sz 3 +let v_C1_SIZE_768_: usize = v_C1_BLOCK_SIZE_768_ *! v_RANK_768_ -let v_SECRET_KEY_SIZE_768_: usize = sz 2400 +let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 -let v_T_AS_NTT_ENCODED_SIZE_768_: usize = sz 1152 +let v_C2_SIZE_768_: usize = + (Libcrux_ml_kem.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! v_VECTOR_V_COMPRESSION_FACTOR_768_ + <: + usize) /! + sz 8 -let v_VECTOR_U_COMPRESSION_FACTOR_768_: usize = sz 10 +let v_CPA_PKE_CIPHERTEXT_SIZE_768_: usize = v_C1_SIZE_768_ +! v_C2_SIZE_768_ -let v_VECTOR_V_COMPRESSION_FACTOR_768_: usize = sz 4 +let v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: usize = + Libcrux_ml_kem.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_768_ /// Validate a private key. /// Returns `true` if valid, and `false` otherwise. 
diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst index 9f607fddd..f400f5ccd 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Portable.Arithmetic.fst @@ -28,7 +28,7 @@ let get_n_least_significant_bits (n: u8) (value: u32) = #pop-options -#push-options "--z3rlimit 150" +#push-options "--z3rlimit 200" let barrett_reduce_element (value: i16) = let t:i32 = diff --git a/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst b/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst index 5c77472f2..cbe51c827 100644 --- a/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst +++ b/libcrux-ml-kem/proofs/fstar/spec/Spec.Utils.fst @@ -361,7 +361,9 @@ val lemma_mont_mul_red_i16_int (x y:i16): Lemma let result:i16 = mont_mul_red_i16 x y in is_i16b 3328 result /\ v result % 3329 == (v x * v y * 169) % 3329)) - + +#push-options "--z3rlimit 200" + let lemma_mont_mul_red_i16_int (x y:i16) = let vlow = x *. y in let prod = v x * v y in @@ -429,6 +431,7 @@ let lemma_mont_mul_red_i16_int (x y:i16) = ((prod) * 169) % 3329; } +#pop-options val lemma_mont_mul_red_i16 (x y:i16): Lemma (requires (is_i16b 1664 y \/ is_intb (3326 * pow2 15) (v x * v y))) From 83a72e794daa8e6943bc85af53bc2cfd7b592e9b Mon Sep 17 00:00:00 2001 From: karthikbhargavan Date: Tue, 3 Dec 2024 22:58:03 +0000 Subject: [PATCH 7/7] fstar --- .../fstar/extraction/Libcrux_ml_kem.Types.fst | 155 ------------------ .../extraction/Libcrux_ml_kem.Types.fsti | 141 ++++++++++++++-- .../Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst | 2 +- .../Libcrux_ml_kem.Vector.Avx2.Serialize.fst | 1 - 4 files changed, 127 insertions(+), 172 deletions(-) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst index 3a598d127..5748d2562 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fst @@ -9,103 +9,10 @@ let impl_13__len (v_SIZE: usize) (_: Prims.unit) = v_SIZE let impl_20__len (v_SIZE: usize) (_: Prims.unit) = v_SIZE -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_1 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemCiphertext v_SIZE) -> true); - f_from - = - fun (value: t_Array u8 v_SIZE) -> - { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } - <: - t_MlKemCiphertext v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_2 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemCiphertext v_SIZE) = - { - f_from_pre = (fun (value: t_MlKemCiphertext v_SIZE) -> true); - f_from_post = (fun (value: t_MlKemCiphertext v_SIZE) (out: t_Array u8 v_SIZE) -> true); - f_from = fun (value: t_MlKemCiphertext v_SIZE) -> value.f_value - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_5 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post - = - (fun (value: t_Array u8 v_SIZE) (result: t_MlKemCiphertext v_SIZE) -> result.f_value = value); - f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemCiphertext v_SIZE - } - let 
impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) = self.f_value -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_8 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPrivateKey v_SIZE) -> true); - f_from - = - fun (value: t_Array u8 v_SIZE) -> - { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } - <: - t_MlKemPrivateKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_9 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPrivateKey v_SIZE) = - { - f_from_pre = (fun (value: t_MlKemPrivateKey v_SIZE) -> true); - f_from_post = (fun (value: t_MlKemPrivateKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); - f_from = fun (value: t_MlKemPrivateKey v_SIZE) -> value.f_value - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_12 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post - = - (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPrivateKey v_SIZE) -> result.f_value = value); - f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPrivateKey v_SIZE - } - let impl_13__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) = self.f_value -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_15 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPublicKey v_SIZE) -> true); - f_from - = - fun (value: t_Array u8 v_SIZE) -> - { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } - <: - t_MlKemPublicKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_16 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPublicKey v_SIZE) = - { - f_from_pre = (fun (value: t_MlKemPublicKey v_SIZE) -> true); - f_from_post = (fun (value: t_MlKemPublicKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); - f_from = fun (value: t_MlKemPublicKey v_SIZE) -> value.f_value - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_19 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = - { - f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); - f_from_post - = - (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPublicKey v_SIZE) -> result.f_value = value); - f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPublicKey v_SIZE - } - let impl_20__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) = self.f_value let impl_21__from @@ -178,65 +85,3 @@ let unpack_private_key (v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) (private <: (t_Slice u8 & t_Slice u8 & t_Slice u8 & t_Slice u8) -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl (v_SIZE: usize) : Core.Default.t_Default (t_MlKemCiphertext v_SIZE) = - { - f_default_pre = (fun (_: Prims.unit) -> true); - f_default_post = (fun (_: Prims.unit) (out: t_MlKemCiphertext v_SIZE) -> true); - f_default - = - fun (_: Prims.unit) -> - { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemCiphertext v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_7 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPrivateKey v_SIZE) = - { - f_default_pre = (fun (_: Prims.unit) -> true); - f_default_post = (fun (_: Prims.unit) 
(out: t_MlKemPrivateKey v_SIZE) -> true); - f_default - = - fun (_: Prims.unit) -> - { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPrivateKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_14 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPublicKey v_SIZE) = - { - f_default_pre = (fun (_: Prims.unit) -> true); - f_default_post = (fun (_: Prims.unit) (out: t_MlKemPublicKey v_SIZE) -> true); - f_default - = - fun (_: Prims.unit) -> - { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPublicKey v_SIZE - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_4 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemCiphertext v_SIZE) (t_Slice u8) = - { - f_as_ref_pre = (fun (self: t_MlKemCiphertext v_SIZE) -> true); - f_as_ref_post - = - (fun (self___: t_MlKemCiphertext v_SIZE) (result: t_Slice u8) -> result = self___.f_value); - f_as_ref = fun (self: t_MlKemCiphertext v_SIZE) -> self.f_value <: t_Slice u8 - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_11 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = - { - f_as_ref_pre = (fun (self: t_MlKemPrivateKey v_SIZE) -> true); - f_as_ref_post - = - (fun (self___: t_MlKemPrivateKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); - f_as_ref = fun (self: t_MlKemPrivateKey v_SIZE) -> self.f_value <: t_Slice u8 - } - -[@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_18 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) = - { - f_as_ref_pre = (fun (self: t_MlKemPublicKey v_SIZE) -> true); - f_as_ref_post - = - (fun (self___: t_MlKemPublicKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); - f_as_ref = fun (self: t_MlKemPublicKey v_SIZE) -> self.f_value <: t_Slice u8 - } diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti index 4f76c2ffc..1947307c5 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Types.fsti @@ -19,13 +19,35 @@ val impl_20__len: v_SIZE: usize -> Prims.unit type t_MlKemCiphertext (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_1 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) +let impl_1 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemCiphertext v_SIZE) -> true); + f_from + = + fun (value: t_Array u8 v_SIZE) -> + { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } + <: + t_MlKemCiphertext v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_2 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemCiphertext v_SIZE) +let impl_2 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemCiphertext v_SIZE) = + { + f_from_pre = (fun (value: t_MlKemCiphertext v_SIZE) -> true); + f_from_post = (fun (value: t_MlKemCiphertext v_SIZE) (out: t_Array u8 v_SIZE) -> true); + f_from = fun (value: t_MlKemCiphertext v_SIZE) -> value.f_value + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_5 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) +let impl_5 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemCiphertext v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> 
true); + f_from_post + = + (fun (value: t_Array u8 v_SIZE) (result: t_MlKemCiphertext v_SIZE) -> result.f_value = value); + f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemCiphertext v_SIZE + } /// A reference to the raw byte slice. val impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) @@ -40,13 +62,35 @@ val impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) type t_MlKemPrivateKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_8 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) +let impl_8 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPrivateKey v_SIZE) -> true); + f_from + = + fun (value: t_Array u8 v_SIZE) -> + { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } + <: + t_MlKemPrivateKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_9 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPrivateKey v_SIZE) +let impl_9 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPrivateKey v_SIZE) = + { + f_from_pre = (fun (value: t_MlKemPrivateKey v_SIZE) -> true); + f_from_post = (fun (value: t_MlKemPrivateKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); + f_from = fun (value: t_MlKemPrivateKey v_SIZE) -> value.f_value + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_12 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) +let impl_12 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPrivateKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post + = + (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPrivateKey v_SIZE) -> result.f_value = value); + f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPrivateKey v_SIZE + } /// A reference to the raw byte slice. 
val impl_13__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) @@ -61,13 +105,35 @@ val impl_13__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) type t_MlKemPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_15 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) +let impl_15 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post = (fun (value: t_Array u8 v_SIZE) (out: t_MlKemPublicKey v_SIZE) -> true); + f_from + = + fun (value: t_Array u8 v_SIZE) -> + { f_value = Core.Clone.f_clone #(t_Array u8 v_SIZE) #FStar.Tactics.Typeclasses.solve value } + <: + t_MlKemPublicKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_16 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPublicKey v_SIZE) +let impl_16 (v_SIZE: usize) : Core.Convert.t_From (t_Array u8 v_SIZE) (t_MlKemPublicKey v_SIZE) = + { + f_from_pre = (fun (value: t_MlKemPublicKey v_SIZE) -> true); + f_from_post = (fun (value: t_MlKemPublicKey v_SIZE) (out: t_Array u8 v_SIZE) -> true); + f_from = fun (value: t_MlKemPublicKey v_SIZE) -> value.f_value + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_19 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) +let impl_19 (v_SIZE: usize) : Core.Convert.t_From (t_MlKemPublicKey v_SIZE) (t_Array u8 v_SIZE) = + { + f_from_pre = (fun (value: t_Array u8 v_SIZE) -> true); + f_from_post + = + (fun (value: t_Array u8 v_SIZE) (result: t_MlKemPublicKey v_SIZE) -> result.f_value = value); + f_from = fun (value: t_Array u8 v_SIZE) -> { f_value = value } <: t_MlKemPublicKey v_SIZE + } /// A reference to the raw byte slice. 
val impl_20__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) @@ -169,22 +235,67 @@ val unpack_private_key (v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) (private v Libcrux_ml_kem.Constants.v_H_DIGEST_SIZE)) [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl (v_SIZE: usize) : Core.Default.t_Default (t_MlKemCiphertext v_SIZE) +let impl (v_SIZE: usize) : Core.Default.t_Default (t_MlKemCiphertext v_SIZE) = + { + f_default_pre = (fun (_: Prims.unit) -> true); + f_default_post = (fun (_: Prims.unit) (out: t_MlKemCiphertext v_SIZE) -> true); + f_default + = + fun (_: Prims.unit) -> + { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemCiphertext v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_7 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPrivateKey v_SIZE) +let impl_7 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPrivateKey v_SIZE) = + { + f_default_pre = (fun (_: Prims.unit) -> true); + f_default_post = (fun (_: Prims.unit) (out: t_MlKemPrivateKey v_SIZE) -> true); + f_default + = + fun (_: Prims.unit) -> + { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPrivateKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_14 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPublicKey v_SIZE) +let impl_14 (v_SIZE: usize) : Core.Default.t_Default (t_MlKemPublicKey v_SIZE) = + { + f_default_pre = (fun (_: Prims.unit) -> true); + f_default_post = (fun (_: Prims.unit) (out: t_MlKemPublicKey v_SIZE) -> true); + f_default + = + fun (_: Prims.unit) -> + { f_value = Rust_primitives.Hax.repeat 0uy v_SIZE } <: t_MlKemPublicKey v_SIZE + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_4 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemCiphertext v_SIZE) (t_Slice u8) +let impl_4 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemCiphertext v_SIZE) (t_Slice u8) = + { + f_as_ref_pre = (fun (self: t_MlKemCiphertext v_SIZE) -> true); + f_as_ref_post + = + (fun (self___: t_MlKemCiphertext v_SIZE) (result: t_Slice u8) -> result = self___.f_value); + f_as_ref = fun (self: t_MlKemCiphertext v_SIZE) -> self.f_value <: t_Slice u8 + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_11 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPrivateKey v_SIZE) (t_Slice u8) +let impl_11 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = + { + f_as_ref_pre = (fun (self: t_MlKemPrivateKey v_SIZE) -> true); + f_as_ref_post + = + (fun (self___: t_MlKemPrivateKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); + f_as_ref = fun (self: t_MlKemPrivateKey v_SIZE) -> self.f_value <: t_Slice u8 + } [@@ FStar.Tactics.Typeclasses.tcinstance] -val impl_18 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) +let impl_18 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) = + { + f_as_ref_pre = (fun (self: t_MlKemPublicKey v_SIZE) -> true); + f_as_ref_post + = + (fun (self___: t_MlKemPublicKey v_SIZE) (result: t_Slice u8) -> result = self___.f_value); + f_as_ref = fun (self: t_MlKemPublicKey v_SIZE) -> self.f_value <: t_Slice u8 + } [@@ FStar.Tactics.Typeclasses.tcinstance] let impl_3 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemCiphertext v_SIZE) (t_Slice u8) = diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst index 6f960e706..cba0ea581 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst +++ 
b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Arithmetic.fst @@ -184,7 +184,7 @@ let cond_subtract_3329_ (vector: Libcrux_intrinsics.Avx2_extract.t_Vec256) = #pop-options -#push-options "--z3rlimit 200" +#push-options "--z3rlimit 250" let montgomery_multiply_by_constant (vector: Libcrux_intrinsics.Avx2_extract.t_Vec256) diff --git a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst index b0c197583..00fb6832a 100644 --- a/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst +++ b/libcrux-ml-kem/proofs/fstar/extraction/Libcrux_ml_kem.Vector.Avx2.Serialize.fst @@ -38,7 +38,6 @@ let deserialize_1_ (bytes: t_Slice u8) = deserialize_1___deserialize_1_u8s (bytes.[ sz 0 ] <: u8) (bytes.[ sz 1 ] <: u8) [@@"opaque_to_smt"] - let deserialize_4___deserialize_4_i16s (b0 b1 b2 b3 b4 b5 b6 b7: i16) = let coefficients:Libcrux_intrinsics.Avx2_extract.t_Vec256 = Libcrux_intrinsics.Avx2_extract.mm256_set_epi16 b7 b7 b6 b6 b5 b5 b4 b4 b3 b3 b2 b2 b1 b1 b0 b0