diff --git a/libcrux-ml-kem/c/code_gen.txt b/libcrux-ml-kem/c/code_gen.txt index 8606206e0..54242b657 100644 --- a/libcrux-ml-kem/c/code_gen.txt +++ b/libcrux-ml-kem/c/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f -Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c -Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 -F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc -Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 +Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 +Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 +Karamel: 8c3612018c25889288da6857771be3ad03b75bcd +F*: 5643e656b989aca7629723653a2570c7df6252b9 +Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 diff --git a/libcrux-ml-kem/c/internal/libcrux_core.h b/libcrux-ml-kem/c/internal/libcrux_core.h index fe0dc7d7d..fe89acd19 100644 --- a/libcrux-ml-kem/c/internal/libcrux_core.h +++ b/libcrux-ml-kem/c/internal/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_core_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h index 48345a968..466ef3ba0 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h index e89d87311..f108fb1a3 100644 --- a/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/internal/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h index 78fe0a95b..67b2d4675 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h index 92381f50f..342c481f4 100644 --- a/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/internal/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __internal_libcrux_sha3_internal_H @@ -273,13 +273,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)136U) { - consumed = (size_t)136U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)136U) { + consumed = (size_t)136U - self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -385,9 +380,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -734,13 +727,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)168U) { - consumed = (size_t)168U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)168U) { + consumed = (size_t)168U - self->buf_len; { size_t i = (size_t)0U; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -846,9 +834,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); { size_t i = (size_t)0U; @@ -1238,13 +1224,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)136U >= out_len) { mid = out_len; } else { mid = (size_t)136U; @@ -1258,11 +1238,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1271,11 +1248,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -1370,13 +1343,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)168U >= out_len) { mid = out_len; } else { mid = (size_t)168U; @@ -1390,11 +1357,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -1403,11 +1367,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); diff --git a/libcrux-ml-kem/c/libcrux_core.c b/libcrux-ml-kem/c/libcrux_core.c index de354115a..e69d41843 100644 --- a/libcrux-ml-kem/c/libcrux_core.c +++ b/libcrux-ml-kem/c/libcrux_core.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_core.h" diff --git a/libcrux-ml-kem/c/libcrux_core.h b/libcrux-ml-kem/c/libcrux_core.h index 55c5c5d8e..9097eceda 100644 --- a/libcrux-ml-kem/c/libcrux_core.h +++ b/libcrux-ml-kem/c/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024.h b/libcrux-ml-kem/c/libcrux_mlkem1024.h index 37334a9b1..041b2ec09 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c index 778d6fbf3..5fec937b0 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem1024_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h index 854751c45..96971f755 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c index e463cb267..c63594eaa 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem1024_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h index 430c904d1..f951149be 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem1024_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem1024_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512.h b/libcrux-ml-kem/c/libcrux_mlkem512.h index fb7755a5a..0e850ae5d 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c index 3e9fbd0cc..7971b5c4f 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem512_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h index 79012290d..3c4030f73 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c index 8639c4603..b8f6fd756 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem512_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h index faea31c8a..7766250f2 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem512_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem512_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem512_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768.h b/libcrux-ml-kem/c/libcrux_mlkem768.h index 474b96082..f2c7db21a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c index a7a0f7e7d..d30955e8a 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem768_avx2.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h index 35608499b..ea29365da 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c index 2d21b9d89..1cdebda61 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_mlkem768_portable.h" diff --git a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h index 514894426..6c512c865 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_portable_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c index 64e5d2462..7cd2d548f 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_mlkem_avx2.h" @@ -141,16 +141,11 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); - __m256i v_minus_field_modulus = - mm256_sub_epi16(/* Compute v_i - Q and crate a mask from the sign bit of - each of these quantities. */ - vector, - field_modulus); + __m256i v_minus_field_modulus = mm256_sub_epi16(vector, field_modulus); __m256i sign_mask = mm256_srai_epi16((int32_t)15, v_minus_field_modulus, __m256i); - __m256i conditional_add_field_modulus = mm256_and_si256( - /* If v_i - Q < 0 then add back Q to (v_i - Q). */ sign_mask, - field_modulus); + __m256i conditional_add_field_modulus = + mm256_and_si256(sign_mask, field_modulus); return mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -455,7 +450,6 @@ libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(__m256i vec) { KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { - /* Compute the first term of the product */ __m256i shuffle_with = mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -463,8 +457,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = - mm256_shuffle_epi8(/* Prepare the left hand side */ lhs, shuffle_with); + __m256i lhs_shuffled = mm256_shuffle_epi8(lhs, shuffle_with); __m256i lhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = mm256_castsi256_si128(lhs_shuffled0); @@ -472,8 +465,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = mm256_extracti128_si256((int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = - mm256_shuffle_epi8(/* Prepare the right hand side */ rhs, shuffle_with); + __m256i rhs_shuffled = mm256_shuffle_epi8(rhs, shuffle_with); __m256i rhs_shuffled0 = mm256_permute4x64_epi64((int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = mm256_castsi256_si128(rhs_shuffled0); @@ -481,8 +473,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = mm256_extracti128_si256((int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = mm256_cvtepi16_epi32(rhs_odds); - __m256i left = - mm256_mullo_epi32(/* Start operating with them */ lhs_evens0, rhs_evens0); + __m256i left = mm256_mullo_epi32(lhs_evens0, rhs_evens0); __m256i right = mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s(right); @@ -495,7 +486,7 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = mm256_shuffle_epi8( - /* Compute the second term of the product */ rhs, + rhs, mm256_set_epi8((int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, (int8_t)1, (int8_t)0, (int8_t)3, @@ -509,9 +500,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = mm256_slli_epi32((int32_t)16, products_right0, __m256i); - return mm256_blend_epi16((int32_t)170, - /* Combine them into one vector */ products_left0, - products_right1, __m256i); + return mm256_blend_epi16((int32_t)170, products_left0, products_right1, + __m256i); } /** @@ -527,44 +517,11 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = mm256_slli_epi16( - (int32_t)15, - /* Suppose |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ - 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least - significant bit in each lane, move it to the most significant position - to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ - d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ - n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ - vector, __m256i); - __m128i low_msbs = mm256_castsi256_si128( - /* Get the first 8 16-bit elements ... */ lsb_to_msb); - __m128i high_msbs = mm256_extracti128_si256( - (int32_t)1, - /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); - __m128i msbs = - mm_packs_epi16(/* ... and then pack them into 8-bit values using signed - saturation. This function packs all the |low_msbs|, and - then the high ones. low_msbs = a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | - e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ - l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ We shifted by 15 above - to take advantage of the signed saturation performed by - mm_packs_epi16: - if the sign bit of the 16-bit element - being packed is 1, the corresponding 8-bit element in - |msbs| will be 0xFF. - if the sign bit of the 16-bit - element being packed is 0, the corresponding 8-bit - element in |msbs| will be 0. Thus, if, for example, a₀ = - 1, e₀ = 1, and p₀ = 1, and every other bit is 0, after - packing into 8 bit value, |msbs| will look like: 0xFF - 0x00 0x00 0x00 | 0xFF 0x00 0x00 0x00 | 0x00 0x00 0x00 - 0x00 | 0x00 0x00 0x00 0xFF */ - low_msbs, - high_msbs); - int32_t bits_packed = - mm_movemask_epi8(/* Now that every element is either 0xFF or 0x00, we just - extract the most significant bit from each element and - collate them into two bytes. */ - msbs); + __m256i lsb_to_msb = mm256_slli_epi16((int32_t)15, vector, __m256i); + __m128i low_msbs = mm256_castsi256_si128(lsb_to_msb); + __m128i high_msbs = mm256_extracti128_si256((int32_t)1, lsb_to_msb, __m128i); + __m128i msbs = mm_packs_epi16(low_msbs, high_msbs); + int32_t bits_packed = mm_movemask_epi8(msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -582,39 +539,16 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { __m256i coefficients = - mm256_set_epi16(/* We need to take each bit from the 2 bytes of input and - put them into their own 16-bit lane. Ideally, we'd load - the two bytes into the vector, duplicate them, and - right-shift the 0th element by 0 bits, the first - element by 1 bit, the second by 2 bits and so on before - AND-ing with 0x1 to leave only the least signifinicant - bit. But since |_mm256_srlv_epi16| does not exist, so - we have to resort to a workaround. Rather than shifting - each element by a different amount, we'll multiply each - element by a value such that the bit we're interested - in becomes the most significant bit. The coefficients - are loaded as follows: */ - b, - b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); - __m256i coefficients_in_msb = - mm256_mullo_epi16(/* And this vector, when multiplied with the previous - one, ensures that the bit we'd like to keep in each - lane becomes the most significant bit upon - multiplication. */ - coefficients, - mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, - (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768, - (int16_t)1 << 8U, (int16_t)1 << 9U, - (int16_t)1 << 10U, (int16_t)1 << 11U, - (int16_t)1 << 12U, (int16_t)1 << 13U, - (int16_t)1 << 14U, (int16_t)-32768)); - return mm256_srli_epi16( - (int32_t)15, - /* Now that they're all in the most significant bit position, shift them - down to the least significant bit. */ - coefficients_in_msb, __m256i); + mm256_set_epi16(b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = mm256_mullo_epi16( + coefficients, + mm256_set_epi16((int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, + (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, + (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, + (int16_t)-32768)); + return mm256_srli_epi16((int32_t)15, coefficients_in_msb, __m256i); } KRML_MUSTINLINE __m256i @@ -627,23 +561,7 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index( - bytes, - /* We need to take each bit from the 2 bytes of input and put them - into their own 16-bit lane. Ideally, we'd load the two bytes into - the vector, duplicate them, and right-shift the 0th element by 0 - bits, the first element by 1 bit, the second by 2 bits and so on - before AND-ing with 0x1 to leave only the least signifinicant bit. - But since |_mm256_srlv_epi16| does not exist, so we have to resort - to a workaround. Rather than shifting each element by a different - amount, we'll multiply each element by a value such that the bit - we're interested in becomes the most significant bit. The - coefficients are loaded as follows: And this vector, when - multiplied with the previous one, ensures that the bit we'd like to - keep in each lane becomes the most significant bit upon - multiplication. Now that they're all in the most significant bit - position, shift them down to the least significant bit. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -676,47 +594,23 @@ KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 4U, - /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | - 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be - laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA - 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ - vector); - __m256i adjacent_8_combined = - mm256_shuffle_epi8(/* Recall that |adjacent_2_combined| goes as follows: - 0x00_00_00_BA 0x00_00_00_DC | 0x00_00_00_FE - 0x00_00_00_HG | ... Out of this, we only need the - first byte, the 4th byte, the 8th byte and so on - from the bottom and the top 128 bits. */ - adjacent_2_combined, - mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); - __m256i combined = - mm256_permutevar8x32_epi32(/* |adjacent_8_combined| looks like this: 0: - 0xHG_FE_DC_BA 1: 0x00_00_00_00 | 2: - 0x00_00_00_00 3: 0x00_00_00_00 | 4: - 0xPO_NM_LK_JI .... We put the element at 4 - after the element at 0 ... */ - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)0, - (int32_t)4, (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); + __m256i adjacent_8_combined = mm256_shuffle_epi8( + adjacent_2_combined, + mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, + (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, + (int8_t)4, (int8_t)0)); + __m256i combined = mm256_permutevar8x32_epi32( + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); __m128i combined0 = mm256_castsi256_si128(combined); mm_storeu_bytes_si128( - Eurydice_array_to_slice( - (size_t)16U, - /* ... so that we can read them out in one go. */ serialized, - uint8_t), - combined0); + Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); uint8_t ret0[8U]; core_result_Result_15 dst; Eurydice_slice_to_array2( @@ -740,23 +634,8 @@ KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = - mm256_set_epi16(/* Every 4 bits from each byte of input should be put into - its own 16-bit lane. Since |_mm256_srlv_epi16| does not - exist, we have to resort to a workaround. Rather than - shifting each element by a different amount, we'll - multiply each element by a value such that the bits - we're interested in become the most significant bits - (of an 8-bit value). In this lane, the 4 bits we need - to put are already the most significant bits of - |bytes[7]| (that is, b7). */ - b7, - /* In this lane, the 4 bits we need to put are the least - significant bits, so we need to shift the 4 - least-significant bits of |b7| to the most significant - bits (of an 8-bit value). */ - b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, - b0); + __m256i coefficients = mm256_set_epi16(b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, + b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = mm256_mullo_epi16( coefficients, mm256_set_epi16((int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -765,12 +644,9 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); - __m256i coefficients_in_lsb = mm256_srli_epi16( - (int32_t)4, - /* Once the 4-bit coefficients are in the most significant positions (of - an 8-bit value), shift them all down by 4. */ - coefficients_in_msb, __m256i); - return mm256_and_si256(/* Zero the remaining bits. */ coefficients_in_lsb, + __m256i coefficients_in_lsb = + mm256_srli_epi16((int32_t)4, coefficients_in_msb, __m256i); + return mm256_and_si256(coefficients_in_lsb, mm256_set1_epi16(((int16_t)1 << 4U) - (int16_t)1)); } @@ -786,23 +662,7 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index( - bytes, - /* Every 4 bits from each byte of input should be put into its own - 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to - resort to a workaround. Rather than shifting each element by a - different amount, we'll multiply each element by a value such that - the bits we're interested in become the most significant bits (of - an 8-bit value). In this lane, the 4 bits we need to put are - already the most significant bits of |bytes[7]| (that is, b7). In - this lane, the 4 bits we need to put are the least significant - bits, so we need to shift the 4 least-significant bits of |b7| to - the most significant bits (of an 8-bit value). These constants are - chosen to shift the bits of the values that we loaded into - |coefficients|. Once the 4-bit coefficients are in the most - significant positions (of an 8-bit value), shift them all down - by 4. Zero the remaining bits. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -824,78 +684,32 @@ libcrux_ml_kem_vector_avx2_deserialize_4_09(Eurydice_slice bytes) { KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = - mm256_madd_epi16(/* If |vector| is laid out as follows (superscript number - indicates the corresponding bit is duplicated that - many times): 0¹¹a₄a₃a₂a₁a₀ 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ - 0¹¹d₄d₃d₂d₁d₀ | ↩ 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ - 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | ↩ |adjacent_2_combined| - will be laid out as a series of 32-bit integers, as - follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... */ - vector, - mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1, (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, - (int16_t)1)); - __m256i adjacent_4_combined = - mm256_sllv_epi32(/* Recall that |adjacent_2_combined| is laid out as - follows: 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ .... This shift results - in: b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | - ↩ f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, - (int32_t)22, (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22)); - __m256i adjacent_4_combined0 = mm256_srli_epi64( - (int32_t)22, - /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift - down by 22 bits to remove the least significant 0 bits that aren't part - of the bits we need. */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = mm256_shuffle_epi32( - (int32_t)8, - /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks - like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² - 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to - read out the bytes in one go, we need to shifts the bits in position 2 - to position 1 in each 128-bit lane. */ - adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = - mm256_sllv_epi32(/* |adjacent_8_combined|, when viewed as a set of 32-bit - values, now looks like: - 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 0³² 0³² | - ↩ Once again, we line these bits up by shifting the up - values at indices 0 and 5 by 12, viewing the resulting - register as a set of 64-bit values, and then shifting - down the 64-bit values by 12 bits. */ - adjacent_8_combined, - mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12)); + __m256i adjacent_2_combined = mm256_madd_epi16( + vector, mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); + __m256i adjacent_4_combined = mm256_sllv_epi32( + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, + (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22)); + __m256i adjacent_4_combined0 = + mm256_srli_epi64((int32_t)22, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = + mm256_shuffle_epi32((int32_t)8, adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = mm256_sllv_epi32( + adjacent_8_combined, + mm256_set_epi32((int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = mm256_srli_epi64((int32_t)12, adjacent_8_combined0, __m256i); - __m128i lower_8 = - mm256_castsi256_si128(/* We now have 40 bits starting at position 0 in the - lower 128-bit lane, ... */ - adjacent_8_combined1); + __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined1); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); - __m128i upper_8 = mm256_extracti128_si256( - (int32_t)1, - /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ - adjacent_8_combined1, __m128i); + __m128i upper_8 = + mm256_extracti128_si256((int32_t)1, adjacent_8_combined1, __m128i); mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -989,67 +803,25 @@ core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 10U, - /* If |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... - |adjacent_2_combined| will be laid out as a series of 32-bit - integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ - vector); - __m256i adjacent_4_combined = - mm256_sllv_epi32(/* Shifting up the values at the even indices by 12, we - get: b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ ... */ - adjacent_2_combined, - mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, - (int32_t)12, (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); + __m256i adjacent_4_combined = mm256_sllv_epi32( + adjacent_2_combined, + mm256_set_epi32((int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, + (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = - mm256_srli_epi64((int32_t)12, - /* Viewing this as a set of 64-bit integers we get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ ... Shifting down by 12 gives us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ ... */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - mm256_shuffle_epi8(/* |adjacent_4_combined|, when the bottom and top 128 - bit-lanes are grouped into bytes, looks like: - 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ In - each 128-bit lane, we want to put bytes 8, 9, 10, - 11, 12 after bytes 0, 1, 2, 3 to allow for - sequential reading. */ - adjacent_4_combined0, - mm256_set_epi8( - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, - (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); - __m128i lower_8 = - mm256_castsi256_si128(/* We now have 64 bits starting at position 0 in the - lower 128-bit lane, ... */ - adjacent_8_combined); - __m128i upper_8 = mm256_extracti128_si256( - (int32_t)1, - /* and 64 bits starting at position 0 in the upper 128-bit lane. */ - adjacent_8_combined, __m128i); + mm256_srli_epi64((int32_t)12, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = mm256_shuffle_epi8( + adjacent_4_combined0, + mm256_set_epi8((int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, + (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, + (int8_t)0, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, + (int8_t)9, (int8_t)8, (int8_t)4, (int8_t)3, (int8_t)2, + (int8_t)1, (int8_t)0)); + __m128i lower_8 = mm256_castsi256_si128(adjacent_8_combined); + __m128i upper_8 = + mm256_extracti128_si256((int32_t)1, adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -1057,167 +829,8 @@ libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If - |vector| - is - laid - out - as - follows - (superscript - number - indicates - the - corresponding - bit - is - duplicated - that - many - times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ - | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ - | ↩ - ... - |adjacent_2_combined| - will - be - laid - out - as a - series - of - 32-bit - integers, - as - follows: - 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - .... - Shifting - up - the - values - at - the - even - indices - by - 12, - we - get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - ... - Viewing - this - as a - set - of - 64-bit - integers - we - get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ - ... - Shifting - down - by - 12 - gives - us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ - ... - |adjacent_4_combined|, - when - the - bottom - and - top - 128 - bit-lanes - are - grouped - into - bytes, - looks - like: - 0₇0₆0₅B₄B₃B₂B₁B₀ - | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ - | ↩ - In - each - 128-bit - lane, - we - want - to - put - bytes - 8, - 9, - 10, - 11, - 12 - after - bytes - 0, - 1, - 2, 3 - to - allow - for - sequential - reading. - We - now - have - 64 - bits - starting - at - position - 0 in - the - lower - 128-bit - lane, - ... - and - 64 - bits - starting - at - position - 0 in - the - upper - 128-bit - lane. - */ - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1267,16 +880,14 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 0U, (int16_t)1 << 2U, (int16_t)1 << 4U, (int16_t)1 << 6U)); __m256i coefficients1 = mm256_srli_epi16((int32_t)6, coefficients0, __m256i); - return mm256_and_si256( - /* Here I can prove this `and` is not useful */ coefficients1, - mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); + return mm256_and_si256(coefficients1, + mm256_set1_epi16(((int16_t)1 << 10U) - (int16_t)1)); } KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( - /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, - (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = + Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1442,64 +1053,26 @@ KRML_MUSTINLINE size_t libcrux_ml_kem_vector_avx2_sampling_rejection_sample( __m256i field_modulus = mm256_set1_epi16(LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can - be interpreted as a - sequence of - serialized 12-bit - (i.e. uncompressed) - coefficients. Not - all coefficients - may be less than - FIELD_MODULUS - though. */ - input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); __m256i compare_with_field_modulus = - mm256_cmpgt_epi16(/* Suppose we view |potential_coefficients| as follows - (grouping 64-bit elements): A B C D | E F G H | .... - and A < 3329, D < 3329 and H < 3329, - |compare_with_field_modulus| will look like: 0xFF 0 0 - 0xFF | 0 0 0 0xFF | ... */ - field_modulus, - potential_coefficients); + mm256_cmpgt_epi16(field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each - lane is either 0 or 1, we - only need one bit from - each lane in the register - to tell us what - coefficients to keep and - what to throw-away. - Combine all the bits - (there are 16) into two - bytes. */ - compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, - /* Each bit (and its corresponding position) represents an element we - want to sample. We'd like all such elements to be next to each other - starting at index 0, so that they can be read from the vector - easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level - shuffling indices needed to make this happen. For e.g. if good[0] = - 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit - lane to the first. To do this, we need the byte-level shuffle - indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = mm_loadu_si128(Eurydice_array_to_slice( - (size_t)16U, - /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, - uint8_t)); + __m128i lower_shuffles0 = mm_loadu_si128( + Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); __m128i lower_coefficients = mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = mm_shuffle_epi8(lower_coefficients, lower_shuffles0); - mm_storeu_si128(/* ... then write them out ... */ output, - lower_coefficients0); + mm_storeu_si128(output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, - /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1852,13 +1425,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_a9_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, - uint8_t), + H_a9_e0(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -2345,10 +1914,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -2407,7 +1972,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -2622,12 +2187,7 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -2679,13 +2239,7 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2771,11 +2325,7 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2790,9 +2340,7 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - ntt_at_layer_7_61(/* Due to the small coefficient bound, we can skip the first - round of Montgomery reductions. */ - re); + ntt_at_layer_7_61(re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)5U); @@ -2925,14 +2473,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2966,17 +2509,10 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; - __m256i coefficient_normal_form = to_standard_domain_61( - self->coefficients[/* The coefficients are of the form aR^{-1} mod q, - which means calling to_montgomery_domain() on them - should return a mod q. */ - j]); + __m256i coefficient_normal_form = + to_standard_domain_61(self->coefficients[j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -3006,8 +2542,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3083,10 +2617,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( IndCpaPrivateKeyUnpacked_63 *private_key, IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -3116,8 +2647,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_221( sample_vector_cbd_then_ntt_out_b41(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_ab(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_ab(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -3142,13 +2673,11 @@ serialize_unpacked_secret_key_8c(IndCpaPublicKeyUnpacked_63 *public_key, IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_ed( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_ed( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -3335,15 +2864,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_ab(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -3520,13 +3045,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3557,10 +3076,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -3586,11 +3102,7 @@ static KRML_MUSTINLINE void add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -3703,26 +3215,8 @@ add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in top-level - declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing function cannot be - translated into C*: let mutable ret(Mark.Present,(Mark.AtMost - 2), ): int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the following code: - ```rust result.coefficients[i] = - Vector::barrett_reduce(Vector::add( coefficient_normal_form, - &Vector::add(self.coefficients[i], &message.coefficients[i]), - )); ``` */ - i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -3770,18 +3264,8 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3789,18 +3273,12 @@ compress_ciphertext_coefficient_ef(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)10, coefficients_high0, __m256i); @@ -3813,20 +3291,8 @@ compress_ciphertext_coefficient_ef(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -3880,18 +3346,8 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -3899,18 +3355,12 @@ compress_ciphertext_coefficient_c4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)11, coefficients_high0, __m256i); @@ -3923,20 +3373,8 @@ compress_ciphertext_coefficient_c4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4014,18 +3452,8 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -4033,18 +3461,12 @@ compress_ciphertext_coefficient_d1(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)4, coefficients_high0, __m256i); @@ -4057,20 +3479,8 @@ compress_ciphertext_coefficient_d1(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4096,11 +3506,7 @@ static KRML_MUSTINLINE void compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficient = compress_09_d1(to_unsigned_field_modulus_61(re.coefficients[i0])); @@ -4127,18 +3533,8 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compression_factor = mm256_set1_epi32((int32_t)10321340); __m256i coefficient_bits_mask = mm256_set1_epi32(((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); - __m128i coefficients_low = - mm256_castsi256_si128(/* ---- Compress the first 8 coefficients ---- Take - the bottom 128 bits, i.e. the first 8 16-bit - coefficients */ - vector); - __m256i coefficients_low0 = - mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] = A - coefficients_low[16:31] = B - coefficients_low[32:63] = C and so on ... after - this step: coefficients_low[0:31] = A - coefficients_low[32:63] = B and so on ... */ - coefficients_low); + __m128i coefficients_low = mm256_castsi256_si128(vector); + __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = mm256_slli_epi32((int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = @@ -4146,18 +3542,12 @@ compress_ciphertext_coefficient_f4(__m256i vector) { __m256i compressed_low1 = libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); - __m256i compressed_low2 = mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + __m256i compressed_low2 = + mm256_srli_epi32((int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = mm256_and_si256(compressed_low2, coefficient_bits_mask); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = mm256_slli_epi32((int32_t)5, coefficients_high0, __m256i); @@ -4170,20 +3560,8 @@ compress_ciphertext_coefficient_f4(__m256i vector) { mm256_srli_epi32((int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = mm256_and_si256(compressed_high2, coefficient_bits_mask); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - compressed_low3, - compressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(compressed_low3, compressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4209,11 +3587,7 @@ static KRML_MUSTINLINE void compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficients = compress_09_f4(to_unsigned_representative_61(re.coefficients[i0])); @@ -4301,11 +3675,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -4317,7 +3687,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = sample_ring_element_cbd_b41(copy_of_prf_input, domain_separator0); @@ -4326,7 +3695,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -4334,11 +3703,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -4347,14 +3714,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_741( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -4549,8 +3914,7 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)10); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4558,16 +3922,12 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)10, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4575,27 +3935,12 @@ decompress_ciphertext_coefficient_ef(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)10, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4650,8 +3995,7 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)11); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4659,16 +4003,12 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)11, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4676,27 +4016,12 @@ decompress_ciphertext_coefficient_c4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)11, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4819,8 +4144,7 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)4); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4828,16 +4152,12 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)4, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4845,27 +4165,12 @@ decompress_ciphertext_coefficient_d1(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)4, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -4915,8 +4220,7 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_set1_epi32((int32_t)LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i two_pow_coefficient_bits = mm256_set1_epi32((int32_t)1 << (uint32_t)(int32_t)5); - __m128i coefficients_low = mm256_castsi256_si128( - /* ---- Compress the first 8 coefficients ---- */ vector); + __m128i coefficients_low = mm256_castsi256_si128(vector); __m256i coefficients_low0 = mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = mm256_mullo_epi32(coefficients_low0, field_modulus); @@ -4924,16 +4228,12 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_low, __m256i); __m256i decompressed_low1 = mm256_add_epi32(decompressed_low0, two_pow_coefficient_bits); - __m256i decompressed_low2 = mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + __m256i decompressed_low2 = + mm256_srli_epi32((int32_t)5, decompressed_low1, __m256i); __m256i decompressed_low3 = mm256_srli_epi32((int32_t)1, decompressed_low2, __m256i); - __m128i coefficients_high = mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + __m128i coefficients_high = + mm256_extracti128_si256((int32_t)1, vector, __m128i); __m256i coefficients_high0 = mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = mm256_mullo_epi32(coefficients_high0, field_modulus); @@ -4941,27 +4241,12 @@ decompress_ciphertext_coefficient_f4(__m256i vector) { mm256_slli_epi32((int32_t)1, decompressed_high, __m256i); __m256i decompressed_high1 = mm256_add_epi32(decompressed_high0, two_pow_coefficient_bits); - __m256i decompressed_high2 = mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + __m256i decompressed_high2 = + mm256_srli_epi32((int32_t)5, decompressed_high1, __m256i); __m256i decompressed_high3 = mm256_srli_epi32((int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - mm256_packs_epi32(/* Combining them, and grouping each set of 64-bits, - this function results in: 0: low low low low | 1: - high high high high | 2: low low low low | 3: high - high high high where each |low| and |high| is a - 16-bit element */ - decompressed_low3, - decompressed_high3); - return mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = mm256_packs_epi32(decompressed_low3, decompressed_high3); + return mm256_permute4x64_epi64((int32_t)216, compressed, __m256i); } /** @@ -5128,14 +4413,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_2f( IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - deserialize_then_decompress_u_ed( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5156,8 +4438,7 @@ with const generics static KRML_MUSTINLINE void decrypt_2f(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - deserialize_secret_key_ab(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_ab(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -5477,13 +4758,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_5e( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_a9_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, - uint8_t), + H_a9_ac(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -5960,10 +5237,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_78( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -6022,7 +5295,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6173,14 +5446,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -6211,8 +5479,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_42( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6288,10 +5554,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( IndCpaPrivateKeyUnpacked_39 *private_key, IndCpaPublicKeyUnpacked_39 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_6a(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_6a(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6321,8 +5584,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_22( sample_vector_cbd_then_ntt_out_b4(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_42(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_42(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6347,13 +5610,11 @@ serialize_unpacked_secret_key_c9(IndCpaPublicKeyUnpacked_39 *public_key, IndCpaPrivateKeyUnpacked_39 *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_1e( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_78( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_78(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -6540,15 +5801,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_39 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_42(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -6640,10 +5897,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_42( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -6873,11 +6127,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( IndCpaPublicKeyUnpacked_39 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -6889,7 +6139,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd uu____3 = sample_ring_element_cbd_b4(copy_of_prf_input, domain_separator0); @@ -6898,7 +6147,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -6906,11 +6155,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[4U]; - compute_vector_u_42(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_42(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -6919,14 +6166,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_74( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[4U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_c9( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_1e( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -7240,14 +6485,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_37( IndCpaPrivateKeyUnpacked_39 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[4U]; - deserialize_then_decompress_u_1e( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_1e(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_78( - Eurydice_array_to_subslice_from( - (size_t)1568U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, + (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_42(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7268,8 +6510,7 @@ with const generics static KRML_MUSTINLINE void decrypt_37(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[4U]; - deserialize_secret_key_42(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_42(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[4U]; memcpy( @@ -7577,13 +6818,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_4d( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_a9_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, - uint8_t), + H_a9_fd(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -8034,10 +7271,6 @@ static KRML_MUSTINLINE void sample_from_xof_6c0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_29( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -8096,7 +7329,7 @@ static KRML_MUSTINLINE void sample_matrix_A_6c0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8252,14 +7485,9 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -8290,8 +7518,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_89( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = ZERO_ef_61(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8367,10 +7593,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( IndCpaPrivateKeyUnpacked_94 *private_key, IndCpaPublicKeyUnpacked_94 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_f8(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_f8(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8400,8 +7623,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_220( sample_vector_cbd_then_ntt_out_b40(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); - compute_As_plus_e_89(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_89(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8426,13 +7649,11 @@ serialize_unpacked_secret_key_2d(IndCpaPublicKeyUnpacked_94 *public_key, IndCpaPrivateKeyUnpacked_94 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_ba( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_29( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_29(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -8619,15 +7840,11 @@ libcrux_ml_kem_hash_functions_avx2_Simd256Hash with const generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_fa( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_94 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_89(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -8765,10 +7982,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_89( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_61(&zeta_i, re); invert_ntt_at_layer_2_61(&zeta_i, re); invert_ntt_at_layer_3_61(&zeta_i, re); @@ -8960,11 +8174,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( IndCpaPublicKeyUnpacked_94 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -8976,7 +8186,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_40 uu____3 = sample_ring_element_cbd_b40(copy_of_prf_input, domain_separator0); @@ -8985,7 +8194,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_a9_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -8993,11 +8202,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[2U]; - compute_vector_u_89(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_89(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = deserialize_then_decompress_message_61(copy_of_message); @@ -9006,14 +8213,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_740( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[2U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); compress_then_serialize_u_2d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; compress_then_serialize_ring_element_v_ba( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -9297,14 +8502,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_4b( IndCpaPrivateKeyUnpacked_94 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[2U]; - deserialize_then_decompress_u_ba( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_ba(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = deserialize_then_decompress_ring_element_v_29( - Eurydice_array_to_subslice_from( - (size_t)768U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)768U, ciphertext, + (size_t)640U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = compute_message_89(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9325,8 +8527,7 @@ with const generics static KRML_MUSTINLINE void decrypt_4b(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[2U]; - deserialize_secret_key_89(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_89(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[2U]; memcpy( diff --git a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h index addfdaf30..95dad8cf8 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_avx2.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.c b/libcrux-ml-kem/c/libcrux_mlkem_portable.c index fddae347c..1d3a317a8 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.c +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_mlkem_portable.h" @@ -1152,28 +1152,11 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( */ uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = - (int16_t)1664 - - (int16_t) /* The approach used here is inspired by: - https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 - If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ - fe; - int16_t mask = - /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = - -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive - <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so - if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ - shifted - - >> 15U; + int16_t shifted = (int16_t)1664 - (int16_t)fe; + int16_t mask = shifted >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = - /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the - most significant bit of shifted_positive_in_range will be 1. */ - shifted_positive_in_range - - >> 15U; + int16_t r0 = shifted_positive_in_range >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1209,16 +1192,7 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = - (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits - == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); - hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to - be constant time due to: - https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ - */ - fe - - << (uint32_t)coefficient_bits; + uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2738,13 +2712,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_60( libcrux_ml_kem_types_MlKemPrivateKey_83 *private_key) { uint8_t t[32U]; - H_f1_ac(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)4U, - (size_t)768U * (size_t)4U + (size_t)32U, - uint8_t), + H_f1_ac(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)4U, + (size_t)768U * (size_t)4U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)4U + (size_t)32U, @@ -3234,10 +3204,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)4U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_ff( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -3297,7 +3263,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3495,12 +3461,7 @@ with const generics static KRML_MUSTINLINE void ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -3562,13 +3523,7 @@ static KRML_MUSTINLINE void ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3660,11 +3615,7 @@ with const generics static KRML_MUSTINLINE void poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3681,9 +3632,7 @@ with const generics */ static KRML_MUSTINLINE void ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - ntt_at_layer_7_8c(/* Due to the small coefficient bound, we can skip the first - round of Montgomery reductions. */ - re); + ntt_at_layer_7_8c(re); size_t zeta_i = (size_t)1U; ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U); ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)5U); @@ -3822,11 +3771,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_d0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3866,18 +3811,10 @@ static KRML_MUSTINLINE void add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector - coefficient_normal_form = to_standard_domain_8c( - self->coefficients[/* The coefficients are of the form aR^{-1} mod - q, which means calling to_montgomery_domain() - on them should return a mod q. */ - j]); + coefficient_normal_form = to_standard_domain_8c(self->coefficients[j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -3909,8 +3846,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_d0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -3986,10 +3921,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( IndCpaPrivateKeyUnpacked_af *private_key, IndCpaPublicKeyUnpacked_af *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_03(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_03(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -4019,8 +3951,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c( sample_vector_cbd_then_ntt_out_3b(copy_of_prf_input, domain_separator) .fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_d0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_d0(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -4045,13 +3977,11 @@ serialize_unpacked_secret_key_2f(IndCpaPublicKeyUnpacked_af *public_key, IndCpaPrivateKeyUnpacked_af *private_key) { uint8_t public_key_serialized[1568U]; serialize_public_key_00( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1536U]; - serialize_secret_key_ff( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_ff(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1536U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -4239,15 +4169,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_af *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1536U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1536U, uint8_t, size_t); deserialize_ring_elements_reduced_d0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1536U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1536U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[4U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4434,13 +4360,7 @@ static KRML_MUSTINLINE void invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -4471,10 +4391,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_d0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -4500,11 +4417,7 @@ static KRML_MUSTINLINE void add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4630,27 +4543,8 @@ add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d( - self->coefficients[/* FIXME: Eurydice crashes with: Warning 11: in - top-level declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing - function cannot be translated into C*: let - mutable ret(Mark.Present,(Mark.AtMost 2), ): - int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the - following code: ```rust result.coefficients[i] - = Vector::barrett_reduce(Vector::add( - coefficient_normal_form, - &Vector::add(self.coefficients[i], - &message.coefficients[i]), )); ``` */ - i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -4863,11 +4757,7 @@ static KRML_MUSTINLINE void compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = compress_0d_d1(to_unsigned_field_modulus_8c(re.coefficients[i0])); @@ -4922,11 +4812,7 @@ static KRML_MUSTINLINE void compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = compress_0d_f4(to_unsigned_representative_8c(re.coefficients[i0])); @@ -5015,11 +4901,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( IndCpaPublicKeyUnpacked_af *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1568U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5031,7 +4913,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_dd0 uu____3 = sample_ring_element_cbd_3b(copy_of_prf_input, domain_separator0); @@ -5040,7 +4921,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_440(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5048,11 +4929,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[4U]; - compute_vector_u_d0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_d0(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -5061,14 +4940,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a( &message_as_ring_element); uint8_t ciphertext[1568U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[4U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)4U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_2f( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)1408U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_00( uu____6, Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, @@ -5707,14 +5584,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_7d( IndCpaPrivateKeyUnpacked_af *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[4U]; - deserialize_then_decompress_u_00( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_00(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_ff( - Eurydice_array_to_subslice_from( - (size_t)1568U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)1408U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1568U, ciphertext, + (size_t)1408U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_d0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -5735,8 +5609,7 @@ with const generics static KRML_MUSTINLINE void decrypt_7d(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[4U]; - deserialize_secret_key_d0(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_d0(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[4U]; memcpy( @@ -6056,13 +5929,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_30( libcrux_ml_kem_types_MlKemPrivateKey_fa *private_key) { uint8_t t[32U]; - H_f1_fd(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)2U, - (size_t)768U * (size_t)2U + (size_t)32U, - uint8_t), + H_f1_fd(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)2U, + (size_t)768U * (size_t)2U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)2U + (size_t)32U, @@ -6512,10 +6381,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b0( memcpy(copy_of_randomness0, randomness0, (size_t)2U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_64( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -6575,7 +6440,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b0( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -6721,11 +6586,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_a0( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -6760,8 +6621,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_a0( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -6837,10 +6696,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( IndCpaPrivateKeyUnpacked_d4 *private_key, IndCpaPublicKeyUnpacked_d4 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_10(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_10(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6870,8 +6726,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c0( sample_vector_cbd_then_ntt_out_3b0(copy_of_prf_input, domain_separator) .fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_a0(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_a0(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6896,13 +6752,11 @@ serialize_unpacked_secret_key_6d(IndCpaPublicKeyUnpacked_d4 *public_key, IndCpaPrivateKeyUnpacked_d4 *private_key) { uint8_t public_key_serialized[800U]; serialize_public_key_86( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[768U]; - serialize_secret_key_64( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_64(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[768U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -7090,15 +6944,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f0( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_d4 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)768U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)768U, uint8_t, size_t); deserialize_ring_elements_reduced_a0(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)768U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)768U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[2U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7224,10 +7074,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_a0( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -7458,11 +7305,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( IndCpaPublicKeyUnpacked_d4 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[768U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -7475,7 +7318,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_400 uu____3 = sample_ring_element_cbd_3b0(copy_of_prf_input, domain_separator0); @@ -7484,7 +7326,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( error_1, uu____3.fst, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_490(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -7492,11 +7334,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[2U]; - compute_vector_u_a0(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_a0(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -7505,14 +7345,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a0( &message_as_ring_element); uint8_t ciphertext[768U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[2U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)2U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_6d( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)640U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_86( uu____6, Eurydice_array_to_subslice_from((size_t)768U, ciphertext, @@ -7827,14 +7665,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_d1( IndCpaPrivateKeyUnpacked_d4 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[2U]; - deserialize_then_decompress_u_86( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_86(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_64( - Eurydice_array_to_subslice_from( - (size_t)768U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)640U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)768U, ciphertext, + (size_t)640U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_a0(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -7855,8 +7690,7 @@ with const generics static KRML_MUSTINLINE void decrypt_d1(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[2U]; - deserialize_secret_key_a0(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_a0(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[2U]; memcpy( @@ -8164,13 +7998,9 @@ with const generics bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; - H_f1_e0(Eurydice_array_to_subslice2(/* Eurydice can't access values directly - on the types. We need to go to the - `value` directly. */ - private_key->value, - (size_t)384U * (size_t)3U, - (size_t)768U * (size_t)3U + (size_t)32U, - uint8_t), + H_f1_e0(Eurydice_array_to_subslice2( + private_key->value, (size_t)384U * (size_t)3U, + (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); Eurydice_slice expected = Eurydice_array_to_subslice2( private_key->value, (size_t)768U * (size_t)3U + (size_t)32U, @@ -8626,10 +8456,6 @@ static KRML_MUSTINLINE void sample_from_xof_2b1( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -8689,7 +8515,7 @@ static KRML_MUSTINLINE void sample_matrix_A_2b1( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -8824,11 +8650,7 @@ static KRML_MUSTINLINE void add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -8863,8 +8685,6 @@ static KRML_MUSTINLINE void compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = ZERO_ef_8c(); t_as_ntt[i0] = uu____0; for (size_t i1 = (size_t)0U; @@ -8940,10 +8760,7 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( IndCpaPrivateKeyUnpacked_a0 *private_key, IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) := G(d || K) for - ML-KEM */ - key_generation_seed, - hashed); + cpa_keygen_seed_d8_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -8973,8 +8790,8 @@ static KRML_MUSTINLINE void generate_keypair_unpacked_1c1( sample_vector_cbd_then_ntt_out_3b1(copy_of_prf_input, domain_separator) .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); - compute_As_plus_e_1b(/* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, - public_key->A, private_key->secret_as_ntt, error_as_ntt); + compute_As_plus_e_1b(public_key->t_as_ntt, public_key->A, + private_key->secret_as_ntt, error_as_ntt); uint8_t uu____5[32U]; core_result_Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -8999,13 +8816,11 @@ serialize_unpacked_secret_key_43(IndCpaPublicKeyUnpacked_a0 *public_key, IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; serialize_public_key_6c( - /* pk := (Encode_12(tˆ mod^{+}q) || ρ) */ public_key->t_as_ntt, + public_key->t_as_ntt, Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), public_key_serialized); uint8_t secret_key_serialized[1152U]; - serialize_secret_key_89( - /* sk := Encode_12(sˆ mod^{+}q) */ private_key->secret_as_ntt, - secret_key_serialized); + serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; memcpy(copy_of_secret_key_serialized, secret_key_serialized, @@ -9193,15 +9008,11 @@ generics static KRML_MUSTINLINE void build_unpacked_public_key_mut_3f1( Eurydice_slice public_key, IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); deserialize_ring_elements_reduced_1b(uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, - (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -9295,10 +9106,7 @@ with const generics static KRML_MUSTINLINE void invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; invert_ntt_at_layer_1_8c(&zeta_i, re); invert_ntt_at_layer_2_8c(&zeta_i, re); invert_ntt_at_layer_3_8c(&zeta_i, re); @@ -9491,11 +9299,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, - prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -9508,7 +9312,6 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = sample_ring_element_cbd_3b1(copy_of_prf_input, domain_separator0); @@ -9517,7 +9320,7 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; PRF_f1_410(Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -9525,11 +9328,9 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ public_key->A, - r_as_ntt, error_1, u); + compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = deserialize_then_decompress_message_8c(copy_of_message); @@ -9538,14 +9339,12 @@ static KRML_MUSTINLINE void encrypt_unpacked_2a1( &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -9830,14 +9629,11 @@ static KRML_MUSTINLINE void decrypt_unpacked_42( IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - deserialize_then_decompress_u_6c( - /* u := Decompress_q(Decode_{d_u}(c), d_u) */ ciphertext, u_as_ntt); + deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); uint8_t ret0[32U]; @@ -9858,8 +9654,7 @@ with const generics static KRML_MUSTINLINE void decrypt_42(Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - deserialize_secret_key_1b(/* sˆ := Decode_12(sk) */ secret_key, - secret_as_ntt); + deserialize_secret_key_1b(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( diff --git a/libcrux-ml-kem/c/libcrux_mlkem_portable.h b/libcrux-ml-kem/c/libcrux_mlkem_portable.h index 012f00992..ccb5a6654 100644 --- a/libcrux-ml-kem/c/libcrux_mlkem_portable.h +++ b/libcrux-ml-kem/c/libcrux_mlkem_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem_portable_H diff --git a/libcrux-ml-kem/c/libcrux_sha3.h b/libcrux-ml-kem/c/libcrux_sha3.h index 16a61b7e6..393be1f15 100644 --- a/libcrux-ml-kem/c/libcrux_sha3.h +++ b/libcrux-ml-kem/c/libcrux_sha3.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.c b/libcrux-ml-kem/c/libcrux_sha3_avx2.c index 23fa30cd5..3274dc64a 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.c +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "internal/libcrux_sha3_avx2.h" @@ -77,8 +77,7 @@ static KRML_MUSTINLINE __m256i and_not_xor_ef(__m256i a, __m256i b, __m256i c) { } static KRML_MUSTINLINE __m256i _veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = mm256_set1_epi64x( - (int64_t) /* Casting here is required, doesn't change the value. */ c); + __m256i c0 = mm256_set1_epi64x((int64_t)c); return mm256_xor_si256(a, c0); } @@ -1431,13 +1430,13 @@ static KRML_MUSTINLINE void store_block_5b(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = mm256_permute2x128_si256( - (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = + mm256_permute2x128_si256((int32_t)32, + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], @@ -1748,16 +1747,7 @@ void libcrux_sha3_avx2_x4_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = { - /* XXX: These functions could alternatively implement the same with the - portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, - 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, - 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, - 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); - keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, - 0x1fu8>([input3], [out3]); } */ - input0, - input1, input2, input3}; + Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; keccak_fb(buf0, buf); } @@ -1972,13 +1962,13 @@ static KRML_MUSTINLINE void store_block_3a(__m256i (*s)[5U], s[((size_t)4U * i0 + (size_t)2U) / (size_t)5U] [((size_t)4U * i0 + (size_t)2U) % (size_t)5U], __m256i); - __m256i v1h = mm256_permute2x128_si256( - (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], - s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] - [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], - __m256i); + __m256i v1h = + mm256_permute2x128_si256((int32_t)32, + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], + s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] + [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], + __m256i); __m256i v2l = mm256_permute2x128_si256( (int32_t)49, s[(size_t)4U * i0 / (size_t)5U][(size_t)4U * i0 % (size_t)5U], diff --git a/libcrux-ml-kem/c/libcrux_sha3_avx2.h b/libcrux-ml-kem/c/libcrux_sha3_avx2.h index 645f80b34..eaa8d8c25 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/c/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_avx2_H diff --git a/libcrux-ml-kem/c/libcrux_sha3_internal.h b/libcrux-ml-kem/c/libcrux_sha3_internal.h index 74eeb47a3..c68ee5802 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_internal.h +++ b/libcrux-ml-kem/c/libcrux_sha3_internal.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_internal_H @@ -1811,7 +1811,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -2160,7 +2159,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -2509,7 +2507,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -2698,7 +2695,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2817,7 +2813,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -3166,7 +3161,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.c b/libcrux-ml-kem/c/libcrux_sha3_neon.c index 5e4416bcd..8c9edc379 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.c +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.c @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #include "libcrux_sha3_neon.h" @@ -62,7 +62,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { - /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -73,9 +72,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, */ KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let s0 = KeccakState::new(); let s1 = - * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -87,10 +83,6 @@ libcrux_sha3_neon_x2_incremental_init(void) { KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -104,10 +96,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_first_three_blocks(&mut s0, out0); - * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -121,10 +109,6 @@ KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_next_block(&mut s0, out0); - * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -148,10 +132,6 @@ libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_five_blocks( KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); diff --git a/libcrux-ml-kem/c/libcrux_sha3_neon.h b/libcrux-ml-kem/c/libcrux_sha3_neon.h index 6e264c84f..c51c09cc5 100644 --- a/libcrux-ml-kem/c/libcrux_sha3_neon.h +++ b/libcrux-ml-kem/c/libcrux_sha3_neon.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: 0e587d6e842717408ea9357e00d47e372e505c80 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_neon_H diff --git a/libcrux-ml-kem/cg/code_gen.txt b/libcrux-ml-kem/cg/code_gen.txt index 7e79f022e..54242b657 100644 --- a/libcrux-ml-kem/cg/code_gen.txt +++ b/libcrux-ml-kem/cg/code_gen.txt @@ -1,6 +1,6 @@ This code was generated with the following revisions: -Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f -Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c -Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 -F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc -Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 +Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 +Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 +Karamel: 8c3612018c25889288da6857771be3ad03b75bcd +F*: 5643e656b989aca7629723653a2570c7df6252b9 +Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 diff --git a/libcrux-ml-kem/cg/libcrux_core.h b/libcrux-ml-kem/cg/libcrux_core.h index ca8a53171..b8e2354f8 100644 --- a/libcrux-ml-kem/cg/libcrux_core.h +++ b/libcrux-ml-kem/cg/libcrux_core.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_core_H diff --git a/libcrux-ml-kem/cg/libcrux_ct_ops.h b/libcrux-ml-kem/cg/libcrux_ct_ops.h index 5f693d09c..cf4a616ac 100644 --- a/libcrux-ml-kem/cg/libcrux_ct_ops.h +++ b/libcrux-ml-kem/cg/libcrux_ct_ops.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_ct_ops_H diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h index bb50d3eaf..f6933bc18 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_avx2_H @@ -171,16 +171,11 @@ libcrux_ml_kem_vector_avx2_arithmetic_cond_subtract_3329(__m256i vector) { __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i v_minus_field_modulus = - libcrux_intrinsics_avx2_mm256_sub_epi16(/* Compute v_i - Q and crate a - mask from the sign bit of each - of these quantities. */ - vector, field_modulus); + libcrux_intrinsics_avx2_mm256_sub_epi16(vector, field_modulus); __m256i sign_mask = libcrux_intrinsics_avx2_mm256_srai_epi16( (int32_t)15, v_minus_field_modulus, __m256i); __m256i conditional_add_field_modulus = - libcrux_intrinsics_avx2_mm256_and_si256(/* If v_i - Q < 0 then add back Q - to (v_i - Q). */ - sign_mask, field_modulus); + libcrux_intrinsics_avx2_mm256_and_si256(sign_mask, field_modulus); return libcrux_intrinsics_avx2_mm256_add_epi16(v_minus_field_modulus, conditional_add_field_modulus); } @@ -562,7 +557,6 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m256i lhs, __m256i rhs, int16_t zeta0, int16_t zeta1, int16_t zeta2, int16_t zeta3) { - /* Compute the first term of the product */ __m256i shuffle_with = libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)15, (int8_t)14, (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, @@ -570,8 +564,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( (int8_t)11, (int8_t)10, (int8_t)7, (int8_t)6, (int8_t)3, (int8_t)2, (int8_t)13, (int8_t)12, (int8_t)9, (int8_t)8, (int8_t)5, (int8_t)4, (int8_t)1, (int8_t)0); - __m256i lhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Prepare the left hand side */ lhs, shuffle_with); + __m256i lhs_shuffled = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(lhs, shuffle_with); __m256i lhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, lhs_shuffled, __m256i); __m128i lhs_evens = @@ -580,8 +574,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i lhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, lhs_shuffled0, __m128i); __m256i lhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(lhs_odds); - __m256i rhs_shuffled = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Prepare the right hand side */ rhs, shuffle_with); + __m256i rhs_shuffled = + libcrux_intrinsics_avx2_mm256_shuffle_epi8(rhs, shuffle_with); __m256i rhs_shuffled0 = libcrux_intrinsics_avx2_mm256_permute4x64_epi64( (int32_t)216, rhs_shuffled, __m256i); __m128i rhs_evens = @@ -590,8 +584,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( __m128i rhs_odds = libcrux_intrinsics_avx2_mm256_extracti128_si256( (int32_t)1, rhs_shuffled0, __m128i); __m256i rhs_odds0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(rhs_odds); - __m256i left = libcrux_intrinsics_avx2_mm256_mullo_epi32( - /* Start operating with them */ lhs_evens0, rhs_evens0); + __m256i left = + libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_evens0, rhs_evens0); __m256i right = libcrux_intrinsics_avx2_mm256_mullo_epi32(lhs_odds0, rhs_odds0); __m256i right0 = @@ -606,7 +600,7 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( libcrux_ml_kem_vector_avx2_arithmetic_montgomery_reduce_i32s( products_left); __m256i rhs_adjacent_swapped = libcrux_intrinsics_avx2_mm256_shuffle_epi8( - /* Compute the second term of the product */ rhs, + rhs, libcrux_intrinsics_avx2_mm256_set_epi8( (int8_t)13, (int8_t)12, (int8_t)15, (int8_t)14, (int8_t)9, (int8_t)8, (int8_t)11, (int8_t)10, (int8_t)5, (int8_t)4, (int8_t)7, (int8_t)6, @@ -621,10 +615,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_ntt_multiply( products_right); __m256i products_right1 = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)16, products_right0, __m256i); - return libcrux_intrinsics_avx2_mm256_blend_epi16( - (int32_t)170, - /* Combine them into one vector */ products_left0, products_right1, - __m256i); + return libcrux_intrinsics_avx2_mm256_blend_epi16((int32_t)170, products_left0, + products_right1, __m256i); } /** @@ -642,60 +634,13 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_ntt_multiply_09( KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_1( __m256i vector, uint8_t ret[2U]) { - __m256i lsb_to_msb = libcrux_intrinsics_avx2_mm256_slli_epi16( - (int32_t)15, - /* Suppose |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): 0¹⁵a₀ 0¹⁵b₀ 0¹⁵c₀ - 0¹⁵d₀ | 0¹⁵e₀ 0¹⁵f₀ 0¹⁵g₀ 0¹⁵h₀ | ... We care only about the least - significant bit in each lane, move it to the most significant position - to make it easier to work with. |vector| now becomes: a₀0¹⁵ b₀0¹⁵ c₀0¹⁵ - d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ g₀0¹⁵ h₀0¹⁵ | ↩ i₀0¹⁵ j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ - n₀0¹⁵ o₀0¹⁵ p₀0¹⁵ */ - vector, __m256i); - __m128i low_msbs = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* Get the first 8 16-bit - elements ... */ - lsb_to_msb); + __m256i lsb_to_msb = + libcrux_intrinsics_avx2_mm256_slli_epi16((int32_t)15, vector, __m256i); + __m128i low_msbs = libcrux_intrinsics_avx2_mm256_castsi256_si128(lsb_to_msb); __m128i high_msbs = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ... and the next 8 16-bit elements ... */ lsb_to_msb, __m128i); - __m128i msbs = - libcrux_intrinsics_avx2_mm_packs_epi16(/* ... and then pack them into - 8-bit values using signed - saturation. This function packs - all the |low_msbs|, and then the - high ones. low_msbs = a₀0¹⁵ - b₀0¹⁵ c₀0¹⁵ d₀0¹⁵ | e₀0¹⁵ f₀0¹⁵ - g₀0¹⁵ h₀0¹⁵ high_msbs = i₀0¹⁵ - j₀0¹⁵ k₀0¹⁵ l₀0¹⁵ | m₀0¹⁵ n₀0¹⁵ - o₀0¹⁵ p₀0¹⁵ We shifted by 15 - above to take advantage of the - signed saturation performed by - mm_packs_epi16: - if the sign - bit of the 16-bit element being - packed is 1, the corresponding - 8-bit element in |msbs| will be - 0xFF. - if the sign bit of the - 16-bit element being packed is - 0, the corresponding 8-bit - element in |msbs| will be 0. - Thus, if, for example, a₀ = 1, - e₀ = 1, and p₀ = 1, and every - other bit is 0, after packing - into 8 bit value, |msbs| will - look like: 0xFF 0x00 0x00 0x00 | - 0xFF 0x00 0x00 0x00 | 0x00 0x00 - 0x00 0x00 | 0x00 0x00 0x00 0xFF - */ - low_msbs, high_msbs); - int32_t bits_packed = - libcrux_intrinsics_avx2_mm_movemask_epi8(/* Now that every element is - either 0xFF or 0x00, we just - extract the most significant - bit from each element and - collate them into two bytes. - */ - msbs); + (int32_t)1, lsb_to_msb, __m128i); + __m128i msbs = libcrux_intrinsics_avx2_mm_packs_epi16(low_msbs, high_msbs); + int32_t bits_packed = libcrux_intrinsics_avx2_mm_movemask_epi8(msbs); uint8_t result[2U] = {(uint8_t)bits_packed, (uint8_t)(bits_packed >> 8U)}; memcpy(ret, result, (size_t)2U * sizeof(uint8_t)); } @@ -714,63 +659,18 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_i16s( int16_t a, int16_t b) { - __m256i coefficients = - libcrux_intrinsics_avx2_mm256_set_epi16(/* We need to take each bit from - the 2 bytes of input and put - them into their own 16-bit - lane. Ideally, we'd load the - two bytes into the vector, - duplicate them, and right-shift - the 0th element by 0 bits, the - first element by 1 bit, the - second by 2 bits and so on - before AND-ing with 0x1 to - leave only the least - signifinicant bit. But since - |_mm256_srlv_epi16| does not - exist, so we have to resort to - a workaround. Rather than - shifting each element by a - different amount, we'll - multiply each element by a - value such that the bit we're - interested in becomes the most - significant bit. The - coefficients are loaded as - follows: */ - b, b, b, b, b, b, b, b, a, a, a, - a, a, a, a, a); - __m256i coefficients_in_msb = - libcrux_intrinsics_avx2_mm256_mullo_epi16(/* And this vector, when - multiplied with the previous - one, ensures that the bit - we'd like to keep in each - lane becomes the most - significant bit upon - multiplication. */ - coefficients, - libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 8U, - (int16_t)1 << 9U, - (int16_t)1 << 10U, - (int16_t)1 << 11U, - (int16_t)1 << 12U, - (int16_t)1 << 13U, - (int16_t)1 << 14U, - (int16_t)-32768, - (int16_t)1 << 8U, - (int16_t)1 << 9U, - (int16_t)1 << 10U, - (int16_t)1 << 11U, - (int16_t)1 << 12U, - (int16_t)1 << 13U, - (int16_t)1 << 14U, - (int16_t)-32768)); - return libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)15, - /* Now that they're all in the most significant bit position, shift them - down to the least significant bit. */ - coefficients_in_msb, __m256i); + __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( + b, b, b, b, b, b, b, b, a, a, a, a, a, a, a, a); + __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( + coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 8U, (int16_t)1 << 9U, (int16_t)1 << 10U, + (int16_t)1 << 11U, (int16_t)1 << 12U, (int16_t)1 << 13U, + (int16_t)1 << 14U, (int16_t)-32768, (int16_t)1 << 8U, + (int16_t)1 << 9U, (int16_t)1 << 10U, (int16_t)1 << 11U, + (int16_t)1 << 12U, (int16_t)1 << 13U, (int16_t)1 << 14U, + (int16_t)-32768)); + return libcrux_intrinsics_avx2_mm256_srli_epi16((int32_t)15, + coefficients_in_msb, __m256i); } KRML_ATTRIBUTE_TARGET("avx2") @@ -785,23 +685,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_1(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_1_deserialize_1_u8s( - Eurydice_slice_index( - bytes, - /* We need to take each bit from the 2 bytes of input and put them - into their own 16-bit lane. Ideally, we'd load the two bytes into - the vector, duplicate them, and right-shift the 0th element by 0 - bits, the first element by 1 bit, the second by 2 bits and so on - before AND-ing with 0x1 to leave only the least signifinicant bit. - But since |_mm256_srlv_epi16| does not exist, so we have to resort - to a workaround. Rather than shifting each element by a different - amount, we'll multiply each element by a value such that the bit - we're interested in becomes the most significant bit. The - coefficients are loaded as follows: And this vector, when - multiplied with the previous one, ensures that the bit we'd like to - keep in each lane becomes the most significant bit upon - multiplication. Now that they're all in the most significant bit - position, shift them down to the least significant bit. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *)); } @@ -837,70 +721,23 @@ static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_4( __m256i vector, uint8_t ret[8U]) { uint8_t serialized[16U] = {0U}; __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 4U, - /* If |vector| is laid out as follows: 0x000A 0x000B 0x000C 0x000D | - 0x000E 0x000F 0x000G 0x000H | .... |adjacent_2_combined| will be - laid out as a series of 32-bit integeres, as follows: 0x00_00_00_BA - 0x00_00_00_DC | 0x00_00_00_FE 0x00_00_00_HG | ... */ - vector); - __m256i adjacent_8_combined = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* Recall that - |adjacent_2_combined| goes - as follows: 0x00_00_00_BA - 0x00_00_00_DC | - 0x00_00_00_FE 0x00_00_00_HG - | ... Out of this, we only - need the first byte, the 4th - byte, the 8th byte and so on - from the bottom and the top - 128 bits. */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)8, - (int8_t)4, (int8_t)0)); - __m256i combined = - libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32(/* |adjacent_8_combined| - looks like this: 0: - 0xHG_FE_DC_BA 1: - 0x00_00_00_00 | 2: - 0x00_00_00_00 3: - 0x00_00_00_00 | 4: - 0xPO_NM_LK_JI .... - We put the element - at 4 after the - element at 0 ... */ - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)0, - (int32_t)4, - (int32_t)0)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(4U, vector); + __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)12, (int8_t)8, (int8_t)4, (int8_t)0)); + __m256i combined = libcrux_intrinsics_avx2_mm256_permutevar8x32_epi32( + adjacent_8_combined, libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)4, (int32_t)0)); __m128i combined0 = libcrux_intrinsics_avx2_mm256_castsi256_si128(combined); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( - Eurydice_array_to_slice( - (size_t)16U, - /* ... so that we can read them out in one go. */ serialized, - uint8_t), - combined0); + Eurydice_array_to_slice((size_t)16U, serialized, uint8_t), combined0); uint8_t ret0[8U]; Result_15 dst; Eurydice_slice_to_array2( @@ -926,33 +763,8 @@ static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( int16_t b0, int16_t b1, int16_t b2, int16_t b3, int16_t b4, int16_t b5, int16_t b6, int16_t b7) { - __m256i coefficients = - libcrux_intrinsics_avx2_mm256_set_epi16(/* Every 4 bits from each byte of - input should be put into its - own 16-bit lane. Since - |_mm256_srlv_epi16| does not - exist, we have to resort to a - workaround. Rather than - shifting each element by a - different amount, we'll - multiply each element by a - value such that the bits we're - interested in become the most - significant bits (of an 8-bit - value). In this lane, the 4 - bits we need to put are already - the most significant bits of - |bytes[7]| (that is, b7). */ - b7, - /* In this lane, the 4 bits we - need to put are the least - significant bits, so we need to - shift the 4 least-significant - bits of |b7| to the most - significant bits (of an 8-bit - value). */ - b7, b6, b6, b5, b5, b4, b4, b3, - b3, b2, b2, b1, b1, b0, b0); + __m256i coefficients = libcrux_intrinsics_avx2_mm256_set_epi16( + b7, b7, b6, b6, b5, b5, b4, b4, b3, b3, b2, b2, b1, b1, b0, b0); __m256i coefficients_in_msb = libcrux_intrinsics_avx2_mm256_mullo_epi16( coefficients, libcrux_intrinsics_avx2_mm256_set_epi16( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, @@ -962,14 +774,10 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_i16s( (int16_t)1 << 0U, (int16_t)1 << 4U, (int16_t)1 << 0U, (int16_t)1 << 4U)); __m256i coefficients_in_lsb = libcrux_intrinsics_avx2_mm256_srli_epi16( - (int32_t)4, - /* Once the 4-bit coefficients are in the most significant positions (of - an 8-bit value), shift them all down by 4. */ - coefficients_in_msb, __m256i); + (int32_t)4, coefficients_in_msb, __m256i); return libcrux_intrinsics_avx2_mm256_and_si256( - /* Zero the remaining bits. */ coefficients_in_lsb, - libcrux_intrinsics_avx2_mm256_set1_epi16(((int16_t)1 << 4U) - - (int16_t)1)); + coefficients_in_lsb, libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 4U) - (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") @@ -986,23 +794,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_4(Eurydice_slice bytes) { return libcrux_ml_kem_vector_avx2_serialize_deserialize_4_deserialize_4_u8s( - Eurydice_slice_index( - bytes, - /* Every 4 bits from each byte of input should be put into its own - 16-bit lane. Since |_mm256_srlv_epi16| does not exist, we have to - resort to a workaround. Rather than shifting each element by a - different amount, we'll multiply each element by a value such that - the bits we're interested in become the most significant bits (of - an 8-bit value). In this lane, the 4 bits we need to put are - already the most significant bits of |bytes[7]| (that is, b7). In - this lane, the 4 bits we need to put are the least significant - bits, so we need to shift the 4 least-significant bits of |b7| to - the most significant bits (of an 8-bit value). These constants are - chosen to shift the bits of the values that we loaded into - |coefficients|. Once the 4-bit coefficients are in the most - significant positions (of an 8-bit value), shift them all down - by 4. Zero the remaining bits. */ - (size_t)0U, uint8_t, uint8_t *), + Eurydice_slice_index(bytes, (size_t)0U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)1U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)2U, uint8_t, uint8_t *), Eurydice_slice_index(bytes, (size_t)3U, uint8_t, uint8_t *), @@ -1026,106 +818,35 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_5( __m256i vector, uint8_t ret[10U]) { uint8_t serialized[32U] = {0U}; - __m256i adjacent_2_combined = - libcrux_intrinsics_avx2_mm256_madd_epi16(/* If |vector| is laid out as - follows (superscript number - indicates the corresponding - bit is duplicated that many - times): 0¹¹a₄a₃a₂a₁a₀ - 0¹¹b₄b₃b₂b₁b₀ 0¹¹c₄c₃c₂c₁c₀ - 0¹¹d₄d₃d₂d₁d₀ | ↩ - 0¹¹e₄e₃e₂e₁e₀ 0¹¹f₄f₃f₂f₁f₀ - 0¹¹g₄g₃g₂g₁g₀ 0¹¹h₄h₃h₂h₁h₀ | - ↩ |adjacent_2_combined| will - be laid out as a series of - 32-bit integers, as follows: - 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - vector, - libcrux_intrinsics_avx2_mm256_set_epi16( - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, (int16_t)1, - (int16_t)1 << 5U, - (int16_t)1)); - __m256i adjacent_4_combined = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Recall that - |adjacent_2_combined| is laid - out as follows: - 0²²b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - 0²²f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... This shift results in: - b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀ | ↩ - f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀ | ↩ - .... */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22, - (int32_t)0, (int32_t)22)); + __m256i adjacent_2_combined = libcrux_intrinsics_avx2_mm256_madd_epi16( + vector, libcrux_intrinsics_avx2_mm256_set_epi16( + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1, + (int16_t)1 << 5U, (int16_t)1, (int16_t)1 << 5U, (int16_t)1)); + __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)22, (int32_t)0, (int32_t)22, (int32_t)0, + (int32_t)22, (int32_t)0, (int32_t)22)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)22, - /* |adjacent_4_combined|, when viewed as 64-bit lanes, is: - 0²²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀0²² | ↩ - 0²²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀0²² | ↩ ... so we just shift - down by 22 bits to remove the least significant 0 bits that aren't part - of the bits we need. */ - adjacent_4_combined, __m256i); + (int32_t)22, adjacent_4_combined, __m256i); __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi32( - (int32_t)8, - /* |adjacent_4_combined|, when viewed as a set of 32-bit values, looks - like: 0:0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ 1:0³² - 2:0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ 3:0³² | ↩ To be able to - read out the bytes in one go, we need to shifts the bits in position 2 - to position 1 in each 128-bit lane. */ - adjacent_4_combined0, __m256i); - __m256i adjacent_8_combined0 = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* |adjacent_8_combined|, when - viewed as a set of 32-bit - values, now looks like: - 0¹²d₄d₃d₂d₁d₀c₄c₃c₂c₁c₀b₄b₃b₂b₁b₀a₄a₃a₂a₁a₀ - 0¹²h₄h₃h₂h₁h₀g₄g₃g₂g₁g₀f₄f₃f₂f₁f₀e₄e₃e₂e₁e₀ - 0³² 0³² | ↩ Once again, we - line these bits up by shifting - the up values at indices 0 and - 5 by 12, viewing the resulting - register as a set of 64-bit - values, and then shifting down - the 64-bit values by 12 bits. - */ - adjacent_8_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)0, - (int32_t)0, (int32_t)12)); + (int32_t)8, adjacent_4_combined0, __m256i); + __m256i adjacent_8_combined0 = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_8_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)0, (int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)0, (int32_t)0, (int32_t)12)); __m256i adjacent_8_combined1 = libcrux_intrinsics_avx2_mm256_srli_epi64( (int32_t)12, adjacent_8_combined0, __m256i); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 40 bits - starting at position 0 in - the lower 128-bit lane, - ... */ - adjacent_8_combined1); + libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined1); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)0U, (size_t)16U, uint8_t), lower_8); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ... and the second 40 bits at position 0 in the upper 128-bit lane */ - adjacent_8_combined1, __m128i); + (int32_t)1, adjacent_8_combined1, __m128i); libcrux_intrinsics_avx2_mm_storeu_bytes_si128( Eurydice_array_to_subslice2(serialized, (size_t)5U, (size_t)21U, uint8_t), upper_8); @@ -1231,87 +952,27 @@ static inline core_core_arch_x86___m128i_x2 libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( __m256i vector) { __m256i adjacent_2_combined = - libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n( - 10U, - /* If |vector| is laid out as follows (superscript number indicates - the corresponding bit is duplicated that many times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ | ↩ ... - |adjacent_2_combined| will be laid out as a series of 32-bit - integers, as follows: 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ | ↩ .... */ - vector); - __m256i adjacent_4_combined = - libcrux_intrinsics_avx2_mm256_sllv_epi32(/* Shifting up the values at the - even indices by 12, we get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ ... */ - adjacent_2_combined, - libcrux_intrinsics_avx2_mm256_set_epi32( - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12, - (int32_t)0, (int32_t)12)); + libcrux_ml_kem_vector_avx2_serialize_mm256_concat_pairs_n(10U, vector); + __m256i adjacent_4_combined = libcrux_intrinsics_avx2_mm256_sllv_epi32( + adjacent_2_combined, + libcrux_intrinsics_avx2_mm256_set_epi32( + (int32_t)0, (int32_t)12, (int32_t)0, (int32_t)12, (int32_t)0, + (int32_t)12, (int32_t)0, (int32_t)12)); __m256i adjacent_4_combined0 = libcrux_intrinsics_avx2_mm256_srli_epi64( - (int32_t)12, - /* Viewing this as a set of 64-bit integers we get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ ... Shifting down by 12 gives us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ ... */ - adjacent_4_combined, __m256i); - __m256i adjacent_8_combined = - libcrux_intrinsics_avx2_mm256_shuffle_epi8(/* |adjacent_4_combined|, when - the bottom and top 128 - bit-lanes are grouped into - bytes, looks like: - 0₇0₆0₅B₄B₃B₂B₁B₀ | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ | ↩ - In each 128-bit lane, we - want to put bytes 8, 9, 10, - 11, 12 after bytes 0, 1, 2, - 3 to allow for sequential - reading. */ - adjacent_4_combined0, - libcrux_intrinsics_avx2_mm256_set_epi8( - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)-1, (int8_t)-1, - (int8_t)12, (int8_t)11, - (int8_t)10, (int8_t)9, - (int8_t)8, (int8_t)4, - (int8_t)3, (int8_t)2, - (int8_t)1, (int8_t)0)); + (int32_t)12, adjacent_4_combined, __m256i); + __m256i adjacent_8_combined = libcrux_intrinsics_avx2_mm256_shuffle_epi8( + adjacent_4_combined0, + libcrux_intrinsics_avx2_mm256_set_epi8( + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)-1, (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, + (int8_t)4, (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0, (int8_t)-1, + (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, (int8_t)-1, + (int8_t)12, (int8_t)11, (int8_t)10, (int8_t)9, (int8_t)8, (int8_t)4, + (int8_t)3, (int8_t)2, (int8_t)1, (int8_t)0)); __m128i lower_8 = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* We now have 64 bits - starting at position 0 in - the lower 128-bit lane, - ... */ - adjacent_8_combined); + libcrux_intrinsics_avx2_mm256_castsi256_si128(adjacent_8_combined); __m128i upper_8 = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* and 64 bits starting at position 0 in the upper 128-bit lane. */ - adjacent_8_combined, __m128i); + (int32_t)1, adjacent_8_combined, __m128i); return ( CLITERAL(core_core_arch_x86___m128i_x2){.fst = lower_8, .snd = upper_8}); } @@ -1320,167 +981,8 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_vector_avx2_serialize_serialize_10( __m256i vector, uint8_t ret[20U]) { core_core_arch_x86___m128i_x2 uu____0 = - libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec(/* If - |vector| - is - laid - out - as - follows - (superscript - number - indicates - the - corresponding - bit - is - duplicated - that - many - times): - 0⁶a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0⁶b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀ - 0⁶c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - 0⁶d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀ - | ↩ - 0⁶e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0⁶f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀ - 0⁶g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - 0⁶h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀ - | ↩ - ... - |adjacent_2_combined| - will - be - laid - out - as a - series - of - 32-bit - integers, - as - follows: - 0¹²b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - 0¹²f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - .... - Shifting - up - the - values - at - the - even - indices - by - 12, - we - get: - b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀ - | ↩ - f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀ - | ↩ - ... - Viewing - this - as a - set - of - 64-bit - integers - we - get: - 0¹²d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀0¹² - | ↩ - 0¹²h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀0¹² - | ↩ - ... - Shifting - down - by - 12 - gives - us: - 0²⁴d₉d₈d₇d₆d₅d₄d₃d₂d₁d₀c₉c₈c₇c₆c₅c₄c₃c₂c₁c₀b₉b₈b₇b₆b₅b₄b₃b₂b₁b₀a₉a₈a₇a₆a₅a₄a₃a₂a₁a₀ - | ↩ - 0²⁴h₉h₈h₇h₆h₅h₄h₃h₂h₁h₀g₉g₈g₇g₆g₅g₄g₃g₂g₁g₀f₉f₈f₇f₆f₅f₄f₃f₂f₁f₀e₉e₈e₇e₆e₅e₄e₃e₂e₁e₀ - | ↩ - ... - |adjacent_4_combined|, - when - the - bottom - and - top - 128 - bit-lanes - are - grouped - into - bytes, - looks - like: - 0₇0₆0₅B₄B₃B₂B₁B₀ - | ↩ - 0₁₅0₁₄0₁₃B₁₂B₁₁B₁₀B₉B₈ - | ↩ - In - each - 128-bit - lane, - we - want - to - put - bytes - 8, - 9, - 10, - 11, - 12 - after - bytes - 0, - 1, - 2, 3 - to - allow - for - sequential - reading. - We - now - have - 64 - bits - starting - at - position - 0 in - the - lower - 128-bit - lane, - ... - and - 64 - bits - starting - at - position - 0 in - the - upper - 128-bit - lane. - */ - vector); + libcrux_ml_kem_vector_avx2_serialize_serialize_10_serialize_10_vec( + vector); __m128i lower_8 = uu____0.fst; __m128i upper_8 = uu____0.snd; uint8_t serialized[32U] = {0U}; @@ -1536,20 +1038,16 @@ libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( (int16_t)1 << 6U)); __m256i coefficients1 = libcrux_intrinsics_avx2_mm256_srli_epi16( (int32_t)6, coefficients0, __m256i); - return libcrux_intrinsics_avx2_mm256_and_si256(/* Here I can prove this `and` - is not useful */ - coefficients1, - libcrux_intrinsics_avx2_mm256_set1_epi16( - ((int16_t)1 << 10U) - - (int16_t)1)); + return libcrux_intrinsics_avx2_mm256_and_si256( + coefficients1, libcrux_intrinsics_avx2_mm256_set1_epi16( + ((int16_t)1 << 10U) - (int16_t)1)); } KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_ml_kem_vector_avx2_serialize_deserialize_10(Eurydice_slice bytes) { - Eurydice_slice lower_coefficients = Eurydice_slice_subslice2( - /* Here I can prove this `and` is not useful */ bytes, (size_t)0U, - (size_t)16U, uint8_t); + Eurydice_slice lower_coefficients = + Eurydice_slice_subslice2(bytes, (size_t)0U, (size_t)16U, uint8_t); Eurydice_slice upper_coefficients = Eurydice_slice_subslice2(bytes, (size_t)4U, (size_t)20U, uint8_t); return libcrux_ml_kem_vector_avx2_serialize_deserialize_10_deserialize_10_vec( @@ -1735,70 +1233,28 @@ libcrux_ml_kem_vector_avx2_sampling_rejection_sample(Eurydice_slice input, __m256i field_modulus = libcrux_intrinsics_avx2_mm256_set1_epi16( LIBCRUX_ML_KEM_VECTOR_TRAITS_FIELD_MODULUS); __m256i potential_coefficients = - libcrux_ml_kem_vector_avx2_serialize_deserialize_12(/* The input bytes can - be interpreted as a - sequence of - serialized 12-bit - (i.e. uncompressed) - coefficients. Not - all coefficients - may be less than - FIELD_MODULUS - though. */ - input); + libcrux_ml_kem_vector_avx2_serialize_deserialize_12(input); __m256i compare_with_field_modulus = - libcrux_intrinsics_avx2_mm256_cmpgt_epi16(/* Suppose we view - |potential_coefficients| as - follows (grouping 64-bit - elements): A B C D | E F G H - | .... and A < 3329, D < 3329 - and H < 3329, - |compare_with_field_modulus| - will look like: 0xFF 0 0 0xFF - | 0 0 0 0xFF | ... */ - field_modulus, + libcrux_intrinsics_avx2_mm256_cmpgt_epi16(field_modulus, potential_coefficients); uint8_t good[2U]; - libcrux_ml_kem_vector_avx2_serialize_serialize_1(/* Since every bit in each - lane is either 0 or 1, we - only need one bit from - each lane in the register - to tell us what - coefficients to keep and - what to throw-away. - Combine all the bits - (there are 16) into two - bytes. */ - compare_with_field_modulus, + libcrux_ml_kem_vector_avx2_serialize_serialize_1(compare_with_field_modulus, good); uint8_t lower_shuffles[16U]; memcpy(lower_shuffles, - /* Each bit (and its corresponding position) represents an element we - want to sample. We'd like all such elements to be next to each other - starting at index 0, so that they can be read from the vector - easily. |REJECTION_SAMPLE_SHUFFLE_TABLE| encodes the byte-level - shuffling indices needed to make this happen. For e.g. if good[0] = - 0b0_0_0_0_0_0_1_0, we need to move the element in the 2-nd 16-bit - lane to the first. To do this, we need the byte-level shuffle - indices to be 2 3 X X X X ... */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[0U]], (size_t)16U * sizeof(uint8_t)); - __m128i lower_shuffles0 = - libcrux_intrinsics_avx2_mm_loadu_si128(Eurydice_array_to_slice( - (size_t)16U, - /* Shuffle the lower 8 16-bits accordingly ... */ lower_shuffles, - uint8_t)); + __m128i lower_shuffles0 = libcrux_intrinsics_avx2_mm_loadu_si128( + Eurydice_array_to_slice((size_t)16U, lower_shuffles, uint8_t)); __m128i lower_coefficients = libcrux_intrinsics_avx2_mm256_castsi256_si128(potential_coefficients); __m128i lower_coefficients0 = libcrux_intrinsics_avx2_mm_shuffle_epi8( lower_coefficients, lower_shuffles0); - libcrux_intrinsics_avx2_mm_storeu_si128( - /* ... then write them out ... */ output, lower_coefficients0); + libcrux_intrinsics_avx2_mm_storeu_si128(output, lower_coefficients0); size_t sampled_count = (size_t)core_num__u8_6__count_ones(good[0U]); uint8_t upper_shuffles[16U]; memcpy(upper_shuffles, - /* Do the same for |goood[1]| */ libcrux_ml_kem_vector_rej_sample_table_REJECTION_SAMPLE_SHUFFLE_TABLE[( size_t)good[1U]], (size_t)16U * sizeof(uint8_t)); @@ -1979,9 +1435,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)10); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -1991,15 +1445,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)10, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2009,29 +1459,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_ef( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)10, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)10, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2097,9 +1531,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)11); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2109,15 +1541,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)11, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2127,29 +1555,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_c4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)11, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)11, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2253,13 +1665,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2354,11 +1760,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_barrett_reduce_09(self->coefficients[i0]); @@ -2455,9 +1857,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)4); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2467,15 +1867,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)4, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2485,29 +1881,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_d1( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)4, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)4, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2568,9 +1948,7 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i two_pow_coefficient_bits = libcrux_intrinsics_avx2_mm256_set1_epi32( (int32_t)1 << (uint32_t)(int32_t)5); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i decompressed_low = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2580,15 +1958,11 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_low1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_low0, two_pow_coefficient_bits); __m256i decompressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_low1, __m256i); + (int32_t)5, decompressed_low1, __m256i); __m256i decompressed_low3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_low2, __m256i); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- */ vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i decompressed_high = libcrux_intrinsics_avx2_mm256_mullo_epi32( @@ -2598,29 +1972,13 @@ libcrux_ml_kem_vector_avx2_compress_decompress_ciphertext_coefficient_f4( __m256i decompressed_high1 = libcrux_intrinsics_avx2_mm256_add_epi32( decompressed_high0, two_pow_coefficient_bits); __m256i decompressed_high2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)5, - /* We can't shift in one go by (COEFFICIENT_BITS + 1) due to the lack of - support for const generic expressions. */ - decompressed_high1, __m256i); + (int32_t)5, decompressed_high1, __m256i); __m256i decompressed_high3 = libcrux_intrinsics_avx2_mm256_srli_epi32( (int32_t)1, decompressed_high2, __m256i); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - decompressed_low3, - decompressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + decompressed_low3, decompressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -2730,14 +2088,9 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *rhs) { for (size_t i = (size_t)0U; - i < - Eurydice_slice_len(Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of - loop are a workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, __m256i), - __m256i); + i < Eurydice_slice_len(Eurydice_array_to_slice( + (size_t)16U, self->coefficients, __m256i), + __m256i); i++) { size_t i0 = i; self->coefficients[i0] = libcrux_ml_kem_vector_avx2_add_09( @@ -2840,13 +2193,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_61( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -2878,10 +2225,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_ab( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_61(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_61(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_61(&zeta_i, re, (size_t)3U); @@ -3076,16 +2420,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_2f( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(/* u := - Decompress_q(Decode_{d_u}(c), - d_u) */ - ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_ed(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_ed( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message = libcrux_ml_kem_matrix_compute_message_ab(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3108,8 +2447,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_2f( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_f6 secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab( - /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 copy_of_secret_as_ntt[3U]; memcpy( @@ -3658,10 +2996,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_6c( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -3728,7 +3062,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_6c( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -3750,15 +3084,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_fa( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4028,12 +3359,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; __m256i t = libcrux_ml_kem_vector_avx2_multiply_by_constant_09( re->coefficients[j + step], (int16_t)-1600); @@ -4054,10 +3380,7 @@ KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_61(/* Due to the small coefficient bound, we - can skip the first round of Montgomery - reductions. */ - re); + libcrux_ml_kem_ntt_ntt_at_layer_7_61(re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_61(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4268,11 +3591,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( @@ -4395,26 +3714,8 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_61( __m256i coefficient_normal_form = libcrux_ml_kem_vector_avx2_montgomery_multiply_by_constant_09( result.coefficients[i0], (int16_t)1441); - __m256i tmp = libcrux_ml_kem_vector_avx2_add_09( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in top-level - declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing function cannot be - translated into C*: let mutable ret(Mark.Present,(Mark.AtMost - 2), ): int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the following code: - ```rust result.coefficients[i] = - Vector::barrett_reduce(Vector::add( coefficient_normal_form, - &Vector::add(self.coefficients[i], &message.coefficients[i]), - )); ``` */ - i0], - &message->coefficients[i0]); + __m256i tmp = libcrux_ml_kem_vector_avx2_add_09(self->coefficients[i0], + &message->coefficients[i0]); __m256i tmp0 = libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &tmp); result.coefficients[i0] = @@ -4472,23 +3773,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)10) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... */ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)10, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4497,17 +3784,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4521,23 +3802,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_ef( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4601,23 +3869,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)11) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... */ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)11, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4626,17 +3880,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4650,23 +3898,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_c4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4782,23 +4017,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)4) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... */ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)4, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4807,17 +4028,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4831,23 +4046,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_d1( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -4878,11 +4080,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficient = libcrux_ml_kem_vector_avx2_compress_09_d1( libcrux_ml_kem_serialize_to_unsigned_field_modulus_61( @@ -4914,23 +4112,9 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( __m256i coefficient_bits_mask = libcrux_intrinsics_avx2_mm256_set1_epi32( ((int32_t)1 << (uint32_t)(int32_t)5) - (int32_t)1); __m128i coefficients_low = - libcrux_intrinsics_avx2_mm256_castsi256_si128(/* ---- Compress the first 8 - coefficients ---- Take - the bottom 128 bits, i.e. - the first 8 16-bit - coefficients */ - vector); + libcrux_intrinsics_avx2_mm256_castsi256_si128(vector); __m256i coefficients_low0 = - libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(/* If: coefficients_low[0:15] - = A - coefficients_low[16:31] = - B coefficients_low[32:63] - = C and so on ... after - this step: - coefficients_low[0:31] = A - coefficients_low[32:63] = - B and so on ... */ - coefficients_low); + libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_low); __m256i compressed_low = libcrux_intrinsics_avx2_mm256_slli_epi32( (int32_t)5, coefficients_low0, __m256i); __m256i compressed_low0 = libcrux_intrinsics_avx2_mm256_add_epi32( @@ -4939,17 +4123,11 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( libcrux_ml_kem_vector_avx2_compress_mulhi_mm256_epi32(compressed_low0, compression_factor); __m256i compressed_low2 = libcrux_intrinsics_avx2_mm256_srli_epi32( - (int32_t)3, - /* Due to the mulhi_mm256_epi32 we've already shifted right by 32 bits, we - just need to shift right by 35 - 32 = 3 more. */ - compressed_low1, __m256i); + (int32_t)3, compressed_low1, __m256i); __m256i compressed_low3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_low2, coefficient_bits_mask); __m128i coefficients_high = libcrux_intrinsics_avx2_mm256_extracti128_si256( - (int32_t)1, - /* ---- Compress the next 8 coefficients ---- Take the upper 128 bits, - i.e. the next 8 16-bit coefficients */ - vector, __m128i); + (int32_t)1, vector, __m128i); __m256i coefficients_high0 = libcrux_intrinsics_avx2_mm256_cvtepi16_epi32(coefficients_high); __m256i compressed_high = libcrux_intrinsics_avx2_mm256_slli_epi32( @@ -4963,23 +4141,10 @@ libcrux_ml_kem_vector_avx2_compress_compress_ciphertext_coefficient_f4( (int32_t)3, compressed_high1, __m256i); __m256i compressed_high3 = libcrux_intrinsics_avx2_mm256_and_si256( compressed_high2, coefficient_bits_mask); - __m256i compressed = - libcrux_intrinsics_avx2_mm256_packs_epi32(/* Combining them, and grouping - each set of 64-bits, this - function results in: 0: low - low low low | 1: high high - high high | 2: low low low - low | 3: high high high high - where each |low| and |high| - is a 16-bit element */ - compressed_low3, - compressed_high3); - return libcrux_intrinsics_avx2_mm256_permute4x64_epi64( - (int32_t)216, - /* To be in the right order, we need to move the |low|s above in position - 2 to position 1 and the |high|s in position 1 to position 2, and leave - the rest unchanged. */ - compressed, __m256i); + __m256i compressed = libcrux_intrinsics_avx2_mm256_packs_epi32( + compressed_low3, compressed_high3); + return libcrux_intrinsics_avx2_mm256_permute4x64_epi64((int32_t)216, + compressed, __m256i); } /** @@ -5010,11 +4175,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; __m256i coefficients = libcrux_ml_kem_vector_avx2_compress_09_f4( libcrux_ml_kem_vector_traits_to_unsigned_representative_61( @@ -5106,10 +4267,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5122,7 +4280,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_230 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_b4( copy_of_prf_input, domain_separator0); @@ -5131,7 +4288,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_avx2_PRF_a9_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5139,12 +4296,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( libcrux_ml_kem_sampling_sample_from_binomial_distribution_89( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_ab(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ - public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_ab(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_61( @@ -5154,14 +4309,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_74( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_8c( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_ed( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5654,18 +4807,11 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_61( libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *self, libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; __m256i coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_61( - self->coefficients[/* The coefficients are of the form aR^{-1} mod - q, which means calling to_montgomery_domain() - on them should return a mod q. */ - j]); + self->coefficients[j]); self->coefficients[j] = libcrux_ml_kem_vector_avx2_barrett_reduce_09( libcrux_ml_kem_vector_avx2_add_09(coefficient_normal_form, &error->coefficients[j])); @@ -5696,8 +4842,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_ab( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_f6 uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_61(); t_as_ntt[i0] = uu____0; @@ -5778,9 +4922,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -5813,8 +4955,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_22( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -5950,18 +5092,12 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_8c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_ed(/* pk := (Encode_12(tˆ - mod^{+}q) || ρ) */ - public_key->t_as_ntt, - Eurydice_array_to_slice( - (size_t)32U, - public_key->seed_for_A, - uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_ed( + public_key->t_as_ntt, + Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(/* sk := Encode_12(sˆ mod^{+}q) - */ - private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_ed(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6633,9 +5769,7 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_63 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_be(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_be(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6668,8 +5802,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_220( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_f6)); libcrux_ml_kem_matrix_compute_As_plus_e_ab( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6834,10 +5968,7 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_ae( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_avx2_H_a9_e0( - Eurydice_array_to_subslice2(/* Eurydice can't access values directly on - the types. We need to go to the `value` - directly. */ - private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7797,10 +6928,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_b3( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_ed( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -7868,7 +6995,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_b3( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_f6 sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -7891,15 +7018,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_bf( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_63 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_ab( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_f6(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -7934,10 +7058,7 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_e2( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, - /* XXX: We need to copy_from_slice here because karamel can't handle the - assignment cf. https://github.com/FStarLang/karamel/pull/491 */ - key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_f6); libcrux_ml_kem_polynomial_PolynomialRingElement_f6 ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_ab(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h index 7a9446452..8f0de6a3e 100644 --- a/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h +++ b/libcrux-ml-kem/cg/libcrux_mlkem768_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_mlkem768_portable_H @@ -1235,28 +1235,11 @@ libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( static inline uint8_t libcrux_ml_kem_vector_portable_compress_compress_message_coefficient( uint16_t fe) { - int16_t shifted = - (int16_t)1664 - - (int16_t) /* The approach used here is inspired by: - https://github.com/cloudflare/circl/blob/main/pke/kyber/internal/common/poly.go#L150 - If 833 <= fe <= 2496, then -832 <= shifted <= 831 */ - fe; - int16_t mask = - /* If shifted < 0, then (shifted >> 15) ^ shifted = flip_bits(shifted) = - -shifted - 1, and so if -832 <= shifted < 0 then 0 < shifted_positive - <= 831 If shifted >= 0 then (shifted >> 15) ^ shifted = shifted, and so - if 0 <= shifted <= 831 then 0 <= shifted_positive <= 831 */ - shifted - - >> 15U; + int16_t shifted = (int16_t)1664 - (int16_t)fe; + int16_t mask = shifted >> 15U; int16_t shifted_to_positive = mask ^ shifted; int16_t shifted_positive_in_range = shifted_to_positive - (int16_t)832; - int16_t r0 = - /* If x <= 831, then x - 832 <= -1, and so x - 832 < 0, which means the - most significant bit of shifted_positive_in_range will be 1. */ - shifted_positive_in_range - - >> 15U; + int16_t r0 = shifted_positive_in_range >> 15U; int16_t r1 = r0 & (int16_t)1; return (uint8_t)r1; } @@ -1293,16 +1276,7 @@ libcrux_ml_kem_vector_portable_arithmetic_get_n_least_significant_bits( static inline int16_t libcrux_ml_kem_vector_portable_compress_compress_ciphertext_coefficient( uint8_t coefficient_bits, uint16_t fe) { - uint64_t compressed = - (uint64_t) /* hax_debug_assert!( coefficient_bits == 4 || coefficient_bits - == 5 || coefficient_bits == 10 || coefficient_bits == 11 ); - hax_debug_assert!(fe <= (FIELD_MODULUS as u16)); This has to - be constant time due to: - https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/ldX0ThYJuBo/m/ovODsdY7AwAJ - */ - fe - - << (uint32_t)coefficient_bits; + uint64_t compressed = (uint64_t)fe << (uint32_t)coefficient_bits; compressed = compressed + 1664ULL; compressed = compressed * 10321340ULL; compressed = compressed >> 35U; @@ -2904,13 +2878,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer, size_t _initial_coefficient_bound) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] + (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3006,11 +2974,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_poly_barrett_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( @@ -3285,11 +3249,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_to_ring_element_ef_1b( for (size_t i = (size_t)0U; i < Eurydice_slice_len( Eurydice_array_to_slice( - (size_t)16U, - /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - self->coefficients, + (size_t)16U, self->coefficients, libcrux_ml_kem_vector_portable_vector_type_PortableVector), libcrux_ml_kem_vector_portable_vector_type_PortableVector); i++) { @@ -3396,13 +3356,7 @@ libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_4_plus_8c( size_t *zeta_i, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re, size_t layer) { size_t step = (size_t)1U << (uint32_t)layer; - for (size_t i0 = (size_t)0U; - i0 < (size_t)128U >> - (uint32_t) /* The semicolon and parentheses at the end of loop are a - workaround for the following bug - https://github.com/hacspec/hax/issues/720 */ - layer; - i0++) { + for (size_t i0 = (size_t)0U; i0 < (size_t)128U >> (uint32_t)layer; i0++) { size_t round = i0; zeta_i[0U] = zeta_i[0U] - (size_t)1U; size_t offset = round * step * (size_t)2U; @@ -3433,10 +3387,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_invert_ntt_invert_ntt_montgomery_1b( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t zeta_i = - /* We only ever call this function after matrix/vector multiplication */ - LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT - - / (size_t)2U; + LIBCRUX_ML_KEM_CONSTANTS_COEFFICIENTS_IN_RING_ELEMENT / (size_t)2U; libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_1_8c(&zeta_i, re, (size_t)1U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_2_8c(&zeta_i, re, (size_t)2U); libcrux_ml_kem_invert_ntt_invert_ntt_at_layer_3_8c(&zeta_i, re, (size_t)3U); @@ -3640,16 +3591,11 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_unpacked_42( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d u_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(/* u := - Decompress_q(Decode_{d_u}(c), - d_u) */ - ciphertext, u_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_then_decompress_u_6c(ciphertext, u_as_ntt); libcrux_ml_kem_polynomial_PolynomialRingElement_1d v = libcrux_ml_kem_serialize_deserialize_then_decompress_ring_element_v_89( - Eurydice_array_to_subslice_from( - (size_t)1088U, - /* v := Decompress_q(Decode_{d_v}(c + d_u·k·n / 8), d_v) */ - ciphertext, (size_t)960U, uint8_t, size_t)); + Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, + (size_t)960U, uint8_t, size_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message = libcrux_ml_kem_matrix_compute_message_1b(&v, secret_key->secret_as_ntt, u_as_ntt); @@ -3671,8 +3617,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_decrypt_42( Eurydice_slice secret_key, uint8_t *ciphertext, uint8_t ret[32U]) { libcrux_ml_kem_polynomial_PolynomialRingElement_1d secret_as_ntt[3U]; - libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b( - /* sˆ := Decode_12(sk) */ secret_key, secret_as_ntt); + libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(secret_key, secret_as_ntt); /* Passing arrays by value in Rust generates a copy in C */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d copy_of_secret_as_ntt[3U]; memcpy( @@ -4207,10 +4152,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_sampling_sample_from_xof_2b( memcpy(copy_of_randomness0, randomness0, (size_t)3U * sizeof(uint8_t[504U])); bool done = libcrux_ml_kem_sampling_sample_from_uniform_distribution_next_89( copy_of_randomness0, sampled_coefficients, out); - /* Requiring more than 5 blocks to sample a ring element should be very - * unlikely according to: https://eprint.iacr.org/2023/708.pdf To avoid - * failing here, we squeeze more blocks out of the state until we have enough. - */ while (true) { if (done) { break; @@ -4277,7 +4218,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_sample_matrix_A_2b( i++) { size_t j = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d sample = sampled[j]; - if (/* A[i][j] = A_transpose[j][i] */ transpose) { + if (transpose) { A_transpose[j][i1] = sample; } else { A_transpose[i1][j] = sample; @@ -4299,15 +4240,12 @@ libcrux_ml_kem_ind_cpa_build_unpacked_public_key_mut_3f( Eurydice_slice public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *unpacked_public_key) { - Eurydice_slice uu____0 = Eurydice_slice_subslice_to( - /* tˆ := Decode_12(pk) */ public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice uu____0 = + Eurydice_slice_subslice_to(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_serialize_deserialize_ring_elements_reduced_1b( uu____0, unpacked_public_key->t_as_ntt); Eurydice_slice seed = - Eurydice_slice_subslice_from(/* ρ := pk + 12·k·n / 8 for i from 0 to k−1 - do for j from 0 to k − 1 do AˆT[i][j] := - Parse(XOF(ρ, i, j)) end for end for */ - public_key, (size_t)1152U, uint8_t, size_t); + Eurydice_slice_subslice_from(public_key, (size_t)1152U, uint8_t, size_t); libcrux_ml_kem_polynomial_PolynomialRingElement_1d(*uu____1)[3U] = unpacked_public_key->A; uint8_t ret[34U]; @@ -4555,12 +4493,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_at_layer_7_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { size_t step = LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT / (size_t)2U; - for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - step; - i++) { + for (size_t i = (size_t)0U; i < step; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector t = libcrux_ml_kem_vector_portable_multiply_by_constant_0d( @@ -4582,10 +4515,7 @@ with const generics static KRML_MUSTINLINE void libcrux_ml_kem_ntt_ntt_binomially_sampled_ring_element_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *re) { - libcrux_ml_kem_ntt_ntt_at_layer_7_8c(/* Due to the small coefficient bound, we - can skip the first round of Montgomery - reductions. */ - re); + libcrux_ml_kem_ntt_ntt_at_layer_7_8c(re); size_t zeta_i = (size_t)1U; libcrux_ml_kem_ntt_ntt_at_layer_4_plus_8c(&zeta_i, re, (size_t)6U, (size_t)11207U); @@ -4792,11 +4722,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_polynomial_add_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = @@ -4928,28 +4854,8 @@ libcrux_ml_kem_polynomial_add_message_error_reduce_ef_8c( libcrux_ml_kem_vector_portable_montgomery_multiply_by_constant_0d( result.coefficients[i0], (int16_t)1441); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp = - libcrux_ml_kem_vector_portable_add_0d( - self->coefficients - [/* FIXME: Eurydice crashes with: Warning 11: in - top-level declaration - libcrux_ml_kem.polynomial.{libcrux_ml_kem::polynomial::PolynomialRingElement[TraitClause@0]}.add_message_error_reduce__libcrux_ml_kem_libcrux_polynomials_PortableVector: - this expression is not Low*; the enclosing - function cannot be translated into C*: let - mutable ret(Mark.Present,(Mark.AtMost 2), ): - int16_t[16size_t] = $any in - libcrux_ml_kem.libcrux_polynomials.{(libcrux_ml_kem::libcrux_polynomials::libcrux_traits::Operations␣for␣libcrux_ml_kem::libcrux_polynomials::PortableVector)}.add - ((@9: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:int16_t[16size_t][16size_t])[@4] - &(((@8: - libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t]*)[0uint32_t]:libcrux_ml_kem_libcrux_polynomials_PortableVector[16size_t])[@4]) - @0; @0 Warning 11 is fatal, exiting. On the - following code: ```rust result.coefficients[i] - = Vector::barrett_reduce(Vector::add( - coefficient_normal_form, - &Vector::add(self.coefficients[i], - &message.coefficients[i]), )); ``` */ - i0], - &message->coefficients[i0]); + libcrux_ml_kem_vector_portable_add_0d(self->coefficients[i0], + &message->coefficients[i0]); libcrux_ml_kem_vector_portable_vector_type_PortableVector tmp0 = libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, &tmp); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = @@ -5206,11 +5112,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_4_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient = libcrux_ml_kem_vector_portable_compress_0d_d1( @@ -5270,11 +5172,7 @@ libcrux_ml_kem_serialize_compress_then_serialize_5_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d re, Eurydice_slice serialized) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t i0 = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficients = libcrux_ml_kem_vector_portable_compress_0d_f4( @@ -5366,10 +5264,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, uint8_t message[32U], Eurydice_slice randomness, uint8_t ret[1088U]) { uint8_t prf_input[33U]; - libcrux_ml_kem_utils_into_padded_array_c8(/* for i from 0 to k−1 do r[i] := - CBD{η1}(PRF(r, N)) N := N + 1 end - for rˆ := NTT(r) */ - randomness, prf_input); + libcrux_ml_kem_utils_into_padded_array_c8(randomness, prf_input); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input0[33U]; memcpy(copy_of_prf_input0, prf_input, (size_t)33U * sizeof(uint8_t)); @@ -5382,7 +5277,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( uint8_t domain_separator0 = uu____1.snd; /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_prf_input[33U]; - /* for i from 0 to k−1 do e1[i] := CBD_{η2}(PRF(r,N)) N := N + 1 end for */ memcpy(copy_of_prf_input, prf_input, (size_t)33U * sizeof(uint8_t)); tuple_23 uu____3 = libcrux_ml_kem_ind_cpa_sample_ring_element_cbd_3b( copy_of_prf_input, domain_separator0); @@ -5391,7 +5285,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( error_1, uu____3.fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); uint8_t domain_separator = uu____3.snd; - prf_input[32U] = /* e_2 := CBD{η2}(PRF(r, N)) */ domain_separator; + prf_input[32U] = domain_separator; uint8_t prf_output[128U]; libcrux_ml_kem_hash_functions_portable_PRF_f1_410( Eurydice_array_to_slice((size_t)33U, prf_input, uint8_t), prf_output); @@ -5399,12 +5293,10 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( libcrux_ml_kem_sampling_sample_from_binomial_distribution_a0( Eurydice_array_to_slice((size_t)128U, prf_output, uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d u[3U]; - libcrux_ml_kem_matrix_compute_vector_u_1b(/* u := NTT^{-1}(AˆT ◦ rˆ) + e_1 */ - public_key->A, r_as_ntt, error_1, + libcrux_ml_kem_matrix_compute_vector_u_1b(public_key->A, r_as_ntt, error_1, u); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_message[32U]; - /* v := NTT^{−1}(tˆT ◦ rˆ) + e_2 + Decompress_q(Decode_1(m),1) */ memcpy(copy_of_message, message, (size_t)32U * sizeof(uint8_t)); libcrux_ml_kem_polynomial_PolynomialRingElement_1d message_as_ring_element = libcrux_ml_kem_serialize_deserialize_then_decompress_message_8c( @@ -5414,14 +5306,12 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_encrypt_unpacked_2a( public_key->t_as_ntt, r_as_ntt, &error_2, &message_as_ring_element); uint8_t ciphertext[1088U] = {0U}; libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____5[3U]; - /* c_1 := Encode_{du}(Compress_q(u,d_u)) */ memcpy( uu____5, u, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_ind_cpa_compress_then_serialize_u_43( uu____5, Eurydice_array_to_subslice2(ciphertext, (size_t)0U, (size_t)960U, uint8_t)); - /* c_2 := Encode_{dv}(Compress_q(v,d_v)) */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____6 = v; libcrux_ml_kem_serialize_compress_then_serialize_ring_element_v_6c( uu____6, Eurydice_array_to_subslice_from((size_t)1088U, ciphertext, @@ -5847,20 +5737,12 @@ libcrux_ml_kem_polynomial_add_standard_error_reduce_ef_8c( libcrux_ml_kem_polynomial_PolynomialRingElement_1d *self, libcrux_ml_kem_polynomial_PolynomialRingElement_1d *error) { for (size_t i = (size_t)0U; - i < - /* The semicolon and parentheses at the end of loop are a workaround for - the following bug https://github.com/hacspec/hax/issues/720 */ - LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; - i++) { + i < LIBCRUX_ML_KEM_POLYNOMIAL_VECTORS_IN_RING_ELEMENT; i++) { size_t j = i; libcrux_ml_kem_vector_portable_vector_type_PortableVector coefficient_normal_form = libcrux_ml_kem_vector_traits_to_standard_domain_8c( - self->coefficients[/* The coefficients are of the form aR^{-1} - mod q, which means calling - to_montgomery_domain() on them should - return a mod q. */ - j]); + self->coefficients[j]); libcrux_ml_kem_vector_portable_vector_type_PortableVector uu____0 = libcrux_ml_kem_vector_portable_barrett_reduce_0d( libcrux_ml_kem_vector_portable_add_0d(coefficient_normal_form, @@ -5892,8 +5774,6 @@ static KRML_MUSTINLINE void libcrux_ml_kem_matrix_compute_As_plus_e_1b( i++) { size_t i0 = i; libcrux_ml_kem_polynomial_PolynomialRingElement_1d *row = matrix_A[i0]; - /* This may be externally provided memory. Ensure that `t_as_ntt` is all 0. - */ libcrux_ml_kem_polynomial_PolynomialRingElement_1d uu____0 = libcrux_ml_kem_polynomial_ZERO_ef_8c(); t_as_ntt[i0] = uu____0; @@ -5973,9 +5853,7 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_d8_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6008,8 +5886,8 @@ static KRML_MUSTINLINE void libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6141,18 +6019,12 @@ libcrux_ml_kem_ind_cpa_serialize_unpacked_secret_key_43( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key) { uint8_t public_key_serialized[1184U]; - libcrux_ml_kem_ind_cpa_serialize_public_key_6c(/* pk := (Encode_12(tˆ - mod^{+}q) || ρ) */ - public_key->t_as_ntt, - Eurydice_array_to_slice( - (size_t)32U, - public_key->seed_for_A, - uint8_t), - public_key_serialized); + libcrux_ml_kem_ind_cpa_serialize_public_key_6c( + public_key->t_as_ntt, + Eurydice_array_to_slice((size_t)32U, public_key->seed_for_A, uint8_t), + public_key_serialized); uint8_t secret_key_serialized[1152U]; - libcrux_ml_kem_ind_cpa_serialize_secret_key_89(/* sk := Encode_12(sˆ mod^{+}q) - */ - private_key->secret_as_ntt, + libcrux_ml_kem_ind_cpa_serialize_secret_key_89(private_key->secret_as_ntt, secret_key_serialized); /* Passing arrays by value in Rust generates a copy in C */ uint8_t copy_of_secret_key_serialized[1152U]; @@ -6733,9 +6605,7 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( libcrux_ml_kem_ind_cpa_unpacked_IndCpaPrivateKeyUnpacked_a0 *private_key, libcrux_ml_kem_ind_cpa_unpacked_IndCpaPublicKeyUnpacked_a0 *public_key) { uint8_t hashed[64U]; - libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(/* (ρ,σ) := G(d) for Kyber, (ρ,σ) - := G(d || K) for ML-KEM */ - key_generation_seed, hashed); + libcrux_ml_kem_variant_cpa_keygen_seed_33_9c(key_generation_seed, hashed); Eurydice_slice_uint8_t_x2 uu____0 = Eurydice_slice_split_at( Eurydice_array_to_slice((size_t)64U, hashed, uint8_t), (size_t)32U, uint8_t, Eurydice_slice_uint8_t_x2); @@ -6768,8 +6638,8 @@ libcrux_ml_kem_ind_cpa_generate_keypair_unpacked_1c0( .fst, (size_t)3U * sizeof(libcrux_ml_kem_polynomial_PolynomialRingElement_1d)); libcrux_ml_kem_matrix_compute_As_plus_e_1b( - /* tˆ := Aˆ ◦ sˆ + eˆ */ public_key->t_as_ntt, public_key->A, - private_key->secret_as_ntt, error_as_ntt); + public_key->t_as_ntt, public_key->A, private_key->secret_as_ntt, + error_as_ntt); uint8_t uu____5[32U]; Result_fb dst; Eurydice_slice_to_array2(&dst, seed_for_A, Eurydice_slice, uint8_t[32U]); @@ -6907,10 +6777,7 @@ static KRML_MUSTINLINE bool libcrux_ml_kem_ind_cca_validate_private_key_only_d6( libcrux_ml_kem_types_MlKemPrivateKey_d9 *private_key) { uint8_t t[32U]; libcrux_ml_kem_hash_functions_portable_H_f1_e0( - Eurydice_array_to_subslice2(/* Eurydice can't access values directly on - the types. We need to go to the `value` - directly. */ - private_key->value, (size_t)384U * (size_t)3U, + Eurydice_array_to_subslice2(private_key->value, (size_t)384U * (size_t)3U, (size_t)768U * (size_t)3U + (size_t)32U, uint8_t), t); @@ -7728,10 +7595,7 @@ libcrux_ml_kem_ind_cca_unpacked_keys_from_private_key_df( Eurydice_slice ind_cpa_public_key_hash = uu____0.thd; Eurydice_slice implicit_rejection_value = uu____0.f3; Eurydice_slice uu____1 = Eurydice_array_to_slice( - (size_t)3U, - /* XXX: We need to copy_from_slice here because karamel can't handle the - assignment cf. https://github.com/FStarLang/karamel/pull/491 */ - key_pair->private_key.ind_cpa_private_key.secret_as_ntt, + (size_t)3U, key_pair->private_key.ind_cpa_private_key.secret_as_ntt, libcrux_ml_kem_polynomial_PolynomialRingElement_1d); libcrux_ml_kem_polynomial_PolynomialRingElement_1d ret[3U]; libcrux_ml_kem_ind_cpa_deserialize_secret_key_1b(ind_cpa_secret_key, ret); diff --git a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h index 5955882fa..7a519bf7c 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_avx2.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_avx2.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_avx2_H @@ -104,9 +104,7 @@ libcrux_sha3_simd_avx2_and_not_xor_ef(__m256i a, __m256i b, __m256i c) { KRML_ATTRIBUTE_TARGET("avx2") static KRML_MUSTINLINE __m256i libcrux_sha3_simd_avx2__veorq_n_u64(__m256i a, uint64_t c) { - __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x( - (int64_t) /* Casting here is required, doesn't change the value. */ - c); + __m256i c0 = libcrux_intrinsics_avx2_mm256_set1_epi64x((int64_t)c); return libcrux_intrinsics_avx2_mm256_xor_si256(a, c0); } @@ -1701,7 +1699,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_5b( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], @@ -2036,15 +2034,7 @@ static KRML_MUSTINLINE void libcrux_sha3_avx2_x4_shake256( Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice input2, Eurydice_slice input3, Eurydice_slice out0, Eurydice_slice out1, Eurydice_slice out2, Eurydice_slice out3) { - Eurydice_slice buf0[4U] = { - /* XXX: These functions could alternatively implement the same with the - portable implementation #[cfg(feature = "simd128")] { keccakx2::<136, - 0x1fu8>([input0, input1], [out0, out1]); keccakx2::<136, - 0x1fu8>([input2, input3], [out2, out3]); } { keccakx1::<136, - 0x1fu8>([input0], [out0]); keccakx1::<136, 0x1fu8>([input1], [out1]); - keccakx1::<136, 0x1fu8>([input2], [out2]); keccakx1::<136, - 0x1fu8>([input3], [out3]); } */ - input0, input1, input2, input3}; + Eurydice_slice buf0[4U] = {input0, input1, input2, input3}; Eurydice_slice buf[4U] = {out0, out1, out2, out3}; libcrux_sha3_generic_keccak_keccak_fb(buf0, buf); } @@ -2284,7 +2274,7 @@ static KRML_MUSTINLINE void libcrux_sha3_simd_avx2_store_block_3a( __m256i); __m256i v1h = libcrux_intrinsics_avx2_mm256_permute2x128_si256( (int32_t)32, - s[((size_t)4U * /* 0 0 2 2 */ i0 + (size_t)1U) / (size_t)5U] + s[((size_t)4U * i0 + (size_t)1U) / (size_t)5U] [((size_t)4U * i0 + (size_t)1U) % (size_t)5U], s[((size_t)4U * i0 + (size_t)3U) / (size_t)5U] [((size_t)4U * i0 + (size_t)3U) % (size_t)5U], diff --git a/libcrux-ml-kem/cg/libcrux_sha3_portable.h b/libcrux-ml-kem/cg/libcrux_sha3_portable.h index 211cf1919..a606f5f71 100644 --- a/libcrux-ml-kem/cg/libcrux_sha3_portable.h +++ b/libcrux-ml-kem/cg/libcrux_sha3_portable.h @@ -4,11 +4,11 @@ * SPDX-License-Identifier: MIT or Apache-2.0 * * This code was generated with the following revisions: - * Charon: 3a133fe0eee9bd3928d5bb16c24ddd2dd0f3ee7f - * Eurydice: 1fff1c51ae6e6c87eafd28ec9d5594f54bc91c0c - * Karamel: c31a22c1e07d2118c07ee5cebb640d863e31a198 - * F*: 2c32d6e230851bbceadac7a21fc418fa2bb7e4bc - * Libcrux: cbc0d48933fbcbffaaf1f817d7fbd4047a7630a1 + * Charon: 45f5a34f336e35c6cc2253bc90cbdb8d812cefa9 + * Eurydice: e2db6e88adc9995ca9d3dedf7fa9bc4095e9ca20 + * Karamel: 8c3612018c25889288da6857771be3ad03b75bcd + * F*: 5643e656b989aca7629723653a2570c7df6252b9 + * Libcrux: fbef3649fa222b800fc7dcc349855bcd7de48e36 */ #ifndef __libcrux_sha3_portable_H @@ -1654,7 +1654,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_96( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e(copy_of_data, out); } @@ -2013,7 +2012,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e0(copy_of_data, out); } @@ -2142,7 +2140,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_ad0( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e1(copy_of_data, out); } @@ -2749,7 +2746,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_1e( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e2(copy_of_data, out); } @@ -3108,7 +3104,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_7c( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e3(copy_of_data, out); } @@ -3404,7 +3399,6 @@ static KRML_MUSTINLINE void libcrux_sha3_portable_keccakx1_c6( Eurydice_slice data[1U], Eurydice_slice out[1U]) { /* Passing arrays by value in Rust generates a copy in C */ Eurydice_slice copy_of_data[1U]; - /* generic_keccak::keccak_xof::<1, u64, RATE, DELIM>(data, out); or */ memcpy(copy_of_data, data, (size_t)1U * sizeof(Eurydice_slice)); libcrux_sha3_generic_keccak_keccak_9e4(copy_of_data, out); } @@ -3502,7 +3496,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_shake256(Eurydice_slice input0, Eurydice_slice input1, Eurydice_slice out0, Eurydice_slice out1) { - /* TODO: make argument ordering consistent */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3517,9 +3510,6 @@ typedef struct libcrux_sha3_neon_x2_incremental_KeccakState_s { */ static KRML_MUSTINLINE libcrux_sha3_neon_x2_incremental_KeccakState libcrux_sha3_neon_x2_incremental_init(void) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let s0 = KeccakState::new(); let s1 = - * KeccakState::new(); [s0, s1] } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3532,10 +3522,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3549,10 +3535,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_first_three_blocks( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_first_three_blocks(&mut s0, out0); - * shake128_squeeze_first_three_blocks(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3566,10 +3548,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake128_squeeze_next_block( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice out0, Eurydice_slice out1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_squeeze_next_block(&mut s0, out0); - * shake128_squeeze_next_block(&mut s1, out1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3594,10 +3572,6 @@ static KRML_MUSTINLINE void libcrux_sha3_neon_x2_incremental_shake256_absorb_final( libcrux_sha3_neon_x2_incremental_KeccakState *s, Eurydice_slice data0, Eurydice_slice data1) { - /* XXX: These functions could alternatively implement the same with the - * portable implementation { let [mut s0, mut s1] = s; - * shake128_absorb_final(&mut s0, data0); shake128_absorb_final(&mut s1, - * data1); } */ KRML_HOST_EPRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, "panic!"); KRML_HOST_EXIT(255U); @@ -3759,13 +3733,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c6( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)136U) { - consumed = (size_t)136U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)136U) { + consumed = (size_t)136U - self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -3871,9 +3840,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c6( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c6(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4220,13 +4187,8 @@ static inline size_t libcrux_sha3_generic_keccak_fill_buffer_8b_c60( size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); size_t consumed = (size_t)0U; if (self->buf_len > (size_t)0U) { - if ( - /* There's something buffered internally to consume. */ self->buf_len + - input_len >= - (size_t)168U) { - consumed = (size_t)168U - /* We have enough data when combining the - internal buffer and the input. */ - self->buf_len; + if (self->buf_len + input_len >= (size_t)168U) { + consumed = (size_t)168U - self->buf_len; for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; Eurydice_slice uu____0 = Eurydice_array_to_subslice_from( @@ -4332,9 +4294,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_absorb_8b_c60( memcpy(copy_of_inputs, inputs, (size_t)1U * sizeof(Eurydice_slice)); size_t input_remainder_len = libcrux_sha3_generic_keccak_absorb_full_8b_c60(uu____0, copy_of_inputs); - if ( - /* ... buffer the rest if there's not enough input (left). */ - input_remainder_len > (size_t)0U) { + if (input_remainder_len > (size_t)0U) { size_t input_len = Eurydice_slice_len(inputs[0U], uint8_t); for (size_t i = (size_t)0U; i < (size_t)1U; i++) { size_t i0 = i; @@ -4724,13 +4684,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( size_t blocks = out_len / (size_t)136U; size_t last = out_len - out_len % (size_t)136U; size_t mid; - if ((size_t)136U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)136U >= out_len) { mid = out_len; } else { mid = (size_t)136U; @@ -4744,11 +4698,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( libcrux_sha3_portable_keccak_store_5a_5b(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4757,11 +4708,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c6( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)136U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice)); @@ -4856,13 +4803,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( size_t blocks = out_len / (size_t)168U; size_t last = out_len - out_len % (size_t)168U; size_t mid; - if ((size_t)168U >= - /* Squeeze out one to start with. XXX: Eurydice does not extract - `core::cmp::min`, so we do this instead. (cf. - https://github.com/AeneasVerif/eurydice/issues/49) */ - out_len - - ) { + if ((size_t)168U >= out_len) { mid = out_len; } else { mid = (size_t)168U; @@ -4876,11 +4817,8 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( libcrux_sha3_portable_keccak_store_5a_3a(self->inner.st, out00); core_ops_range_Range_08 iter = core_iter_traits_collect___core__iter__traits__collect__IntoIterator_for_I__1__into_iter( - (CLITERAL(core_ops_range_Range_08){ - .start = (size_t)1U, - .end = /* If we got asked for more than one block, squeeze out - more. */ - blocks}), + (CLITERAL(core_ops_range_Range_08){.start = (size_t)1U, + .end = blocks}), core_ops_range_Range_08, core_ops_range_Range_08); while (true) { if (core_iter_range___core__iter__traits__iterator__Iterator_for_core__ops__range__Range_A__TraitClause_0___6__next( @@ -4889,11 +4827,7 @@ static KRML_MUSTINLINE void libcrux_sha3_generic_keccak_squeeze_8b_c60( break; } else { Eurydice_slice_uint8_t_1size_t__x2 uu____1 = - libcrux_sha3_portable_keccak_split_at_mut_n_5a(/* Here we know that we - always have full - blocks to write out. - */ - out_rest, + libcrux_sha3_portable_keccak_split_at_mut_n_5a(out_rest, (size_t)168U); Eurydice_slice out0[1U]; memcpy(out0, uu____1.fst, (size_t)1U * sizeof(Eurydice_slice));