diff --git a/Cargo.toml b/Cargo.toml index 67610c780..153cf07a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,6 +121,9 @@ lto = "fat" codegen-units = 1 panic = "abort" +[profile.dev.package."libcrux-ml-dsa"] +opt-level = 1 + [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ 'cfg(hax)', diff --git a/libcrux-ml-dsa/src/ml_dsa_generic.rs b/libcrux-ml-dsa/src/ml_dsa_generic.rs index 9b0443525..987f99621 100644 --- a/libcrux-ml-dsa/src/ml_dsa_generic.rs +++ b/libcrux-ml-dsa/src/ml_dsa_generic.rs @@ -52,9 +52,8 @@ pub(crate) fn generate_key_pair< let (seed_for_error_vectors, seed_for_signing) = seed_expanded.split_at(SEED_FOR_ERROR_VECTORS_SIZE); - let a_as_ntt = samplex4::matrix_A::( - into_padded_array(seed_for_a), - ); + let a_as_ntt = + samplex4::matrix_A::(into_padded_array(seed_for_a)); let (s1, s2) = samplex4::sample_s1_and_s2::( into_padded_array(seed_for_error_vectors), @@ -246,9 +245,8 @@ pub(crate) fn sign_internal< SIGNING_KEY_SIZE, >(signing_key); - let A_as_ntt = samplex4::matrix_A::( - into_padded_array(&seed_for_A), - ); + let A_as_ntt = + samplex4::matrix_A::(into_padded_array(&seed_for_A)); let mut message_representative = [0; MESSAGE_REPRESENTATIVE_SIZE]; derive_message_representative( @@ -492,9 +490,8 @@ pub(crate) fn verify_internal< signature.signer_response, (2 << GAMMA1_EXPONENT) - BETA, ) { - let A_as_ntt = samplex4::matrix_A::( - into_padded_array(&seed_for_A), - ); + let A_as_ntt = + samplex4::matrix_A::(into_padded_array(&seed_for_A)); let mut verification_key_hash = [0; BYTES_FOR_VERIFICATION_KEY_HASH]; Shake256::shake256::( diff --git a/libcrux-ml-dsa/src/sample.rs b/libcrux-ml-dsa/src/sample.rs index 99e7d33f2..16f2b1f65 100644 --- a/libcrux-ml-dsa/src/sample.rs +++ b/libcrux-ml-dsa/src/sample.rs @@ -31,8 +31,7 @@ fn rejection_sample_less_than_field_modulus( done } -#[inline(always)] -pub(crate) fn sample_four_ring_elements( +pub(crate) fn sample_four_ring_elements( mut seed0: [u8; 34], domain_separator0: u16, 
domain_separator1: u16, @@ -44,6 +43,8 @@ pub(crate) fn sample_four_ring_elements, PolynomialRingElement, ) { + use crate::hash_functions::shake128::XofX4; + // Prepare the seeds seed0[32] = domain_separator0 as u8; seed0[33] = (domain_separator0 >> 8) as u8; @@ -60,7 +61,12 @@ pub(crate) fn sample_four_ring_elements> 8) as u8; - let mut state = Shake128::init_absorb(&seed0, &seed1, &seed2, &seed3); + // FIXME: We use the portable SHAKE128 implementation here, even in + // the AVX2 instantiation of ML-DSA: the compiler optimizes it more + // easily than the AVX2 version, and the result is actually faster + // code (except for key generation). + let mut state = + crate::hash_functions::portable::Shake128X4::init_absorb(&seed0, &seed1, &seed2, &seed3); let mut randomness0 = [0u8; shake128::FIVE_BLOCKS_SIZE]; let mut randomness1 = [0u8; shake128::FIVE_BLOCKS_SIZE]; @@ -483,10 +489,10 @@ mod tests { // This is just a wrapper around sample_four_ring_elements, for testing // purposes. 
- fn sample_ring_element_uniform( + fn sample_ring_element_uniform( seed: [u8; 34], ) -> PolynomialRingElement { - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, ((seed[33] as u16) << 8) | (seed[32] as u16), 0, @@ -554,7 +560,7 @@ mod tests { ]; assert_eq!( - sample_ring_element_uniform::(seed).to_i32_array(), + sample_ring_element_uniform::(seed).to_i32_array(), expected_coefficients ); @@ -568,8 +574,7 @@ mod tests { 0xB1, 0x83, 0x9B, 0x86, 0x06, 0xF5, 0x94, 0x8B, 0x9D, 0x72, 0xA9, 0x56, 0xDC, 0xF1, 0x01, 0x16, 0xDA, 0x9E, 0x01, 0x00, ]; - let actual_coefficients = - sample_ring_element_uniform::(seed).to_i32_array(); + let actual_coefficients = sample_ring_element_uniform::(seed).to_i32_array(); assert_eq!(actual_coefficients[0], 1_165_602); assert_eq!( diff --git a/libcrux-ml-dsa/src/samplex4.rs b/libcrux-ml-dsa/src/samplex4.rs index 918deb8ce..1ac7e7530 100644 --- a/libcrux-ml-dsa/src/samplex4.rs +++ b/libcrux-ml-dsa/src/samplex4.rs @@ -1,5 +1,5 @@ use crate::{ - hash_functions::{shake128, shake256}, + hash_functions::shake256, polynomial::PolynomialRingElement, sample::{sample_four_error_ring_elements, sample_four_ring_elements}, simd::traits::Operations, @@ -30,7 +30,6 @@ fn update_matrix( @@ -39,7 +38,7 @@ pub(crate) fn matrix_A_4_by_4< let mut A: Matrix = [[PolynomialRingElement::::ZERO(); COLUMNS_IN_A]; ROWS_IN_A]; - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(0, 0), generate_domain_separator(0, 1), @@ -51,7 +50,7 @@ pub(crate) fn matrix_A_4_by_4< update_matrix(&mut A, 0, 2, four_ring_elements.2); update_matrix(&mut A, 0, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(1, 0), generate_domain_separator(1, 1), @@ -63,7 +62,7 @@ pub(crate) fn matrix_A_4_by_4< 
update_matrix(&mut A, 1, 2, four_ring_elements.2); update_matrix(&mut A, 1, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(2, 0), generate_domain_separator(2, 1), @@ -75,7 +74,7 @@ pub(crate) fn matrix_A_4_by_4< update_matrix(&mut A, 2, 2, four_ring_elements.2); update_matrix(&mut A, 2, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(3, 0), generate_domain_separator(3, 1), @@ -94,7 +93,6 @@ pub(crate) fn matrix_A_4_by_4< #[inline(always)] pub(crate) fn matrix_A_6_by_5< SIMDUnit: Operations, - Shake128X4: shake128::XofX4, const ROWS_IN_A: usize, const COLUMNS_IN_A: usize, >( @@ -102,7 +100,7 @@ pub(crate) fn matrix_A_6_by_5< ) -> [[PolynomialRingElement; COLUMNS_IN_A]; ROWS_IN_A] { let mut A = [[PolynomialRingElement::::ZERO(); COLUMNS_IN_A]; ROWS_IN_A]; - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(0, 0), generate_domain_separator(0, 1), @@ -114,7 +112,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 0, 2, four_ring_elements.2); update_matrix(&mut A, 0, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(0, 4), generate_domain_separator(1, 0), @@ -126,7 +124,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 1, 1, four_ring_elements.2); update_matrix(&mut A, 1, 2, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(1, 3), generate_domain_separator(1, 4), @@ -138,7 +136,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 2, 0, four_ring_elements.2); update_matrix(&mut A, 2, 1, 
four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(2, 2), generate_domain_separator(2, 3), @@ -150,7 +148,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 2, 4, four_ring_elements.2); update_matrix(&mut A, 3, 0, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(3, 1), generate_domain_separator(3, 2), @@ -162,7 +160,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 3, 3, four_ring_elements.2); update_matrix(&mut A, 3, 4, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(4, 0), generate_domain_separator(4, 1), @@ -174,7 +172,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 4, 2, four_ring_elements.2); update_matrix(&mut A, 4, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(4, 4), generate_domain_separator(5, 0), @@ -187,7 +185,7 @@ pub(crate) fn matrix_A_6_by_5< update_matrix(&mut A, 5, 2, four_ring_elements.3); // The the last 2 sampled ring elements are discarded here. 
- let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(5, 3), generate_domain_separator(5, 4), @@ -203,7 +201,6 @@ pub(crate) fn matrix_A_6_by_5< #[inline(always)] pub(crate) fn matrix_A_8_by_7< SIMDUnit: Operations, - Shake128X4: shake128::XofX4, const ROWS_IN_A: usize, const COLUMNS_IN_A: usize, >( @@ -211,7 +208,7 @@ pub(crate) fn matrix_A_8_by_7< ) -> [[PolynomialRingElement; COLUMNS_IN_A]; ROWS_IN_A] { let mut A = [[PolynomialRingElement::::ZERO(); COLUMNS_IN_A]; ROWS_IN_A]; - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(0, 0), generate_domain_separator(0, 1), @@ -223,7 +220,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 0, 2, four_ring_elements.2); update_matrix(&mut A, 0, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(0, 4), generate_domain_separator(0, 5), @@ -235,7 +232,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 0, 6, four_ring_elements.2); update_matrix(&mut A, 1, 0, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(1, 1), generate_domain_separator(1, 2), @@ -247,7 +244,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 1, 3, four_ring_elements.2); update_matrix(&mut A, 1, 4, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(1, 5), generate_domain_separator(1, 6), @@ -259,7 +256,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 2, 0, four_ring_elements.2); update_matrix(&mut A, 2, 1, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let 
four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(2, 2), generate_domain_separator(2, 3), @@ -271,7 +268,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 2, 4, four_ring_elements.2); update_matrix(&mut A, 2, 5, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(2, 6), generate_domain_separator(3, 0), @@ -283,7 +280,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 3, 1, four_ring_elements.2); update_matrix(&mut A, 3, 2, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(3, 3), generate_domain_separator(3, 4), @@ -295,7 +292,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 3, 5, four_ring_elements.2); update_matrix(&mut A, 3, 6, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(4, 0), generate_domain_separator(4, 1), @@ -307,7 +304,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 4, 2, four_ring_elements.2); update_matrix(&mut A, 4, 3, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(4, 4), generate_domain_separator(4, 5), @@ -319,7 +316,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 4, 6, four_ring_elements.2); update_matrix(&mut A, 5, 0, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(5, 1), generate_domain_separator(5, 2), @@ -331,7 +328,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 5, 3, four_ring_elements.2); update_matrix(&mut A, 5, 4, four_ring_elements.3); - let four_ring_elements = 
sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(5, 5), generate_domain_separator(5, 6), @@ -343,7 +340,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 6, 0, four_ring_elements.2); update_matrix(&mut A, 6, 1, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(6, 2), generate_domain_separator(6, 3), @@ -355,7 +352,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 6, 4, four_ring_elements.2); update_matrix(&mut A, 6, 5, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(6, 6), generate_domain_separator(7, 0), @@ -367,7 +364,7 @@ pub(crate) fn matrix_A_8_by_7< update_matrix(&mut A, 7, 1, four_ring_elements.2); update_matrix(&mut A, 7, 2, four_ring_elements.3); - let four_ring_elements = sample_four_ring_elements::( + let four_ring_elements = sample_four_ring_elements::( seed, generate_domain_separator(7, 3), generate_domain_separator(7, 4), @@ -383,18 +380,13 @@ pub(crate) fn matrix_A_8_by_7< } #[allow(non_snake_case)] #[inline(always)] -pub(crate) fn matrix_A< - SIMDUnit: Operations, - Shake128X4: shake128::XofX4, - const ROWS_IN_A: usize, - const COLUMNS_IN_A: usize, ->( +pub(crate) fn matrix_A( seed: [u8; 34], ) -> [[PolynomialRingElement; COLUMNS_IN_A]; ROWS_IN_A] { match (ROWS_IN_A as u8, COLUMNS_IN_A as u8) { - (4, 4) => matrix_A_4_by_4::(seed), - (6, 5) => matrix_A_6_by_5::(seed), - (8, 7) => matrix_A_8_by_7::(seed), + (4, 4) => matrix_A_4_by_4::(seed), + (6, 5) => matrix_A_6_by_5::(seed), + (8, 7) => matrix_A_8_by_7::(seed), _ => unreachable!(), } }