Skip to content

Commit

Permalink
replace mul add sequence with fused mul_add
Browse files Browse the repository at this point in the history
  • Loading branch information
sarah el kazdadi committed Feb 22, 2024
1 parent fe7b914 commit fc24e96
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/fft128/f128_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ pub mod x86 {
/// Multiplies two values that are each passed as a pair of `f64x4` vectors,
/// (`a0`, `a1`) and (`b0`, `b1`), and returns the product as a pair as well.
///
/// NOTE(review): presumably each pair holds the high and low halves of
/// double-double lanes — confirm against the callers.
///
/// Only `a0 * b0` (through `two_prod_f64x4`) and the two cross products
/// `a1 * b0` and `a0 * b1` contribute; the `a1 * b1` product is never formed.
#[inline(always)]
fn mul_f128x4(self, a0: f64x4, a1: f64x4, b0: f64x4, b1: f64x4) -> (f64x4, f64x4) {
    let (p1, p2) = two_prod_f64x4(self, a0, b0);
    // Fold both cross products into `p2` with two chained `mul_add` calls
    // (assumes `mul_add(x, y, z)` evaluates `x * y + z` — confirm). This is
    // the replacement for the older `add(p2, mul_add(a0, b1, mul(a1, b0)))`
    // statement; keeping both would add the cross terms to `p2` twice.
    let p2 = self.mul_add_f64x4(a0, b1, self.mul_add_f64x4(a1, b0, p2));
    quick_two_sum_f64x4(self, p1, p2)
}
}
Expand Down Expand Up @@ -885,7 +885,7 @@ pub mod x86 {
/// Multiplies two values that are each passed as a pair of `f64x8` vectors,
/// (`a0`, `a1`) and (`b0`, `b1`), and returns the product as a pair as well.
///
/// NOTE(review): presumably each pair holds the high and low halves of
/// double-double lanes — confirm against the callers.
///
/// Only `a0 * b0` (through `two_prod_f64x8`) and the two cross products
/// `a1 * b0` and `a0 * b1` contribute; the `a1 * b1` product is never formed.
#[inline(always)]
fn mul_f128x8(self, a0: f64x8, a1: f64x8, b0: f64x8, b1: f64x8) -> (f64x8, f64x8) {
    let (p1, p2) = two_prod_f64x8(self, a0, b0);
    // Fold both cross products into `p2` with two chained `mul_add` calls
    // (assumes `mul_add(x, y, z)` evaluates `x * y + z` — confirm). This is
    // the replacement for the older `add(p2, mul_add(a0, b1, mul(a1, b0)))`
    // statement; keeping both would add the cross terms to `p2` twice.
    let p2 = self.mul_add_f64x8(a0, b1, self.mul_add_f64x8(a1, b0, p2));
    quick_two_sum_f64x8(self, p1, p2)
}

Expand Down Expand Up @@ -943,7 +943,7 @@ pub mod x86 {
/// Multiplies two values that are each passed as a pair of `f64x16` vectors,
/// (`a0`, `a1`) and (`b0`, `b1`), and returns the product as a pair as well.
///
/// NOTE(review): presumably each pair holds the high and low halves of
/// double-double lanes — confirm against the callers.
///
/// Only `a0 * b0` (through `two_prod_f64x16`) and the two cross products
/// `a1 * b0` and `a0 * b1` contribute; the `a1 * b1` product is never formed.
#[inline(always)]
fn mul_f128x16(self, a0: f64x16, a1: f64x16, b0: f64x16, b1: f64x16) -> (f64x16, f64x16) {
    let (p1, p2) = two_prod_f64x16(self, a0, b0);
    // Fold both cross products into `p2` with two chained `mul_add` calls
    // (assumes `mul_add(x, y, z)` evaluates `x * y + z` — confirm). This is
    // the replacement for the older `add(p2, mul_add(a0, b1, mul(a1, b0)))`
    // statement; keeping both would add the cross terms to `p2` twice.
    let p2 = self.mul_add_f64x16(a0, b1, self.mul_add_f64x16(a1, b0, p2));
    quick_two_sum_f64x16(self, p1, p2)
}

Expand Down

0 comments on commit fc24e96

Please sign in to comment.