diff --git a/runbench b/runbench index 76651fc..7330f4c 100755 --- a/runbench +++ b/runbench @@ -20,8 +20,7 @@ bins = [] { "base" => "-O3", "march-native" => "-O3 -march=native", -# Fast math seems to return broken results and not be a benefit -# "ffast-math" => "-O3 -march=native -ffast-math", + "ffast-math" => "-O3 -march=native -ffast-math", }.each do |name, opts| bench = "#{BINDIR}/clangbench-#{name}" if !File.exist?(bench) diff --git a/src/main.rs b/src/main.rs index e9ee24d..1c765af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -47,9 +47,9 @@ fn main() { pixout[2] = r * matrix[2][0] + g * matrix[2][1] + b * matrix[2][2] + e * matrix[2][3]; } let to_time = time::precise_time_ns(); - let mut sum = 0f32; + let mut sum = 0f64; for v in out { - sum += v; + sum += v as f64; } println!("{:.2} ms/megapixel (sum is {})", ((to_time - from_time) as f32)/((num_pixels as f32)), @@ -70,9 +70,9 @@ fn main() { pixout[2] = z_comps.extract(0) + z_comps.extract(1) + z_comps.extract(2); } let to_time = time::precise_time_ns(); - let mut sum = 0f32; + let mut sum = 0f64; for v in out { - sum += v; + sum += v as f64; } println!("{:.2} ms/megapixel (sum is {}) (explicit simd)", ((to_time - from_time) as f32)/((num_pixels as f32)), diff --git a/test.c b/test.c index d5789ba..ee4ab5b 100644 --- a/test.c +++ b/test.c @@ -56,7 +56,7 @@ int main(void) { int64_t to_time = time_in_micros(); // Calculate the pixel average - float sum = 0.0f; + double sum = 0.0f; for(int i=0; i