ziglang · andrewrk · Jun 23, 2019 · Jun 19, 2019 · Jun 18, 2019 · Jun 19, 2019
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -389,6 +389,8 @@ set(EMBEDDED_SOFTFLOAT_SOURCES
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_subMagsF32.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_subMagsF64.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_tryPropagateNaNF128M.c"
+    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_mulAdd.c"
+    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_mulAdd.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/softfloat_state.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/ui32_to_f128M.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/ui64_to_f128M.c"

diff --git a/doc/langref.html.in b/doc/langref.html.in
@@ -6259,6 +6259,13 @@ comptime {
       This function is only valid within function scope.
       </p>
 
+      {#header_close#}
+      {#header_open|@mulAdd#}
+      <pre>{#syntax#}@mulAdd(comptime T: type, a: T, b: T, c: T) T{#endsyntax#}</pre>
+      <p>
+      Fused multiply-add for floating point values. Similar to {#syntax#}(a * b) + c{#endsyntax#}, except
+      that the result is rounded only once, which makes it more accurate.
+      </p>
       {#header_close#}
 
       {#header_open|@byteSwap#}
@@ -7347,10 +7354,91 @@ test "@setRuntimeSafety" {
       <pre>{#syntax#}@sqrt(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
       Performs the square root of a floating point number. Uses a dedicated hardware instruction
-      when available. Currently only supports f32 and f64 at runtime. f128 at runtime is TODO.
+      when available. Supports f16, f32, f64, and f128, as well as vectors.
+      </p>
+      {#header_close#}
+      {#header_open|@sin#}
+      <pre>{#syntax#}@sin(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Sine trigonometric function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
       </p>
+      {#header_close#}
+      {#header_open|@cos#}
+      <pre>{#syntax#}@cos(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Cosine trigonometric function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@exp#}
+      <pre>{#syntax#}@exp(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Base-e exponential function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@exp2#}
+      <pre>{#syntax#}@exp2(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Base-2 exponential function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@ln#}
+      <pre>{#syntax#}@ln(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the natural logarithm of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@log2#}
+      <pre>{#syntax#}@log2(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the logarithm to the base 2 of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@log10#}
+      <pre>{#syntax#}@log10(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the logarithm to the base 10 of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@fabs#}
+      <pre>{#syntax#}@fabs(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the absolute value of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@floor#}
+      <pre>{#syntax#}@floor(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the largest integral value not greater than the given floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@ceil#}
+      <pre>{#syntax#}@ceil(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the smallest integral value not less than the given floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@trunc#}
+      <pre>{#syntax#}@trunc(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Rounds the given floating point number to an integer, towards zero. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@round#}
+      <pre>{#syntax#}@round(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
-      This is a low-level intrinsic. Most code can use {#syntax#}std.math.sqrt{#endsyntax#} instead.
+      Rounds the given floating point number to the nearest integer, with halfway cases rounded away from zero. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
       </p>
       {#header_close#}
 

diff --git a/src/all_types.hpp b/src/all_types.hpp
@@ -1406,6 +1406,7 @@ enum BuiltinFnId {
     BuiltinFnIdSubWithOverflow,
     BuiltinFnIdMulWithOverflow,
     BuiltinFnIdShlWithOverflow,
+    BuiltinFnIdMulAdd,
     BuiltinFnIdCInclude,
     BuiltinFnIdCDefine,
     BuiltinFnIdCUndef,
@@ -1433,6 +1434,19 @@ enum BuiltinFnId {
     BuiltinFnIdRem,
     BuiltinFnIdMod,
     BuiltinFnIdSqrt,
+    BuiltinFnIdSin,
+    BuiltinFnIdCos,
+    BuiltinFnIdExp,
+    BuiltinFnIdExp2,
+    BuiltinFnIdLn,
+    BuiltinFnIdLog2,
+    BuiltinFnIdLog10,
+    BuiltinFnIdFabs,
+    BuiltinFnIdFloor,
+    BuiltinFnIdCeil,
+    BuiltinFnIdTrunc,
+    BuiltinFnIdNearbyInt,
+    BuiltinFnIdRound,
     BuiltinFnIdTruncate,
     BuiltinFnIdIntCast,
     BuiltinFnIdFloatCast,
@@ -1554,9 +1568,8 @@ enum ZigLLVMFnId {
     ZigLLVMFnIdClz,
     ZigLLVMFnIdPopCount,
     ZigLLVMFnIdOverflowArithmetic,
-    ZigLLVMFnIdFloor,
-    ZigLLVMFnIdCeil,
-    ZigLLVMFnIdSqrt,
+    ZigLLVMFnIdFMA,
+    ZigLLVMFnIdFloatOp,
     ZigLLVMFnIdBswap,
     ZigLLVMFnIdBitReverse,
 };
@@ -1583,7 +1596,9 @@ struct ZigLLVMFnKey {
             uint32_t bit_count;
         } pop_count;
         struct {
+            BuiltinFnId op;
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } floating;
         struct {
             AddSubMul add_sub_mul;
@@ -2235,6 +2250,8 @@ enum IrInstructionId {
     IrInstructionIdHandle,
     IrInstructionIdAlignOf,
     IrInstructionIdOverflowOp,
+    IrInstructionIdMulAdd,
+    IrInstructionIdFloatOp,
     IrInstructionIdTestErr,
     IrInstructionIdUnwrapErrCode,
     IrInstructionIdUnwrapErrPayload,
@@ -2296,7 +2313,6 @@ enum IrInstructionId {
     IrInstructionIdAddImplicitReturnType,
     IrInstructionIdMergeErrRetTraces,
     IrInstructionIdMarkErrRetTracePtr,
-    IrInstructionIdSqrt,
     IrInstructionIdErrSetCast,
     IrInstructionIdToBytes,
     IrInstructionIdFromBytes,
@@ -3038,6 +3054,15 @@ struct IrInstructionOverflowOp {
     ZigType *result_ptr_type;
 };
 
+struct IrInstructionMulAdd { // IR instruction carrying a type operand and three value operands; presumably backs the @mulAdd builtin (BuiltinFnIdMulAdd) — confirm in ir.cpp
+    IrInstruction base;
+
+    IrInstruction *type_value; // type operand; presumably the `T` in @mulAdd(T, a, b, c) — verify against the IR builder
+    IrInstruction *op1; // presumably the first factor `a` — verify against the IR builder
+    IrInstruction *op2; // presumably the second factor `b` — verify against the IR builder
+    IrInstruction *op3; // presumably the addend `c` — verify against the IR builder
+};
+
 struct IrInstructionAlignOf {
     IrInstruction base;
 
@@ -3461,11 +3486,13 @@ struct IrInstructionMarkErrRetTracePtr {
     IrInstruction *err_ret_trace_ptr;
 };
 
-struct IrInstructionSqrt {
+// For float ops which take a single argument
+struct IrInstructionFloatOp {
     IrInstruction base;
 
+    BuiltinFnId op;
     IrInstruction *type;
-    IrInstruction *op;
+    IrInstruction *op1;
 };
 
 struct IrInstructionCheckRuntimeScope {

diff --git a/src/analyze.cpp b/src/analyze.cpp
@@ -5736,12 +5736,13 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
             return (uint32_t)(x.data.clz.bit_count) * (uint32_t)2428952817;
         case ZigLLVMFnIdPopCount:
             return (uint32_t)(x.data.clz.bit_count) * (uint32_t)101195049;
-        case ZigLLVMFnIdFloor:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1899859168;
-        case ZigLLVMFnIdCeil:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1953839089;
-        case ZigLLVMFnIdSqrt:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)2225366385;
+        case ZigLLVMFnIdFloatOp:
+            return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
+                   (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025) +
+                   (uint32_t)(x.data.floating.op) * (uint32_t)43789879;
+        case ZigLLVMFnIdFMA:
+            return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
+                   (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBswap:
             return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
         case ZigLLVMFnIdBitReverse:
@@ -5769,10 +5770,13 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
             return a.data.bswap.bit_count == b.data.bswap.bit_count;
         case ZigLLVMFnIdBitReverse:
             return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
-        case ZigLLVMFnIdFloor:
-        case ZigLLVMFnIdCeil:
-        case ZigLLVMFnIdSqrt:
-            return a.data.floating.bit_count == b.data.floating.bit_count;
+        case ZigLLVMFnIdFloatOp:
+            return a.data.floating.bit_count == b.data.floating.bit_count &&
+                   a.data.floating.vector_len == b.data.floating.vector_len &&
+                   a.data.floating.op == b.data.floating.op;
+        case ZigLLVMFnIdFMA:
+            return a.data.floating.bit_count == b.data.floating.bit_count &&
+                   a.data.floating.vector_len == b.data.floating.vector_len;
         case ZigLLVMFnIdOverflowArithmetic:
             return (a.data.overflow_arithmetic.bit_count == b.data.overflow_arithmetic.bit_count) &&
                 (a.data.overflow_arithmetic.add_sub_mul == b.data.overflow_arithmetic.add_sub_mul) &&