From bbfb53d52411cc5b1f560293c757bff252e1e06f Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Wed, 19 Jun 2019 12:06:42 -0500
Subject: [PATCH 1/5] c: add fma and fmaf

---
 std/special/c.zig | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/std/special/c.zig b/std/special/c.zig
index 456070f60962..b3cf54619fec 100644
--- a/std/special/c.zig
+++ b/std/special/c.zig
@@ -266,7 +266,12 @@ export fn floor(x: f64) f64 {
 export fn ceil(x: f64) f64 {
     return math.ceil(x);
 }
-
+export fn fma(a: f64, b: f64, c: f64) f64 {
+    return math.fma(f64, a, b, c);
+}
+export fn fmaf(a: f32, b: f32, c: f32) f32 {
+    return math.fma(f32, a, b, c);
+}
 fn generic_fmod(comptime T: type, x: T, y: T) T {
     @setRuntimeSafety(false);
 

From fce2d2d18be279359dcd75254506d46085c59aaf Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Tue, 18 Jun 2019 17:28:49 -0500
Subject: [PATCH 2/5] stage1: add support for @mulAdd fused-multiply-add for
 floats and vectors of floats

Not all of the softfloat library is being built....

Vector support is very buggy at the moment, but should work when the bugs are fixed.
(as I had the same code working with another vector function, that hasn't been merged yet).
---
 CMakeLists.txt                  |   2 +
 doc/langref.html.in             |   7 ++
 src/all_types.hpp               |  13 +++
 src/analyze.cpp                 |   7 +-
 src/codegen.cpp                 |  46 ++++++++-
 src/ir.cpp                      | 171 ++++++++++++++++++++++++++++++++
 src/ir_print.cpp                |  19 ++++
 test/stage1/behavior/muladd.zig |  34 +++++++
 8 files changed, 292 insertions(+), 7 deletions(-)
 create mode 100644 test/stage1/behavior/muladd.zig

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6f8176e4096..6a06afc9faba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -389,6 +389,8 @@ set(EMBEDDED_SOFTFLOAT_SOURCES
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_subMagsF32.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_subMagsF64.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/s_tryPropagateNaNF128M.c"
+    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f16_mulAdd.c"
+    "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/f128M_mulAdd.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/softfloat_state.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/ui32_to_f128M.c"
     "${CMAKE_SOURCE_DIR}/deps/SoftFloat-3e/source/ui64_to_f128M.c"
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 5ddd572e514f..9b95946256da 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6259,6 +6259,13 @@ comptime {
       This function is only valid within function scope.
       </p>
 
+      {#header_close#}
+      {#header_open|@mulAdd#}
+      <pre>{#syntax#}@mulAdd(comptime T: type, a: T, b: T, c: T) T{#endsyntax#}</pre>
+      <p>
+      Fused multiply-add (for floats): similar to {#syntax#}(a * b) + c{#endsyntax#}, except that it
+      only rounds once, and is thus more accurate.
+      </p>
       {#header_close#}
 
       {#header_open|@byteSwap#}
diff --git a/src/all_types.hpp b/src/all_types.hpp
index 5aa1c78ea1ef..83df71b95f48 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1406,6 +1406,7 @@ enum BuiltinFnId {
     BuiltinFnIdSubWithOverflow,
     BuiltinFnIdMulWithOverflow,
     BuiltinFnIdShlWithOverflow,
+    BuiltinFnIdMulAdd,
     BuiltinFnIdCInclude,
     BuiltinFnIdCDefine,
     BuiltinFnIdCUndef,
@@ -1554,6 +1555,7 @@ enum ZigLLVMFnId {
     ZigLLVMFnIdClz,
     ZigLLVMFnIdPopCount,
     ZigLLVMFnIdOverflowArithmetic,
+    ZigLLVMFnIdFMA,
     ZigLLVMFnIdFloor,
     ZigLLVMFnIdCeil,
     ZigLLVMFnIdSqrt,
@@ -1584,6 +1586,7 @@ struct ZigLLVMFnKey {
         } pop_count;
         struct {
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } floating;
         struct {
             AddSubMul add_sub_mul;
@@ -2235,6 +2238,7 @@ enum IrInstructionId {
     IrInstructionIdHandle,
     IrInstructionIdAlignOf,
     IrInstructionIdOverflowOp,
+    IrInstructionIdMulAdd,
     IrInstructionIdTestErr,
     IrInstructionIdUnwrapErrCode,
     IrInstructionIdUnwrapErrPayload,
@@ -3038,6 +3042,15 @@ struct IrInstructionOverflowOp {
     ZigType *result_ptr_type;
 };
 
+struct IrInstructionMulAdd {
+    IrInstruction base;
+
+    IrInstruction *type_value;
+    IrInstruction *op1;
+    IrInstruction *op2;
+    IrInstruction *op3;
+};
+
 struct IrInstructionAlignOf {
     IrInstruction base;
 
diff --git a/src/analyze.cpp b/src/analyze.cpp
index c7e35367c33e..bff740cd52cf 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -5737,11 +5737,11 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
         case ZigLLVMFnIdPopCount:
             return (uint32_t)(x.data.clz.bit_count) * (uint32_t)101195049;
         case ZigLLVMFnIdFloor:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1899859168;
         case ZigLLVMFnIdCeil:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1953839089;
         case ZigLLVMFnIdSqrt:
-            return (uint32_t)(x.data.floating.bit_count) * (uint32_t)2225366385;
+        case ZigLLVMFnIdFMA:
+            return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
+                   (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBswap:
             return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
         case ZigLLVMFnIdBitReverse:
@@ -5772,6 +5772,7 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
         case ZigLLVMFnIdFloor:
         case ZigLLVMFnIdCeil:
         case ZigLLVMFnIdSqrt:
+        case ZigLLVMFnIdFMA:
             return a.data.floating.bit_count == b.data.floating.bit_count;
         case ZigLLVMFnIdOverflowArithmetic:
             return (a.data.overflow_arithmetic.bit_count == b.data.overflow_arithmetic.bit_count) &&
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 3dd6995c61cc..6691652a5e04 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -807,31 +807,51 @@ static LLVMValueRef get_int_overflow_fn(CodeGen *g, ZigType *operand_type, AddSu
 }
 
 static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn_id) {
-    assert(type_entry->id == ZigTypeIdFloat);
+    assert(type_entry->id == ZigTypeIdFloat ||
+           type_entry->id == ZigTypeIdVector);
+
+    bool is_vector = (type_entry->id == ZigTypeIdVector);
+    ZigType *float_type = is_vector ? type_entry->data.vector.elem_type : type_entry;
 
     ZigLLVMFnKey key = {};
     key.id = fn_id;
-    key.data.floating.bit_count = (uint32_t)type_entry->data.floating.bit_count;
+    key.data.floating.bit_count = (uint32_t)float_type->data.floating.bit_count;
+    key.data.floating.vector_len = is_vector ? (uint32_t)type_entry->data.vector.len : 0;
 
     auto existing_entry = g->llvm_fn_table.maybe_get(key);
     if (existing_entry)
         return existing_entry->value;
 
     const char *name;
+    uint32_t num_args;
     if (fn_id == ZigLLVMFnIdFloor) {
         name = "floor";
+        num_args = 1;
     } else if (fn_id == ZigLLVMFnIdCeil) {
         name = "ceil";
+        num_args = 1;
     } else if (fn_id == ZigLLVMFnIdSqrt) {
         name = "sqrt";
+        num_args = 1;
+    } else if (fn_id == ZigLLVMFnIdFMA) {
+        name = "fma";
+        num_args = 3;
     } else {
         zig_unreachable();
     }
 
     char fn_name[64];
-    sprintf(fn_name, "llvm.%s.f%" ZIG_PRI_usize "", name, type_entry->data.floating.bit_count);
+    if (is_vector)
+        sprintf(fn_name, "llvm.%s.v%" PRIu32 "f%" PRIu32, name, key.data.floating.vector_len, key.data.floating.bit_count);
+    else
+        sprintf(fn_name, "llvm.%s.f%" PRIu32, name, key.data.floating.bit_count);
     LLVMTypeRef float_type_ref = get_llvm_type(g, type_entry);
-    LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, &float_type_ref, 1, false);
+    LLVMTypeRef return_elem_types[3] = {
+        float_type_ref,
+        float_type_ref,
+        float_type_ref,
+    };
+    LLVMTypeRef fn_type = LLVMFunctionType(float_type_ref, return_elem_types, num_args, false);
     LLVMValueRef fn_val = LLVMAddFunction(g->module, fn_name, fn_type);
     assert(LLVMGetIntrinsicID(fn_val));
 
@@ -5437,6 +5457,21 @@ static LLVMValueRef ir_render_sqrt(CodeGen *g, IrExecutable *executable, IrInstr
     return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
 }
 
+static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrInstructionMulAdd *instruction) {
+    LLVMValueRef op1 = ir_llvm_value(g, instruction->op1);
+    LLVMValueRef op2 = ir_llvm_value(g, instruction->op2);
+    LLVMValueRef op3 = ir_llvm_value(g, instruction->op3);
+    assert(instruction->base.value.type->id == ZigTypeIdFloat ||
+           instruction->base.value.type->id == ZigTypeIdVector);
+    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdFMA);
+    LLVMValueRef args[3] = {
+        op1,
+        op2,
+        op3,
+    };
+    return LLVMBuildCall(g->builder, fn_val, args, 3, "");
+}
+
 static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInstructionBswap *instruction) {
     LLVMValueRef op = ir_llvm_value(g, instruction->op);
     ZigType *int_type = instruction->base.value.type;
@@ -5781,6 +5816,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_mark_err_ret_trace_ptr(g, executable, (IrInstructionMarkErrRetTracePtr *)instruction);
         case IrInstructionIdSqrt:
             return ir_render_sqrt(g, executable, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdMulAdd:
+            return ir_render_mul_add(g, executable, (IrInstructionMulAdd *)instruction);
         case IrInstructionIdArrayToVector:
             return ir_render_array_to_vector(g, executable, (IrInstructionArrayToVector *)instruction);
         case IrInstructionIdVectorToArray:
@@ -7398,6 +7435,7 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdRem, "rem", 2);
     create_builtin_fn(g, BuiltinFnIdMod, "mod", 2);
     create_builtin_fn(g, BuiltinFnIdSqrt, "sqrt", 2);
+    create_builtin_fn(g, BuiltinFnIdMulAdd, "mulAdd", 4);
     create_builtin_fn(g, BuiltinFnIdInlineCall, "inlineCall", SIZE_MAX);
     create_builtin_fn(g, BuiltinFnIdNoInlineCall, "noInlineCall", SIZE_MAX);
     create_builtin_fn(g, BuiltinFnIdNewStackCall, "newStackCall", SIZE_MAX);
diff --git a/src/ir.cpp b/src/ir.cpp
index 5c09e48b2d7c..c2c6cb615416 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -747,6 +747,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionTestErr *) {
     return IrInstructionIdTestErr;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionMulAdd *) {
+  return IrInstructionIdMulAdd;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionUnwrapErrCode *) {
     return IrInstructionIdUnwrapErrCode;
 }
@@ -2308,6 +2312,22 @@ static IrInstruction *ir_build_overflow_op(IrBuilder *irb, Scope *scope, AstNode
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_mul_add(IrBuilder *irb, Scope *scope, AstNode *source_node,
+        IrInstruction *type_value, IrInstruction *op1, IrInstruction *op2, IrInstruction *op3) {
+    IrInstructionMulAdd *instruction = ir_build_instruction<IrInstructionMulAdd>(irb, scope, source_node);
+    instruction->type_value = type_value;
+    instruction->op1 = op1;
+    instruction->op2 = op2;
+    instruction->op3 = op3;
+
+    ir_ref_instruction(type_value, irb->current_basic_block);
+    ir_ref_instruction(op1, irb->current_basic_block);
+    ir_ref_instruction(op2, irb->current_basic_block);
+    ir_ref_instruction(op3, irb->current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_align_of(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *type_value) {
     IrInstructionAlignOf *instruction = ir_build_instruction<IrInstructionAlignOf>(irb, scope, source_node);
     instruction->type_value = type_value;
@@ -4028,6 +4048,33 @@ static IrInstruction *ir_gen_overflow_op(IrBuilder *irb, Scope *scope, AstNode *
     return ir_build_overflow_op(irb, scope, node, op, type_value, op1, op2, result_ptr, nullptr);
 }
 
+static IrInstruction *ir_gen_mul_add(IrBuilder *irb, Scope *scope, AstNode *node) {
+    assert(node->type == NodeTypeFnCallExpr);
+
+    AstNode *type_node = node->data.fn_call_expr.params.at(0);
+    AstNode *op1_node = node->data.fn_call_expr.params.at(1);
+    AstNode *op2_node = node->data.fn_call_expr.params.at(2);
+    AstNode *op3_node = node->data.fn_call_expr.params.at(3);
+
+    IrInstruction *type_value = ir_gen_node(irb, type_node, scope);
+    if (type_value == irb->codegen->invalid_instruction)
+        return irb->codegen->invalid_instruction;
+
+    IrInstruction *op1 = ir_gen_node(irb, op1_node, scope);
+    if (op1 == irb->codegen->invalid_instruction)
+        return irb->codegen->invalid_instruction;
+
+    IrInstruction *op2 = ir_gen_node(irb, op2_node, scope);
+    if (op2 == irb->codegen->invalid_instruction)
+        return irb->codegen->invalid_instruction;
+
+    IrInstruction *op3 = ir_gen_node(irb, op3_node, scope);
+    if (op3 == irb->codegen->invalid_instruction)
+        return irb->codegen->invalid_instruction;
+
+    return ir_build_mul_add(irb, scope, node, type_value, op1, op2, op3);
+}
+
 static IrInstruction *ir_gen_this(IrBuilder *irb, Scope *orig_scope, AstNode *node) {
     for (Scope *it_scope = orig_scope; it_scope != nullptr; it_scope = it_scope->parent) {
         if (it_scope->id == ScopeIdDecls) {
@@ -4687,6 +4734,8 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
             return ir_lval_wrap(irb, scope, ir_gen_overflow_op(irb, scope, node, IrOverflowOpMul), lval);
         case BuiltinFnIdShlWithOverflow:
             return ir_lval_wrap(irb, scope, ir_gen_overflow_op(irb, scope, node, IrOverflowOpShl), lval);
+        case BuiltinFnIdMulAdd:
+            return ir_lval_wrap(irb, scope, ir_gen_mul_add(irb, scope, node), lval);
         case BuiltinFnIdTypeName:
             {
                 AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@@ -21185,6 +21234,125 @@ static IrInstruction *ir_analyze_instruction_overflow_op(IrAnalyze *ira, IrInstr
     return result;
 }
 
+static void ir_eval_mul_add(IrAnalyze *ira, IrInstructionMulAdd *source_instr, ZigType *float_type,
+    ConstExprValue *op1, ConstExprValue *op2, ConstExprValue *op3, ConstExprValue *out_val) { // out_val = op1 * op2 + op3
+    if (float_type->id == ZigTypeIdComptimeFloat) {
+        f128M_mulAdd(&op1->data.x_bigfloat.value, &op2->data.x_bigfloat.value, &op3->data.x_bigfloat.value,
+            &out_val->data.x_bigfloat.value); // SoftFloat takes the result pointer last
+    } else if (float_type->id == ZigTypeIdFloat) {
+        switch (float_type->data.floating.bit_count) { // pick the routine matching the float width
+            case 16:
+                out_val->data.x_f16 = f16_mulAdd(op1->data.x_f16, op2->data.x_f16, op3->data.x_f16);
+                break;
+            case 32:
+                out_val->data.x_f32 = fmaf(op1->data.x_f32, op2->data.x_f32, op3->data.x_f32);
+                break;
+            case 64:
+                out_val->data.x_f64 = fma(op1->data.x_f64, op2->data.x_f64, op3->data.x_f64);
+                break;
+            case 128:
+                f128M_mulAdd(&op1->data.x_f128, &op2->data.x_f128, &op3->data.x_f128, &out_val->data.x_f128);
+                break;
+            default:
+                zig_unreachable();
+        }
+    } else {
+        zig_unreachable(); // callers only pass float or comptime_float element types
+    }
+}
+
+static IrInstruction *ir_analyze_instruction_mul_add(IrAnalyze *ira, IrInstructionMulAdd *instruction) {
+    IrInstruction *type_value = instruction->type_value->child;
+    if (type_is_invalid(type_value->value.type))
+        return ira->codegen->invalid_instruction;
+    // Resolve the type argument; every operand is implicitly cast to it below.
+    ZigType *expr_type = ir_resolve_type(ira, type_value);
+    if (type_is_invalid(expr_type))
+        return ira->codegen->invalid_instruction;
+
+    // Only allow float types, and vectors of floats.
+    ZigType *float_type = (expr_type->id == ZigTypeIdVector) ? expr_type->data.vector.elem_type : expr_type;
+    if (float_type->id != ZigTypeIdFloat) {
+        ir_add_error(ira, type_value,
+            buf_sprintf("expected float or vector of float type, found '%s'", buf_ptr(&float_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    IrInstruction *op1 = instruction->op1->child;
+    if (type_is_invalid(op1->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_op1 = ir_implicit_cast(ira, op1, expr_type);
+    if (type_is_invalid(casted_op1->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *op2 = instruction->op2->child;
+    if (type_is_invalid(op2->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_op2 = ir_implicit_cast(ira, op2, expr_type);
+    if (type_is_invalid(casted_op2->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *op3 = instruction->op3->child;
+    if (type_is_invalid(op3->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *casted_op3 = ir_implicit_cast(ira, op3, expr_type);
+    if (type_is_invalid(casted_op3->value.type))
+        return ira->codegen->invalid_instruction;
+
+    if (instr_is_comptime(casted_op1) &&
+        instr_is_comptime(casted_op2) &&
+        instr_is_comptime(casted_op3)) { // all operands known: fold to a constant here
+        ConstExprValue *op1_const = ir_resolve_const(ira, casted_op1, UndefBad);
+        if (!op1_const)
+            return ira->codegen->invalid_instruction;
+        ConstExprValue *op2_const = ir_resolve_const(ira, casted_op2, UndefBad);
+        if (!op2_const)
+            return ira->codegen->invalid_instruction;
+        ConstExprValue *op3_const = ir_resolve_const(ira, casted_op3, UndefBad);
+        if (!op3_const)
+            return ira->codegen->invalid_instruction;
+
+        IrInstruction *result = ir_const(ira, &instruction->base, expr_type);
+        ConstExprValue *out_val = &result->value;
+
+        if (expr_type->id == ZigTypeIdVector) {
+            expand_undef_array(ira->codegen, op1_const);
+            expand_undef_array(ira->codegen, op2_const);
+            expand_undef_array(ira->codegen, op3_const);
+            out_val->special = ConstValSpecialUndef;
+            expand_undef_array(ira->codegen, out_val);
+            size_t len = expr_type->data.vector.len;
+            for (size_t i = 0; i < len; i += 1) { // fold each lane from its own element values
+                ConstExprValue *float_operand_op1 = &op1_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_operand_op2 = &op2_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_operand_op3 = &op3_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_out_val = &out_val->data.x_array.data.s_none.elements[i];
+                assert(float_operand_op1->type == float_type);
+                assert(float_operand_op2->type == float_type);
+                assert(float_operand_op3->type == float_type);
+                assert(float_out_val->type == float_type);
+                ir_eval_mul_add(ira, instruction, float_type,
+                        float_operand_op1, float_operand_op2, float_operand_op3, float_out_val);
+                float_out_val->type = float_type;
+            }
+            out_val->type = expr_type;
+            out_val->special = ConstValSpecialStatic;
+        } else {
+            ir_eval_mul_add(ira, instruction, float_type, op1_const, op2_const, op3_const, out_val);
+        }
+        return result;
+    }
+
+    IrInstruction *result = ir_build_mul_add(&ira->new_irb, // otherwise emit a runtime instruction
+            instruction->base.scope, instruction->base.source_node,
+            type_value, casted_op1, casted_op2, casted_op3);
+    result->value.type = expr_type;
+    return result;
+}
+
 static IrInstruction *ir_analyze_instruction_test_err(IrAnalyze *ira, IrInstructionTestErr *instruction) {
     IrInstruction *value = instruction->value->child;
     if (type_is_invalid(value->value.type))
@@ -23596,6 +23764,8 @@ static IrInstruction *ir_analyze_instruction_nocast(IrAnalyze *ira, IrInstructio
             return ir_analyze_instruction_mark_err_ret_trace_ptr(ira, (IrInstructionMarkErrRetTracePtr *)instruction);
         case IrInstructionIdSqrt:
             return ir_analyze_instruction_sqrt(ira, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdMulAdd:
+            return ir_analyze_instruction_mul_add(ira, (IrInstructionMulAdd *)instruction);
         case IrInstructionIdIntToErr:
             return ir_analyze_instruction_int_to_err(ira, (IrInstructionIntToErr *)instruction);
         case IrInstructionIdErrToInt:
@@ -23835,6 +24005,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdCoroPromise:
         case IrInstructionIdPromiseResultType:
         case IrInstructionIdSqrt:
+        case IrInstructionIdMulAdd:
         case IrInstructionIdAtomicLoad:
         case IrInstructionIdIntCast:
         case IrInstructionIdFloatCast:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index bf9ced89c5ee..e205c8e067c0 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -1439,6 +1439,22 @@ static void ir_print_sqrt(IrPrint *irp, IrInstructionSqrt *instruction) {
     fprintf(irp->f, ")");
 }
 
+static void ir_print_mul_add(IrPrint *irp, IrInstructionMulAdd *instruction) {
+    fprintf(irp->f, "@mulAdd(");
+    if (instruction->type_value != nullptr) {
+        ir_print_other_instruction(irp, instruction->type_value);
+    } else {
+        fprintf(irp->f, "null");
+    }
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->op1);
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->op2);
+    fprintf(irp->f, ",");
+    ir_print_other_instruction(irp, instruction->op3);
+    fprintf(irp->f, ")");
+}
+
 static void ir_print_decl_var_gen(IrPrint *irp, IrInstructionDeclVarGen *decl_var_instruction) {
     ZigVar *var = decl_var_instruction->var;
     const char *var_or_const = decl_var_instruction->var->gen_is_const ? "const" : "var";
@@ -1905,6 +1921,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
         case IrInstructionIdSqrt:
             ir_print_sqrt(irp, (IrInstructionSqrt *)instruction);
             break;
+        case IrInstructionIdMulAdd:
+            ir_print_mul_add(irp, (IrInstructionMulAdd *)instruction);
+            break;
         case IrInstructionIdAtomicLoad:
             ir_print_atomic_load(irp, (IrInstructionAtomicLoad *)instruction);
             break;
diff --git a/test/stage1/behavior/muladd.zig b/test/stage1/behavior/muladd.zig
new file mode 100644
index 000000000000..143e6a93e431
--- /dev/null
+++ b/test/stage1/behavior/muladd.zig
@@ -0,0 +1,34 @@
+const expect = @import("std").testing.expect;
+
+test "@mulAdd" {
+    comptime testMulAdd();
+    testMulAdd();
+}
+
+fn testMulAdd() void {
+    {
+        var a: f16 = 5.5;
+        var b: f16 = 2.5;
+        var c: f16 = 6.25;
+        expect(@mulAdd(f16, a, b, c) == 20);
+    }
+    {
+        var a: f32 = 5.5;
+        var b: f32 = 2.5;
+        var c: f32 = 6.25;
+        expect(@mulAdd(f32, a, b, c) == 20);
+    }
+    {
+        var a: f64 = 5.5;
+        var b: f64 = 2.5;
+        var c: f64 = 6.25;
+        expect(@mulAdd(f64, a, b, c) == 20);
+    }
+    // Awaits implementation in libm.zig
+    //{
+    //    var a: f16 = 5.5;
+    //    var b: f128 = 2.5;
+    //    var c: f128 = 6.25;
+    //    expect(@mulAdd(f128, a, b, c) == 20);
+    //}
+}
\ No newline at end of file

From 39ad072a84af006ab89b72177524a859a50bb662 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Wed, 19 Jun 2019 11:48:30 -0500
Subject: [PATCH 3/5] test: include muladd.zig in behavior tests

---
 test/stage1/behavior.zig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/stage1/behavior.zig b/test/stage1/behavior.zig
index 707d46fd8cc8..10e7c1a09beb 100644
--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@@ -70,6 +70,7 @@ comptime {
     _ = @import("behavior/optional.zig");
     _ = @import("behavior/pointers.zig");
     _ = @import("behavior/popcount.zig");
+    _ = @import("behavior/muladd.zig");
     _ = @import("behavior/ptrcast.zig");
     _ = @import("behavior/pub_enum.zig");
     _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");

From ebde2ff899c16612c7ff58df61f3946be47c51c8 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Fri, 21 Jun 2019 08:43:10 -0500
Subject: [PATCH 4/5] stage1: update fn_key_eql() for @mulAdd() on vectors

---
 src/analyze.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/analyze.cpp b/src/analyze.cpp
index bff740cd52cf..15b42c7f9dad 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -5772,8 +5772,10 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
         case ZigLLVMFnIdFloor:
         case ZigLLVMFnIdCeil:
         case ZigLLVMFnIdSqrt:
-        case ZigLLVMFnIdFMA:
             return a.data.floating.bit_count == b.data.floating.bit_count;
+        case ZigLLVMFnIdFMA:
+            return a.data.floating.bit_count == b.data.floating.bit_count &&
+                   a.data.floating.vector_len == b.data.floating.vector_len;
         case ZigLLVMFnIdOverflowArithmetic:
             return (a.data.overflow_arithmetic.bit_count == b.data.overflow_arithmetic.bit_count) &&
                 (a.data.overflow_arithmetic.add_sub_mul == b.data.overflow_arithmetic.add_sub_mul) &&

From 71e014caecaa54fdd8a0516710d2d9597da41398 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Fri, 21 Jun 2019 16:18:59 -0500
Subject: [PATCH 5/5] stage1: add @sin @cos @exp @exp2 @ln @log2 @log10 @fabs
 @floor @ceil @trunc @round

and expand @sqrt

This revealed that the accuracy of ln is not as good as the current algorithm in
musl and glibc, and should be ported again.

v2: actually include tests
v3: fix reversal of in and out arguments on f128M_sqrt()
    add test for @sqrt on comptime_float
    do not include @nearbyInt() until it works on all targets.
---
 doc/langref.html.in              |  85 +++++++-
 src/all_types.hpp                |  26 ++-
 src/analyze.cpp                  |  15 +-
 src/codegen.cpp                  |  68 +++---
 src/ir.cpp                       | 355 +++++++++++++++++++++++++------
 src/ir.hpp                       |   1 +
 src/ir_print.cpp                 |  11 +-
 src/util.cpp                     |   1 +
 std/special/c.zig                |  44 ++--
 test/stage1/behavior.zig         |   1 +
 test/stage1/behavior/floatop.zig | 243 +++++++++++++++++++++
 11 files changed, 719 insertions(+), 131 deletions(-)
 create mode 100644 test/stage1/behavior/floatop.zig

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 9b95946256da..30fe9a36485c 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -7354,10 +7354,91 @@ test "@setRuntimeSafety" {
       <pre>{#syntax#}@sqrt(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
       Performs the square root of a floating point number. Uses a dedicated hardware instruction
-      when available. Currently only supports f32 and f64 at runtime. f128 at runtime is TODO.
+      when available. Supports f16, f32, f64, and f128, as well as vectors.
       </p>
+      {#header_close#}
+      {#header_open|@sin#}
+      <pre>{#syntax#}@sin(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Sine trigonometric function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@cos#}
+      <pre>{#syntax#}@cos(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Cosine trigonometric function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@exp#}
+      <pre>{#syntax#}@exp(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Base-e exponential function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@exp2#}
+      <pre>{#syntax#}@exp2(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Base-2 exponential function on a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@ln#}
+      <pre>{#syntax#}@ln(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the natural logarithm of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@log2#}
+      <pre>{#syntax#}@log2(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the logarithm to the base 2 of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@log10#}
+      <pre>{#syntax#}@log10(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the logarithm to the base 10 of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@fabs#}
+      <pre>{#syntax#}@fabs(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the absolute value of a floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@floor#}
+      <pre>{#syntax#}@floor(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the largest integral value not greater than the given floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@ceil#}
+      <pre>{#syntax#}@ceil(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Returns the smallest integral value not less than the given floating point number. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@trunc#}
+      <pre>{#syntax#}@trunc(comptime T: type, value: T) T{#endsyntax#}</pre>
+      <p>
+      Rounds the given floating point number to an integer, towards zero. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
+      </p>
+      {#header_close#}
+      {#header_open|@round#}
+      <pre>{#syntax#}@round(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
-      This is a low-level intrinsic. Most code can use {#syntax#}std.math.sqrt{#endsyntax#} instead.
+      Rounds the given floating point number to the nearest integer, with halfway cases rounded away from zero. Uses a dedicated hardware instruction
+      when available. Currently supports f32 and f64.
       </p>
       {#header_close#}
 
diff --git a/src/all_types.hpp b/src/all_types.hpp
index 83df71b95f48..6595218bcf4e 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1434,6 +1434,19 @@ enum BuiltinFnId {
     BuiltinFnIdRem,
     BuiltinFnIdMod,
     BuiltinFnIdSqrt,
+    BuiltinFnIdSin,
+    BuiltinFnIdCos,
+    BuiltinFnIdExp,
+    BuiltinFnIdExp2,
+    BuiltinFnIdLn,
+    BuiltinFnIdLog2,
+    BuiltinFnIdLog10,
+    BuiltinFnIdFabs,
+    BuiltinFnIdFloor,
+    BuiltinFnIdCeil,
+    BuiltinFnIdTrunc,
+    BuiltinFnIdNearbyInt,
+    BuiltinFnIdRound,
     BuiltinFnIdTruncate,
     BuiltinFnIdIntCast,
     BuiltinFnIdFloatCast,
@@ -1556,9 +1569,7 @@ enum ZigLLVMFnId {
     ZigLLVMFnIdPopCount,
     ZigLLVMFnIdOverflowArithmetic,
     ZigLLVMFnIdFMA,
-    ZigLLVMFnIdFloor,
-    ZigLLVMFnIdCeil,
-    ZigLLVMFnIdSqrt,
+    ZigLLVMFnIdFloatOp,
     ZigLLVMFnIdBswap,
     ZigLLVMFnIdBitReverse,
 };
@@ -1585,6 +1596,7 @@ struct ZigLLVMFnKey {
             uint32_t bit_count;
         } pop_count;
         struct {
+            BuiltinFnId op;
             uint32_t bit_count;
             uint32_t vector_len; // 0 means not a vector
         } floating;
@@ -2239,6 +2251,7 @@ enum IrInstructionId {
     IrInstructionIdAlignOf,
     IrInstructionIdOverflowOp,
     IrInstructionIdMulAdd,
+    IrInstructionIdFloatOp,
     IrInstructionIdTestErr,
     IrInstructionIdUnwrapErrCode,
     IrInstructionIdUnwrapErrPayload,
@@ -2300,7 +2313,6 @@ enum IrInstructionId {
     IrInstructionIdAddImplicitReturnType,
     IrInstructionIdMergeErrRetTraces,
     IrInstructionIdMarkErrRetTracePtr,
-    IrInstructionIdSqrt,
     IrInstructionIdErrSetCast,
     IrInstructionIdToBytes,
     IrInstructionIdFromBytes,
@@ -3474,11 +3486,13 @@ struct IrInstructionMarkErrRetTracePtr {
     IrInstruction *err_ret_trace_ptr;
 };
 
-struct IrInstructionSqrt {
+// For float ops which take a single argument
+struct IrInstructionFloatOp {
     IrInstruction base;
 
+    BuiltinFnId op;
     IrInstruction *type;
-    IrInstruction *op;
+    IrInstruction *op1;
 };
 
 struct IrInstructionCheckRuntimeScope {
diff --git a/src/analyze.cpp b/src/analyze.cpp
index 15b42c7f9dad..13b35e0aff43 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -5736,9 +5736,10 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
             return (uint32_t)(x.data.clz.bit_count) * (uint32_t)2428952817;
         case ZigLLVMFnIdPopCount:
             return (uint32_t)(x.data.clz.bit_count) * (uint32_t)101195049;
-        case ZigLLVMFnIdFloor:
-        case ZigLLVMFnIdCeil:
-        case ZigLLVMFnIdSqrt:
+        case ZigLLVMFnIdFloatOp:
+            return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
+                   (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025) +
+                   (uint32_t)(x.data.floating.op) * (uint32_t)43789879;
         case ZigLLVMFnIdFMA:
             return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
                    (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
@@ -5769,10 +5770,10 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
             return a.data.bswap.bit_count == b.data.bswap.bit_count;
         case ZigLLVMFnIdBitReverse:
             return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
-        case ZigLLVMFnIdFloor:
-        case ZigLLVMFnIdCeil:
-        case ZigLLVMFnIdSqrt:
-            return a.data.floating.bit_count == b.data.floating.bit_count;
+        case ZigLLVMFnIdFloatOp:
+            return a.data.floating.bit_count == b.data.floating.bit_count &&
+                   a.data.floating.vector_len == b.data.floating.vector_len &&
+                   a.data.floating.op == b.data.floating.op;
         case ZigLLVMFnIdFMA:
             return a.data.floating.bit_count == b.data.floating.bit_count &&
                    a.data.floating.vector_len == b.data.floating.vector_len;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 6691652a5e04..41caa29dbd55 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -806,7 +806,7 @@ static LLVMValueRef get_int_overflow_fn(CodeGen *g, ZigType *operand_type, AddSu
     return fn_val;
 }
 
-static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn_id) {
+static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn_id, BuiltinFnId op) {
     assert(type_entry->id == ZigTypeIdFloat ||
            type_entry->id == ZigTypeIdVector);
 
@@ -817,6 +817,7 @@ static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn
     key.id = fn_id;
     key.data.floating.bit_count = (uint32_t)float_type->data.floating.bit_count;
     key.data.floating.vector_len = is_vector ? (uint32_t)type_entry->data.vector.len : 0;
+    key.data.floating.op = op;
 
     auto existing_entry = g->llvm_fn_table.maybe_get(key);
     if (existing_entry)
@@ -824,18 +825,12 @@ static LLVMValueRef get_float_fn(CodeGen *g, ZigType *type_entry, ZigLLVMFnId fn
 
     const char *name;
     uint32_t num_args;
-    if (fn_id == ZigLLVMFnIdFloor) {
-        name = "floor";
-        num_args = 1;
-    } else if (fn_id == ZigLLVMFnIdCeil) {
-        name = "ceil";
-        num_args = 1;
-    } else if (fn_id == ZigLLVMFnIdSqrt) {
-        name = "sqrt";
-        num_args = 1;
-    } else if (fn_id == ZigLLVMFnIdFMA) {
+    if (fn_id == ZigLLVMFnIdFMA) {
         name = "fma";
         num_args = 3;
+    } else if (fn_id == ZigLLVMFnIdFloatOp) {
+        name = float_op_to_name(op, true);
+        num_args = 1;
     } else {
         zig_unreachable();
     }
@@ -2480,22 +2475,17 @@ static LLVMValueRef gen_overflow_shr_op(CodeGen *g, ZigType *type_entry,
     return result;
 }
 
-static LLVMValueRef gen_floor(CodeGen *g, LLVMValueRef val, ZigType *type_entry) {
-    if (type_entry->id == ZigTypeIdInt)
+static LLVMValueRef gen_float_op(CodeGen *g, LLVMValueRef val, ZigType *type_entry, BuiltinFnId op) {
+    if ((op == BuiltinFnIdCeil ||
+         op == BuiltinFnIdFloor) &&
+        type_entry->id == ZigTypeIdInt)
         return val;
+    assert(type_entry->id == ZigTypeIdFloat);
 
-    LLVMValueRef floor_fn = get_float_fn(g, type_entry, ZigLLVMFnIdFloor);
+    LLVMValueRef floor_fn = get_float_fn(g, type_entry, ZigLLVMFnIdFloatOp, op);
     return LLVMBuildCall(g->builder, floor_fn, &val, 1, "");
 }
 
-static LLVMValueRef gen_ceil(CodeGen *g, LLVMValueRef val, ZigType *type_entry) {
-    if (type_entry->id == ZigTypeIdInt)
-        return val;
-
-    LLVMValueRef ceil_fn = get_float_fn(g, type_entry, ZigLLVMFnIdCeil);
-    return LLVMBuildCall(g->builder, ceil_fn, &val, 1, "");
-}
-
 enum DivKind {
     DivKindFloat,
     DivKindTrunc,
@@ -2571,7 +2561,7 @@ static LLVMValueRef gen_div(CodeGen *g, bool want_runtime_safety, bool want_fast
                 return result;
             case DivKindExact:
                 if (want_runtime_safety) {
-                    LLVMValueRef floored = gen_floor(g, result, type_entry);
+                    LLVMValueRef floored = gen_float_op(g, result, type_entry, BuiltinFnIdFloor);
                     LLVMBasicBlockRef ok_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactOk");
                     LLVMBasicBlockRef fail_block = LLVMAppendBasicBlock(g->cur_fn_val, "DivExactFail");
                     LLVMValueRef ok_bit = LLVMBuildFCmp(g->builder, LLVMRealOEQ, floored, result, "");
@@ -2593,12 +2583,12 @@ static LLVMValueRef gen_div(CodeGen *g, bool want_runtime_safety, bool want_fast
                     LLVMBuildCondBr(g->builder, ltz, ltz_block, gez_block);
 
                     LLVMPositionBuilderAtEnd(g->builder, ltz_block);
-                    LLVMValueRef ceiled = gen_ceil(g, result, type_entry);
+                    LLVMValueRef ceiled = gen_float_op(g, result, type_entry, BuiltinFnIdCeil);
                     LLVMBasicBlockRef ceiled_end_block = LLVMGetInsertBlock(g->builder);
                     LLVMBuildBr(g->builder, end_block);
 
                     LLVMPositionBuilderAtEnd(g->builder, gez_block);
-                    LLVMValueRef floored = gen_floor(g, result, type_entry);
+                    LLVMValueRef floored = gen_float_op(g, result, type_entry, BuiltinFnIdFloor);
                     LLVMBasicBlockRef floored_end_block = LLVMGetInsertBlock(g->builder);
                     LLVMBuildBr(g->builder, end_block);
 
@@ -2610,7 +2600,7 @@ static LLVMValueRef gen_div(CodeGen *g, bool want_runtime_safety, bool want_fast
                     return phi;
                 }
             case DivKindFloor:
-                return gen_floor(g, result, type_entry);
+                return gen_float_op(g, result, type_entry, BuiltinFnIdFloor);
         }
         zig_unreachable();
     }
@@ -5450,10 +5440,10 @@ static LLVMValueRef ir_render_mark_err_ret_trace_ptr(CodeGen *g, IrExecutable *e
     return nullptr;
 }
 
-static LLVMValueRef ir_render_sqrt(CodeGen *g, IrExecutable *executable, IrInstructionSqrt *instruction) {
-    LLVMValueRef op = ir_llvm_value(g, instruction->op);
+static LLVMValueRef ir_render_float_op(CodeGen *g, IrExecutable *executable, IrInstructionFloatOp *instruction) {
+    LLVMValueRef op = ir_llvm_value(g, instruction->op1);
     assert(instruction->base.value.type->id == ZigTypeIdFloat);
-    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdSqrt);
+    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdFloatOp, instruction->op);
     return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
 }
 
@@ -5463,7 +5453,7 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrIn
     LLVMValueRef op3 = ir_llvm_value(g, instruction->op3);
     assert(instruction->base.value.type->id == ZigTypeIdFloat ||
            instruction->base.value.type->id == ZigTypeIdVector);
-    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdFMA);
+    LLVMValueRef fn_val = get_float_fn(g, instruction->base.value.type, ZigLLVMFnIdFMA, BuiltinFnIdMulAdd);
     LLVMValueRef args[3] = {
         op1,
         op2,
@@ -5814,8 +5804,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_merge_err_ret_traces(g, executable, (IrInstructionMergeErrRetTraces *)instruction);
         case IrInstructionIdMarkErrRetTracePtr:
             return ir_render_mark_err_ret_trace_ptr(g, executable, (IrInstructionMarkErrRetTracePtr *)instruction);
-        case IrInstructionIdSqrt:
-            return ir_render_sqrt(g, executable, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdFloatOp:
+            return ir_render_float_op(g, executable, (IrInstructionFloatOp *)instruction);
         case IrInstructionIdMulAdd:
             return ir_render_mul_add(g, executable, (IrInstructionMulAdd *)instruction);
         case IrInstructionIdArrayToVector:
@@ -7435,6 +7425,20 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdRem, "rem", 2);
     create_builtin_fn(g, BuiltinFnIdMod, "mod", 2);
     create_builtin_fn(g, BuiltinFnIdSqrt, "sqrt", 2);
+    create_builtin_fn(g, BuiltinFnIdSin, "sin", 2);
+    create_builtin_fn(g, BuiltinFnIdCos, "cos", 2);
+    create_builtin_fn(g, BuiltinFnIdExp, "exp", 2);
+    create_builtin_fn(g, BuiltinFnIdExp2, "exp2", 2);
+    create_builtin_fn(g, BuiltinFnIdLn, "ln", 2);
+    create_builtin_fn(g, BuiltinFnIdLog2, "log2", 2);
+    create_builtin_fn(g, BuiltinFnIdLog10, "log10", 2);
+    create_builtin_fn(g, BuiltinFnIdFabs, "fabs", 2);
+    create_builtin_fn(g, BuiltinFnIdFloor, "floor", 2);
+    create_builtin_fn(g, BuiltinFnIdCeil, "ceil", 2);
+    create_builtin_fn(g, BuiltinFnIdTrunc, "trunc", 2);
+    //Needs library support on Windows
+    //create_builtin_fn(g, BuiltinFnIdNearbyInt, "nearbyInt", 2);
+    create_builtin_fn(g, BuiltinFnIdRound, "round", 2);
     create_builtin_fn(g, BuiltinFnIdMulAdd, "mulAdd", 4);
     create_builtin_fn(g, BuiltinFnIdInlineCall, "inlineCall", SIZE_MAX);
     create_builtin_fn(g, BuiltinFnIdNoInlineCall, "noInlineCall", SIZE_MAX);
diff --git a/src/ir.cpp b/src/ir.cpp
index c2c6cb615416..50d2a0686809 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -991,8 +991,8 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionMarkErrRetTraceP
     return IrInstructionIdMarkErrRetTracePtr;
 }
 
-static constexpr IrInstructionId ir_instruction_id(IrInstructionSqrt *) {
-    return IrInstructionIdSqrt;
+static constexpr IrInstructionId ir_instruction_id(IrInstructionFloatOp *) {
+    return IrInstructionIdFloatOp;
 }
 
 static constexpr IrInstructionId ir_instruction_id(IrInstructionCheckRuntimeScope *) {
@@ -2312,6 +2312,59 @@ static IrInstruction *ir_build_overflow_op(IrBuilder *irb, Scope *scope, AstNode
     return &instruction->base;
 }
 
+
+// Name of a single-argument float builtin: the LLVM-side name if llvm_name, else the Zig builtin name.
+// TODO: Powi, Pow, minnum, maxnum, maximum, minimum, copysign, lround, llround, lrint, llrint.
+// So far only functions with non-complicated types are handled.
+const char *float_op_to_name(BuiltinFnId op, bool llvm_name) {
+    const bool b = llvm_name;
+
+    switch (op) {
+    case BuiltinFnIdSqrt:
+        return "sqrt";
+    case BuiltinFnIdSin:
+        return "sin";
+    case BuiltinFnIdCos:
+        return "cos";
+    case BuiltinFnIdExp:
+        return "exp";
+    case BuiltinFnIdExp2:
+        return "exp2";
+    case BuiltinFnIdLn:
+        return b ? "log" : "ln";
+    case BuiltinFnIdLog10:
+        return "log10";
+    case BuiltinFnIdLog2:
+        return "log2";
+    case BuiltinFnIdFabs:
+        return "fabs";
+    case BuiltinFnIdFloor:
+        return "floor";
+    case BuiltinFnIdCeil:
+        return "ceil";
+    case BuiltinFnIdTrunc:
+        return "trunc";
+    case BuiltinFnIdNearbyInt:
+        return b ? "nearbyint" : "nearbyInt";
+    case BuiltinFnIdRound:
+        return "round";
+    default:
+        zig_unreachable();
+    }
+}
+
+static IrInstruction *ir_build_float_op(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *type, IrInstruction *op1, BuiltinFnId op) {
+    IrInstructionFloatOp *instruction = ir_build_instruction<IrInstructionFloatOp>(irb, scope, source_node);
+    instruction->type = type;
+    instruction->op1 = op1;
+    instruction->op = op;
+
+    if (type != nullptr) ir_ref_instruction(type, irb->current_basic_block);
+    ir_ref_instruction(op1, irb->current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_mul_add(IrBuilder *irb, Scope *scope, AstNode *source_node,
         IrInstruction *type_value, IrInstruction *op1, IrInstruction *op2, IrInstruction *op3) {
     IrInstructionMulAdd *instruction = ir_build_instruction<IrInstructionMulAdd>(irb, scope, source_node);
@@ -3033,17 +3086,6 @@ static IrInstruction *ir_build_mark_err_ret_trace_ptr(IrBuilder *irb, Scope *sco
     return &instruction->base;
 }
 
-static IrInstruction *ir_build_sqrt(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *type, IrInstruction *op) {
-    IrInstructionSqrt *instruction = ir_build_instruction<IrInstructionSqrt>(irb, scope, source_node);
-    instruction->type = type;
-    instruction->op = op;
-
-    if (type != nullptr) ir_ref_instruction(type, irb->current_basic_block);
-    ir_ref_instruction(op, irb->current_basic_block);
-
-    return &instruction->base;
-}
-
 static IrInstruction *ir_build_has_decl(IrBuilder *irb, Scope *scope, AstNode *source_node,
         IrInstruction *container, IrInstruction *name)
 {
@@ -4400,6 +4442,19 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                 return ir_lval_wrap(irb, scope, bin_op, lval);
             }
         case BuiltinFnIdSqrt:
+        case BuiltinFnIdSin:
+        case BuiltinFnIdCos:
+        case BuiltinFnIdExp:
+        case BuiltinFnIdExp2:
+        case BuiltinFnIdLn:
+        case BuiltinFnIdLog2:
+        case BuiltinFnIdLog10:
+        case BuiltinFnIdFabs:
+        case BuiltinFnIdFloor:
+        case BuiltinFnIdCeil:
+        case BuiltinFnIdTrunc:
+        case BuiltinFnIdNearbyInt:
+        case BuiltinFnIdRound:
             {
                 AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
                 IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
@@ -4411,7 +4466,7 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                 if (arg1_value == irb->codegen->invalid_instruction)
                     return arg1_value;
 
-                IrInstruction *ir_sqrt = ir_build_sqrt(irb, scope, node, arg0_value, arg1_value);
+                IrInstruction *ir_sqrt = ir_build_float_op(irb, scope, node, arg0_value, arg1_value, builtin_fn->id);
                 return ir_lval_wrap(irb, scope, ir_sqrt, lval);
             }
         case BuiltinFnIdTruncate:
@@ -23214,70 +23269,248 @@ static IrInstruction *ir_analyze_instruction_mark_err_ret_trace_ptr(IrAnalyze *i
     return result;
 }
 
-static IrInstruction *ir_analyze_instruction_sqrt(IrAnalyze *ira, IrInstructionSqrt *instruction) {
-    ZigType *float_type = ir_resolve_type(ira, instruction->type->child);
-    if (type_is_invalid(float_type))
-        return ira->codegen->invalid_instruction;
+static void ir_eval_float_op(IrAnalyze *ira, IrInstructionFloatOp *source_instr, ZigType *float_type,
+    ConstExprValue *op, ConstExprValue *out_val) {
+    assert(ira && source_instr && float_type && out_val && op);
+    assert(float_type->id == ZigTypeIdFloat ||
+           float_type->id == ZigTypeIdComptimeFloat);
 
-    IrInstruction *op = instruction->op->child;
-    if (type_is_invalid(op->value.type))
+    BuiltinFnId fop = source_instr->op;
+    unsigned bits;
+
+    if (float_type->id == ZigTypeIdComptimeFloat) {
+        bits = 128;
+    } else if (float_type->id == ZigTypeIdFloat)
+        bits = float_type->data.floating.bit_count;
+
+    switch (bits) {
+    case 16: {
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            out_val->data.x_f16 = f16_sqrt(op->data.x_f16);
+            break;
+        case BuiltinFnIdSin:
+        case BuiltinFnIdCos:
+        case BuiltinFnIdExp:
+        case BuiltinFnIdExp2:
+        case BuiltinFnIdLn:
+        case BuiltinFnIdLog10:
+        case BuiltinFnIdLog2:
+        case BuiltinFnIdFabs:
+        case BuiltinFnIdFloor:
+        case BuiltinFnIdCeil:
+        case BuiltinFnIdTrunc:
+        case BuiltinFnIdNearbyInt:
+        case BuiltinFnIdRound:
+            zig_panic("unimplemented f16 builtin");
+        default:
+            zig_unreachable();
+        };
+        break;
+    };
+    case 32: {
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            out_val->data.x_f32 = sqrtf(op->data.x_f32);
+            break;
+        case BuiltinFnIdSin:
+            out_val->data.x_f32 = sinf(op->data.x_f32);
+            break;
+        case BuiltinFnIdCos:
+            out_val->data.x_f32 = cosf(op->data.x_f32);
+            break;
+        case BuiltinFnIdExp:
+            out_val->data.x_f32 = expf(op->data.x_f32);
+            break;
+        case BuiltinFnIdExp2:
+            out_val->data.x_f32 = exp2f(op->data.x_f32);
+            break;
+        case BuiltinFnIdLn:
+            out_val->data.x_f32 = logf(op->data.x_f32);
+            break;
+        case BuiltinFnIdLog10:
+            out_val->data.x_f32 = log10f(op->data.x_f32);
+            break;
+        case BuiltinFnIdLog2:
+            out_val->data.x_f32 = log2f(op->data.x_f32);
+            break;
+        case BuiltinFnIdFabs:
+            out_val->data.x_f32 = fabsf(op->data.x_f32);
+            break;
+        case BuiltinFnIdFloor:
+            out_val->data.x_f32 = floorf(op->data.x_f32);
+            break;
+        case BuiltinFnIdCeil:
+            out_val->data.x_f32 = ceilf(op->data.x_f32);
+            break;
+        case BuiltinFnIdTrunc:
+            out_val->data.x_f32 = truncf(op->data.x_f32);
+            break;
+        case BuiltinFnIdNearbyInt:
+            out_val->data.x_f32 = nearbyintf(op->data.x_f32);
+            break;
+        case BuiltinFnIdRound:
+            out_val->data.x_f32 = roundf(op->data.x_f32);
+            break;
+        default:
+            zig_unreachable();
+        };
+        break;
+    };
+    case 64: {
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            out_val->data.x_f64 = sqrt(op->data.x_f64);
+            break;
+        case BuiltinFnIdSin:
+            out_val->data.x_f64 = sin(op->data.x_f64);
+            break;
+        case BuiltinFnIdCos:
+            out_val->data.x_f64 = cos(op->data.x_f64);
+            break;
+        case BuiltinFnIdExp:
+            out_val->data.x_f64 = exp(op->data.x_f64);
+            break;
+        case BuiltinFnIdExp2:
+            out_val->data.x_f64 = exp2(op->data.x_f64);
+            break;
+        case BuiltinFnIdLn:
+            out_val->data.x_f64 = log(op->data.x_f64);
+            break;
+        case BuiltinFnIdLog10:
+            out_val->data.x_f64 = log10(op->data.x_f64);
+            break;
+        case BuiltinFnIdLog2:
+            out_val->data.x_f64 = log2(op->data.x_f64);
+            break;
+        case BuiltinFnIdFabs:
+            out_val->data.x_f64 = fabs(op->data.x_f64);
+            break;
+        case BuiltinFnIdFloor:
+            out_val->data.x_f64 = floor(op->data.x_f64);
+            break;
+        case BuiltinFnIdCeil:
+            out_val->data.x_f64 = ceil(op->data.x_f64);
+            break;
+        case BuiltinFnIdTrunc:
+            out_val->data.x_f64 = trunc(op->data.x_f64);
+            break;
+        case BuiltinFnIdNearbyInt:
+            out_val->data.x_f64 = nearbyint(op->data.x_f64);
+            break;
+        case BuiltinFnIdRound:
+            out_val->data.x_f64 = round(op->data.x_f64);
+            break;
+        default:
+            zig_unreachable();
+        }
+        break;
+    };
+    case 128: {
+        float128_t *out, *in;
+        if (float_type->id == ZigTypeIdComptimeFloat) {
+            out = &out_val->data.x_bigfloat.value;
+            in = &op->data.x_bigfloat.value;
+        } else {
+            out = &out_val->data.x_f128;
+            in = &op->data.x_f128;
+        }
+        switch (fop) {
+        case BuiltinFnIdSqrt:
+            f128M_sqrt(in, out);
+            break;
+        case BuiltinFnIdNearbyInt:
+        case BuiltinFnIdSin:
+        case BuiltinFnIdCos:
+        case BuiltinFnIdExp:
+        case BuiltinFnIdExp2:
+        case BuiltinFnIdLn:
+        case BuiltinFnIdLog10:
+        case BuiltinFnIdLog2:
+        case BuiltinFnIdFabs:
+        case BuiltinFnIdFloor:
+        case BuiltinFnIdCeil:
+        case BuiltinFnIdTrunc:
+        case BuiltinFnIdRound:
+            zig_panic("unimplemented f128 builtin");
+        default:
+            zig_unreachable();
+        }
+        break;
+    };
+    default:
+        zig_unreachable();
+    }
+}
+
+static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstructionFloatOp *instruction) {
+    IrInstruction *type = instruction->type->child;
+    if (type_is_invalid(type->value.type))
+        return ira->codegen->invalid_instruction;
+    // Resolve the type operand to the ZigType the result will have.
+    ZigType *expr_type = ir_resolve_type(ira, type);
+    if (type_is_invalid(expr_type))
         return ira->codegen->invalid_instruction;
 
-    bool ok_type = float_type->id == ZigTypeIdComptimeFloat || float_type->id == ZigTypeIdFloat;
-    if (!ok_type) {
-        ir_add_error(ira, instruction->type, buf_sprintf("@sqrt does not support type '%s'", buf_ptr(&float_type->name)));
+    // Only allow float types, and vectors of floats.
+    ZigType *float_type = (expr_type->id == ZigTypeIdVector) ? expr_type->data.vector.elem_type : expr_type;
+    if (float_type->id != ZigTypeIdFloat && float_type->id != ZigTypeIdComptimeFloat) {
+        ir_add_error(ira, instruction->type, buf_sprintf("@%s does not support type '%s'", float_op_to_name(instruction->op, false), buf_ptr(&float_type->name)));
         return ira->codegen->invalid_instruction;
     }
 
-    IrInstruction *casted_op = ir_implicit_cast(ira, op, float_type);
-    if (type_is_invalid(casted_op->value.type))
+    IrInstruction *op1 = instruction->op1->child;
+    if (type_is_invalid(op1->value.type))
         return ira->codegen->invalid_instruction;
 
-    if (instr_is_comptime(casted_op)) {
-        ConstExprValue *val = ir_resolve_const(ira, casted_op, UndefBad);
-        if (!val)
+    IrInstruction *casted_op1 = ir_implicit_cast(ira, op1, float_type);
+    if (type_is_invalid(casted_op1->value.type))
+        return ira->codegen->invalid_instruction;
+
+    if (instr_is_comptime(casted_op1)) {
+        // Our comptime 16-bit and 128-bit support is quite limited.
+        if ((float_type->id == ZigTypeIdComptimeFloat ||
+            float_type->data.floating.bit_count == 16 ||
+            float_type->data.floating.bit_count == 128) &&
+            instruction->op != BuiltinFnIdSqrt) {
+            ir_add_error(ira, instruction->type, buf_sprintf("@%s does not support type '%s'", float_op_to_name(instruction->op, false), buf_ptr(&float_type->name)));
             return ira->codegen->invalid_instruction;
+        }
 
-        IrInstruction *result = ir_const(ira, &instruction->base, float_type);
+        ConstExprValue *op1_const = ir_resolve_const(ira, casted_op1, UndefBad);
+        if (!op1_const)
+            return ira->codegen->invalid_instruction;
+
+        IrInstruction *result = ir_const(ira, &instruction->base, expr_type);
         ConstExprValue *out_val = &result->value;
 
-        if (float_type->id == ZigTypeIdComptimeFloat) {
-            bigfloat_sqrt(&out_val->data.x_bigfloat, &val->data.x_bigfloat);
-        } else if (float_type->id == ZigTypeIdFloat) {
-            switch (float_type->data.floating.bit_count) {
-                case 16:
-                    out_val->data.x_f16 = f16_sqrt(val->data.x_f16);
-                    break;
-                case 32:
-                    out_val->data.x_f32 = sqrtf(val->data.x_f32);
-                    break;
-                case 64:
-                    out_val->data.x_f64 = sqrt(val->data.x_f64);
-                    break;
-                case 128:
-                    f128M_sqrt(&val->data.x_f128, &out_val->data.x_f128);
-                    break;
-                default:
-                    zig_unreachable();
+        if (expr_type->id == ZigTypeIdVector) {
+            expand_undef_array(ira->codegen, op1_const);
+            out_val->special = ConstValSpecialUndef;
+            expand_undef_array(ira->codegen, out_val);
+            size_t len = expr_type->data.vector.len;
+            for (size_t i = 0; i < len; i += 1) {
+                ConstExprValue *float_operand_op1 = &op1_const->data.x_array.data.s_none.elements[i];
+                ConstExprValue *float_out_val = &out_val->data.x_array.data.s_none.elements[i];
+                assert(float_operand_op1->type == float_type);
+                assert(float_out_val->type == float_type);
+                // Evaluate each lane from its own element, not from the whole vector constant.
+                ir_eval_float_op(ira, instruction, float_type, float_operand_op1, float_out_val);
+                float_out_val->type = float_type;
             }
+            out_val->type = expr_type;
+            out_val->special = ConstValSpecialStatic;
         } else {
-            zig_unreachable();
+            ir_eval_float_op(ira, instruction, float_type, op1_const, out_val);
         }
-
         return result;
     }
 
     ir_assert(float_type->id == ZigTypeIdFloat, &instruction->base);
-    if (float_type->data.floating.bit_count != 16 &&
-        float_type->data.floating.bit_count != 32 &&
-        float_type->data.floating.bit_count != 64) {
-        ir_add_error(ira, instruction->type, buf_sprintf("compiler TODO: add implementation of sqrt for '%s'", buf_ptr(&float_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
 
-    IrInstruction *result = ir_build_sqrt(&ira->new_irb, instruction->base.scope,
-            instruction->base.source_node, nullptr, casted_op);
-    result->value.type = float_type;
+    IrInstruction *result = ir_build_float_op(&ira->new_irb, instruction->base.scope,
+            instruction->base.source_node, nullptr, casted_op1, instruction->op);
+    result->value.type = expr_type;
     return result;
 }
 
@@ -23762,8 +23995,8 @@ static IrInstruction *ir_analyze_instruction_nocast(IrAnalyze *ira, IrInstructio
             return ir_analyze_instruction_merge_err_ret_traces(ira, (IrInstructionMergeErrRetTraces *)instruction);
         case IrInstructionIdMarkErrRetTracePtr:
             return ir_analyze_instruction_mark_err_ret_trace_ptr(ira, (IrInstructionMarkErrRetTracePtr *)instruction);
-        case IrInstructionIdSqrt:
-            return ir_analyze_instruction_sqrt(ira, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdFloatOp:
+            return ir_analyze_instruction_float_op(ira, (IrInstructionFloatOp *)instruction);
         case IrInstructionIdMulAdd:
             return ir_analyze_instruction_mul_add(ira, (IrInstructionMulAdd *)instruction);
         case IrInstructionIdIntToErr:
@@ -24004,7 +24237,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdCoroFree:
         case IrInstructionIdCoroPromise:
         case IrInstructionIdPromiseResultType:
-        case IrInstructionIdSqrt:
+        case IrInstructionIdFloatOp:
         case IrInstructionIdMulAdd:
         case IrInstructionIdAtomicLoad:
         case IrInstructionIdIntCast:
diff --git a/src/ir.hpp b/src/ir.hpp
index 4fb75522122f..597624e2e674 100644
--- a/src/ir.hpp
+++ b/src/ir.hpp
@@ -26,5 +26,6 @@ bool ir_has_side_effects(IrInstruction *instruction);
 struct IrAnalyze;
 ConstExprValue *const_ptr_pointee(IrAnalyze *ira, CodeGen *codegen, ConstExprValue *const_val,
         AstNode *source_node);
+const char *float_op_to_name(BuiltinFnId op, bool llvm_name);
 
 #endif
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index e205c8e067c0..165d9b473946 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -1427,15 +1427,16 @@ static void ir_print_mark_err_ret_trace_ptr(IrPrint *irp, IrInstructionMarkErrRe
     fprintf(irp->f, ")");
 }
 
-static void ir_print_sqrt(IrPrint *irp, IrInstructionSqrt *instruction) {
-    fprintf(irp->f, "@sqrt(");
+static void ir_print_float_op(IrPrint *irp, IrInstructionFloatOp *instruction) {
+
+    fprintf(irp->f, "@%s(", float_op_to_name(instruction->op, false));
     if (instruction->type != nullptr) {
         ir_print_other_instruction(irp, instruction->type);
     } else {
         fprintf(irp->f, "null");
     }
     fprintf(irp->f, ",");
-    ir_print_other_instruction(irp, instruction->op);
+    ir_print_other_instruction(irp, instruction->op1);
     fprintf(irp->f, ")");
 }
 
@@ -1918,8 +1919,8 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) {
         case IrInstructionIdMarkErrRetTracePtr:
             ir_print_mark_err_ret_trace_ptr(irp, (IrInstructionMarkErrRetTracePtr *)instruction);
             break;
-        case IrInstructionIdSqrt:
-            ir_print_sqrt(irp, (IrInstructionSqrt *)instruction);
+        case IrInstructionIdFloatOp:
+            ir_print_float_op(irp, (IrInstructionFloatOp *)instruction);
             break;
         case IrInstructionIdMulAdd:
             ir_print_mul_add(irp, (IrInstructionMulAdd *)instruction);
diff --git a/src/util.cpp b/src/util.cpp
index 9a6a3829934d..f85565806f2b 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -13,6 +13,7 @@
 #include "userland.h"
 
 void zig_panic(const char *format, ...) {
+    // Format the caller's message and write it to stderr.
     va_list ap;
     va_start(ap, format);
     vfprintf(stderr, format, ap);
diff --git a/std/special/c.zig b/std/special/c.zig
index b3cf54619fec..15cefbd2a097 100644
--- a/std/special/c.zig
+++ b/std/special/c.zig
@@ -254,24 +254,32 @@ export fn fmod(x: f64, y: f64) f64 {
 
 // TODO add intrinsics for these (and probably the double version too)
 // and have the math stuff use the intrinsic. same as @mod and @rem
-export fn floorf(x: f32) f32 {
-    return math.floor(x);
-}
-export fn ceilf(x: f32) f32 {
-    return math.ceil(x);
-}
-export fn floor(x: f64) f64 {
-    return math.floor(x);
-}
-export fn ceil(x: f64) f64 {
-    return math.ceil(x);
-}
-export fn fma(a: f64, b: f64, c: f64) f64 {
-    return math.fma(f64, a, b, c);
-}
-export fn fmaf(a: f32, b: f32, c: f32) f32 {
-    return math.fma(f32, a, b, c);
-}
+export fn floorf(x: f32) f32 {return math.floor(x);}
+export fn ceilf(x: f32) f32 {return math.ceil(x);}
+export fn floor(x: f64) f64 {return math.floor(x);}
+export fn ceil(x: f64) f64 {return math.ceil(x);}
+export fn fma(a: f64, b: f64, c: f64) f64 {return math.fma(f64, a, b, c);}
+export fn fmaf(a: f32, b: f32, c: f32) f32 {return math.fma(f32, a, b, c);}
+export fn sin(a: f64) f64 {return math.sin(a);}
+export fn sinf(a: f32) f32 {return math.sin(a);}
+export fn cos(a: f64) f64 {return math.cos(a);}
+export fn cosf(a: f32) f32 {return math.cos(a);}
+export fn exp(a: f64) f64 {return math.exp(a);}
+export fn expf(a: f32) f32 {return math.exp(a);}
+export fn exp2(a: f64) f64 {return math.exp2(a);}
+export fn exp2f(a: f32) f32 {return math.exp2(a);}
+export fn log(a: f64) f64 {return math.ln(a);}
+export fn logf(a: f32) f32 {return math.ln(a);}
+export fn log2(a: f64) f64 {return math.log2(a);}
+export fn log2f(a: f32) f32 {return math.log2(a);}
+export fn log10(a: f64) f64 {return math.log10(a);}
+export fn log10f(a: f32) f32 {return math.log10(a);}
+export fn fabs(a: f64) f64 {return math.fabs(a);}
+export fn fabsf(a: f32) f32 {return math.fabs(a);}
+export fn trunc(a: f64) f64 {return math.trunc(a);}
+export fn truncf(a: f32) f32 {return math.trunc(a);}
+export fn round(a: f64) f64 {return math.round(a);}
+export fn roundf(a: f32) f32 {return math.round(a);}
 fn generic_fmod(comptime T: type, x: T, y: T) T {
     @setRuntimeSafety(false);
 
diff --git a/test/stage1/behavior.zig b/test/stage1/behavior.zig
index 10e7c1a09beb..efefed33ba34 100644
--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@@ -71,6 +71,7 @@ comptime {
     _ = @import("behavior/pointers.zig");
     _ = @import("behavior/popcount.zig");
     _ = @import("behavior/muladd.zig");
+    _ = @import("behavior/floatop.zig");
     _ = @import("behavior/ptrcast.zig");
     _ = @import("behavior/pub_enum.zig");
     _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
diff --git a/test/stage1/behavior/floatop.zig b/test/stage1/behavior/floatop.zig
new file mode 100644
index 000000000000..de2f6815a623
--- /dev/null
+++ b/test/stage1/behavior/floatop.zig
@@ -0,0 +1,243 @@
+const expect = @import("std").testing.expect;
+const pi = @import("std").math.pi;
+const e = @import("std").math.e;
+
+test "@sqrt" {
+    comptime testSqrt();
+    testSqrt();
+}
+
+fn testSqrt() void {
+    {
+        var a: f16 = 4;
+        expect(@sqrt(f16, a) == 2);
+    }
+    {
+        var a: f32 = 9;
+        expect(@sqrt(f32, a) == 3);
+    }
+    {
+        var a: f64 = 25;
+        expect(@sqrt(f64, a) == 5);
+    }
+    {
+        const a: comptime_float = 25.0;
+        expect(@sqrt(comptime_float, a) == 5.0);
+    }
+    // Waiting on a c.zig implementation
+    //{
+    //    var a: f128 = 49;
+    //    expect(@sqrt(f128, a) == 7);
+    //}
+}
+
+test "@sin" {
+    comptime testSin();
+    testSin();
+}
+
+fn testSin() void {
+    // TODO - this is actually useful and should be implemented
+    // (all the trig functions for f16)
+    // but will probably wait till self-hosted
+    //{
+    //    var a: f16 = pi;
+    //    expect(@sin(f16, a/2) == 1);
+    //}
+    {
+        var a: f32 = 0;
+        expect(@sin(f32, a) == 0);
+    }
+    {
+        var a: f64 = 0;
+        expect(@sin(f64, a) == 0);
+    }
+    // TODO - f128 case (presumably what this stub was meant to be; confirm)
+    //{
+    //    var a: f128 = pi;
+    //    expect(@sin(f128, a/2) == 1);
+    //}
+}
+
+test "@cos" {
+    comptime testCos();
+    testCos();
+}
+
+fn testCos() void {
+    {
+        var a: f32 = 0;
+        expect(@cos(f32, a) == 1);
+    }
+    {
+        var a: f64 = 0;
+        expect(@cos(f64, a) == 1);
+    }
+}
+
+test "@exp" {
+    comptime testExp();
+    testExp();
+}
+
+fn testExp() void {
+    {
+        var a: f32 = 0;
+        expect(@exp(f32, a) == 1);
+    }
+    {
+        var a: f64 = 0;
+        expect(@exp(f64, a) == 1);
+    }
+}
+
+test "@exp2" {
+    comptime testExp2();
+    testExp2();
+}
+
+fn testExp2() void {
+    {
+        var a: f32 = 2;
+        expect(@exp2(f32, a) == 4);
+    }
+    {
+        var a: f64 = 2;
+        expect(@exp2(f64, a) == 4);
+    }
+}
+
+test "@ln" {
+    // ln(e) may come back one ulp below 1 (old musl, possibly glibc, and our
+    // current math.ln), so testLn accepts both 1 and the next float down.
+    comptime testLn();
+    testLn();
+}
+
+fn testLn() void {
+    {
+        var a: f32 = e;
+        expect(@ln(f32, a) == 1 or @ln(f32, a) == @bitCast(f32, u32(0x3f7fffff)));
+    }
+    {
+        var a: f64 = e;
+        expect(@ln(f64, a) == 1 or @ln(f64, a) == @bitCast(f64, u64(0x3fefffffffffffff)));
+    }
+}
+
+test "@log2" {
+    comptime testLog2();
+    testLog2();
+}
+
+fn testLog2() void {
+    {
+        var a: f32 = 4;
+        expect(@log2(f32, a) == 2);
+    }
+    {
+        var a: f64 = 4;
+        expect(@log2(f64, a) == 2);
+    }
+}
+
+test "@log10" {
+    comptime testLog10();
+    testLog10();
+}
+
+fn testLog10() void {
+    {
+        var a: f32 = 100;
+        expect(@log10(f32, a) == 2);
+    }
+    {
+        var a: f64 = 1000;
+        expect(@log10(f64, a) == 3);
+    }
+}
+
+test "@fabs" {
+    comptime testFabs();
+    testFabs();
+}
+
+fn testFabs() void {
+    {
+        var a: f32 = -2.5;
+        var b: f32 = 2.5;
+        expect(@fabs(f32, a) == 2.5);
+        expect(@fabs(f32, b) == 2.5);
+    }
+    {
+        var a: f64 = -2.5;
+        var b: f64 = 2.5;
+        expect(@fabs(f64, a) == 2.5);
+        expect(@fabs(f64, b) == 2.5);
+    }
+}
+
+test "@floor" {
+    comptime testFloor();
+    testFloor();
+}
+
+fn testFloor() void {
+    {
+        var a: f32 = 2.1;
+        expect(@floor(f32, a) == 2);
+    }
+    {
+        var a: f64 = 3.5;
+        expect(@floor(f64, a) == 3);
+    }
+}
+
+test "@ceil" {
+    comptime testCeil();
+    testCeil();
+}
+
+fn testCeil() void {
+    {
+        var a: f32 = 2.1;
+        expect(@ceil(f32, a) == 3);
+    }
+    {
+        var a: f64 = 3.5;
+        expect(@ceil(f64, a) == 4);
+    }
+}
+
+test "@trunc" {
+    comptime testTrunc();
+    testTrunc();
+}
+
+fn testTrunc() void {
+    {
+        var a: f32 = 2.1;
+        expect(@trunc(f32, a) == 2);
+    }
+    {
+        var a: f64 = -3.5;
+        expect(@trunc(f64, a) == -3);
+    }
+}
+
+// This is waiting on library support for the Windows build (not sure why the others don't need it)
+//test "@nearbyInt" {
+//    comptime testNearbyInt();
+//    testNearbyInt();
+//}
+
+//fn testNearbyInt() void {
+//    {
+//        var a: f32 = 2.1;
+//        expect(@nearbyInt(f32, a) == 2);
+//    }
+//    {
+//        var a: f64 = -3.75;
+//        expect(@nearbyInt(f64, a) == -4);
+//    }
+//}