diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 642675b0048..19b8646c36b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,8 +109,8 @@ jobs: - name: gi config: --enable-middle-end=flambda2 os: ubuntu-latest - build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1' - ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1' + build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1,vectorize=1' + ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1,vectorize=1' check_arch: true - name: cfg-selection diff --git a/Makefile.common-jst b/Makefile.common-jst index 6784deee559..1b7d025762f 100644 --- a/Makefile.common-jst +++ b/Makefile.common-jst @@ -78,6 +78,11 @@ runtime-stdlib: boot-compiler # Dune does not believe the compiler can make .cmxs unless the following file exists. 
@touch _build/runtime_stdlib_install/lib/ocaml_runtime_stdlib/dynlink.cmxa +# This target is a polling version of "make runtime-stdlib" +.PHONY: runtime-stdlib-hacking +runtime-stdlib-hacking: boot-compiler + RUNTIME_DIR=$(RUNTIME_DIR) $(dune) build -w $(ws_runstd) --only-package=ocaml_runtime_stdlib @install + compiler: runtime-stdlib RUNTIME_DIR=$(RUNTIME_DIR) SYSTEM=$(SYSTEM) MODEL=$(MODEL) \ ASPP="$(ASPP)" ASPPFLAGS="$(ASPPFLAGS)" \ diff --git a/backend/.ocamlformat-enable b/backend/.ocamlformat-enable index c992d5b7245..ac2ffe9e3d8 100644 --- a/backend/.ocamlformat-enable +++ b/backend/.ocamlformat-enable @@ -3,15 +3,21 @@ amd64/selection.ml amd64/selection_utils.ml amd64/simd*.ml amd64/stack_check.ml +amd64/vectorize_specific.ml arm64/cfg_selection.ml arm64/selection.ml arm64/selection_utils.ml arm64/simd*.ml arm64/stack_check.ml +arm64/vectorize_specific.ml asm_targets/**/*.ml asm_targets/**/*.mli cfg/**/*.ml cfg/**/*.mli +cfg/vectorize.ml +cfg/vectorize.mli +vectorize_utils.ml +vectorize_utils.mli cfg_selectgen.ml cfg_selectgen.mli cfg_selection.mli diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml index 96bb4df7214..8098cbf7158 100644 --- a/backend/amd64/arch.ml +++ b/backend/amd64/arch.ml @@ -288,8 +288,9 @@ let win64 = | "win64" | "mingw64" | "cygwin" -> true | _ -> false -(* Specific operations that are pure *) +(* Specific operations that are pure *) +(* Keep in sync with [Vectorize_specific] *) let operation_is_pure = function | Ilea _ | Ibswap _ | Isextend32 | Izextend32 | Ifloatarithmem _ -> true @@ -300,7 +301,7 @@ let operation_is_pure = function | Isimd op -> Simd.is_pure op (* Specific operations that can raise *) - +(* Keep in sync with [Vectorize_specific] *) let operation_can_raise = function | Ilea _ | Ibswap _ | Isextend32 | Izextend32 | Ifloatarithmem _ @@ -309,6 +310,7 @@ let operation_can_raise = function | Istore_int (_, _, _) | Ioffset_loc (_, _) | Icldemote _ | Iprefetch _ -> false +(* Keep in sync with [Vectorize_specific] 
*) let operation_allocates = function | Ilea _ | Ibswap _ | Isextend32 | Izextend32 | Ifloatarithmem _ @@ -410,84 +412,107 @@ let equal_specific_operation left right = (* addressing mode functions *) -let compare_addressing_mode_without_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = - (* Ignores displ when comparing to show that it is possible to calculate the offset *) +let equal_addressing_mode_without_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = + (* Ignores [displ] when comparing to show that it is possible to calculate the offset, + see [addressing_offset_in_bytes]. *) match addressing_mode_1, addressing_mode_2 with | Ibased (symbol1, global1, _), Ibased (symbol2, global2, _) -> ( match global1, global2 with | Global, Global | Local, Local -> - String.compare symbol1 symbol2 - | Global, Local -> -1 - | Local, Global -> 1) - | Ibased _, _ -> -1 - | _, Ibased _ -> 1 - | Iindexed _, Iindexed _ -> 0 - | Iindexed _, _ -> -1 - | _, Iindexed _ -> 1 - | Iindexed2 _, Iindexed2 _ -> 0 - | Iindexed2 _, _ -> -1 - | _, Iindexed2 _ -> 1 - | Iscaled (scale1, _), Iscaled (scale2, _) -> Int.compare scale1 scale2 - | Iscaled _, _ -> -1 - | _, Iscaled _ -> 1 + String.equal symbol1 symbol2 + | (Global | Local), _ -> false) + | Iindexed _, Iindexed _ -> true + | Iindexed2 _, Iindexed2 _ -> true + | Iscaled (scale1, _), Iscaled (scale2, _) -> Int.equal scale1 scale2 | Iindexed2scaled (scale1, _), Iindexed2scaled (scale2, _) -> - Int.compare scale1 scale2 - -let compare_addressing_mode_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = - match addressing_mode_1, addressing_mode_2 with - | Ibased (symbol1, global1, n1), Ibased (symbol2, global2, n2) -> ( - match global1, global2 with - | Global, Global | Local, Local -> - if symbol1 = symbol2 then Some (Int.compare n1 n2) else None - | Global, Local | Local, Global -> None) - | Iindexed n1, Iindexed n2 -> Some (Int.compare n1 n2) 
- | Iindexed2 n1, Iindexed2 n2 -> Some (Int.compare n1 n2) - | Iscaled (scale1, n1), Iscaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (Int.compare n1 n2) else None - | Iindexed2scaled (scale1, n1), Iindexed2scaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (Int.compare n1 n2) else None - | Ibased _, _ -> None - | Iindexed _, _ -> None - | Iindexed2 _, _ -> None - | Iscaled _, _ -> None - | Iindexed2scaled _, _ -> None - -let addressing_offset_in_bytes (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = + Int.equal scale1 scale2 + | (Ibased _ | Iindexed _ | Iindexed2 _ | Iscaled _ | Iindexed2scaled _), _ -> false + +let addressing_offset_in_bytes + (addressing_mode_1: addressing_mode) + (addressing_mode_2 : addressing_mode) + ~arg_offset_in_bytes + args_1 + args_2 + = + let address_arg_offset_in_bytes index = + arg_offset_in_bytes args_1.(index) args_2.(index) + in match addressing_mode_1, addressing_mode_2 with - | Ibased (symbol1, global1, n1), Ibased (symbol2, global2, n2) -> ( - match global1, global2 with - | Global, Global | Local, Local -> - if symbol1 = symbol2 then Some (n2 - n1) else None - | Global, Local | Local, Global -> None) - | Iindexed n1, Iindexed n2 -> Some (n2 - n1) - | Iindexed2 n1, Iindexed2 n2 -> Some (n2 - n1) + | Ibased (symbol1, global1, n1), Ibased (symbol2, global2, n2) -> + (* symbol + displ *) + (match global1, global2 with + | Global, Global | Local, Local -> + if String.equal symbol1 symbol2 then Some (n2 - n1) else None + | Global, Local | Local, Global -> None) + | Iindexed n1, Iindexed n2 -> + (* reg + displ *) + (match address_arg_offset_in_bytes 0 with + | Some base_off -> Some (base_off + (n2 - n1)) + | None -> None) + | Iindexed2 n1, Iindexed2 n2 -> + (* reg + reg + displ *) + (match address_arg_offset_in_bytes 0, address_arg_offset_in_bytes 1 with + | Some arg0_offset, Some arg1_offset -> + Some 
(arg0_offset + arg1_offset + (n2 - n1)) + | (None, _|Some _, _) -> None) | Iscaled (scale1, n1), Iscaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (n2 - n1) else None + (* reg * scale + displ *) + if not (Int.compare scale1 scale2 = 0) then None + else + (match address_arg_offset_in_bytes 0 with + | Some offset -> Some ((offset * scale1) + (n2 - n1)) + | None -> None) | Iindexed2scaled (scale1, n1), Iindexed2scaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (n2 - n1) else None + (* reg + reg * scale + displ *) + if not (Int.compare scale1 scale2 = 0) then None else + (match address_arg_offset_in_bytes 0, address_arg_offset_in_bytes 1 with + | Some arg0_offset, Some arg1_offset -> + Some (arg0_offset + (arg1_offset*scale1) + (n2 - n1)) + | (None, _|Some _, _) -> None) | Ibased _, _ -> None | Iindexed _, _ -> None | Iindexed2 _, _ -> None | Iscaled _, _ -> None | Iindexed2scaled _, _ -> None - let can_cross_loads_or_stores (specific_operation : specific_operation) = - match specific_operation with - | Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Isimd _ | Icldemote _ - | Iprefetch _ -> - false - | Ibswap _ | Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence - | Ipause -> - true - - let may_break_alloc_freshness (specific_operation : specific_operation) = - match specific_operation with - | Isimd _ -> true - | Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | Isextend32 - | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | Ipause | Icldemote _ - | Iprefetch _ -> - false +let isomorphic_specific_operation op1 op2 = + match op1, op2 with + | Ilea a1, Ilea a2 -> equal_addressing_mode_without_displ a1 a2 + | Istore_int (_n1, a1, is_assign1), Istore_int (_n2, a2, is_assign2) -> + equal_addressing_mode_without_displ a1 a2 && Bool.equal is_assign1 is_assign2 + | Ioffset_loc (_n1, a1), Ioffset_loc (_n2, a2) -> + 
equal_addressing_mode_without_displ a1 a2 + | Ifloatarithmem (w1, o1, a1), Ifloatarithmem (w2, o2, a2) -> + Cmm.equal_float_width w1 w2 && + equal_float_operation o1 o2 && + equal_addressing_mode_without_displ a1 a2 + | Ibswap { bitwidth = left }, Ibswap { bitwidth = right } -> + Int.equal (int_of_bswap_bitwidth left) (int_of_bswap_bitwidth right) + | Isextend32, Isextend32 -> + true + | Izextend32, Izextend32 -> + true + | Irdtsc, Irdtsc -> + true + | Irdpmc, Irdpmc -> + true + | Ilfence, Ilfence -> + true + | Isfence, Isfence -> + true + | Imfence, Imfence -> + true + | Ipause, Ipause -> true + | Icldemote x, Icldemote x' -> equal_addressing_mode_without_displ x x' + | Iprefetch { is_write = left_is_write; locality = left_locality; addr = left_addr; }, + Iprefetch { is_write = right_is_write; locality = right_locality; addr = right_addr; } -> + Bool.equal left_is_write right_is_write + && equal_prefetch_temporal_locality_hint left_locality right_locality + && equal_addressing_mode_without_displ left_addr right_addr + | Isimd l, Isimd r -> + Simd.equal_operation l r + | (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | + Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | + Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ -> + false diff --git a/backend/amd64/arch.mli b/backend/amd64/arch.mli index 27278669b45..91e3bbf348a 100644 --- a/backend/amd64/arch.mli +++ b/backend/amd64/arch.mli @@ -140,14 +140,15 @@ val operation_allocates : specific_operation -> bool val float_cond_and_need_swap : Lambda.float_comparison -> X86_ast.float_condition * bool +val isomorphic_specific_operation : specific_operation -> specific_operation -> bool (* addressing mode functions *) -val compare_addressing_mode_without_displ : addressing_mode -> addressing_mode -> int +val equal_addressing_mode_without_displ : addressing_mode -> addressing_mode -> bool -val compare_addressing_mode_displ : addressing_mode -> addressing_mode -> int option - 
-val addressing_offset_in_bytes : addressing_mode -> addressing_mode -> int option - -val can_cross_loads_or_stores : specific_operation -> bool - -val may_break_alloc_freshness : specific_operation -> bool +val addressing_offset_in_bytes + : addressing_mode + -> addressing_mode + -> arg_offset_in_bytes:('a -> 'a -> int option) + -> 'a array + -> 'a array + -> int option diff --git a/backend/amd64/simd_selection.ml b/backend/amd64/simd_selection.ml index d33c8aa0e7a..2955d27e6de 100644 --- a/backend/amd64/simd_selection.ml +++ b/backend/amd64/simd_selection.ml @@ -467,3 +467,387 @@ let () = Location.register_error_of_exn (function | Error err -> Some (Location.error_of_printer_file report_error err) | _ -> None) + +(* Vectorize operations *) + +let vector_width_in_bits = 128 + +(* CR-soon gyorsh: [vectorize_operation] is too long, refactor / split up. *) +let vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t) + ~arg_count ~res_count (cfg_ops : Operation.t list) : + Vectorize_utils.Vectorized_instruction.t list option = + (* Assumes cfg_ops are isomorphic *) + let width_in_bits = Vectorize_utils.Width_in_bits.to_int width_type in + let length = List.length cfg_ops in + assert (length * width_in_bits = vector_width_in_bits); + let same_width memory_chunk = + Vectorize_utils.Width_in_bits.equal width_type + (Vectorize_utils.Width_in_bits.of_memory_chunk memory_chunk) + in + let make_default ~arg_count ~res_count operation : + Vectorize_utils.Vectorized_instruction.t list option = + Some + [ Vectorize_utils.Vectorized_instruction.make_default ~arg_count + ~res_count operation ] + in + let create_const_vec consts = + let highs, lows = Misc.Stdlib.List.split_at (length / 2) consts in + let pack_int64 nums = + let mask = + Int64.shift_right_logical Int64.minus_one (64 - width_in_bits) + in + List.fold_left + (fun target num -> + Int64.logor + (Int64.shift_left target width_in_bits) + (Int64.logand num mask)) + 0L nums + in + Operation.Const_vec128 { high = 
pack_int64 highs; low = pack_int64 lows } + |> make_default ~arg_count:0 ~res_count:1 + in + let add_op = + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> Add_i64 + | W32 -> Add_i32 + | W16 -> Add_i16 + | W8 -> Add_i8 + in + Some (Operation.Specific (Isimd (SSE2 sse_op))) + in + let mul_op = + match width_type with + | W128 -> None + | W64 -> None + | W32 -> Some (Operation.Specific (Isimd (SSE41 Mullo_i32))) + | W16 -> Some (Operation.Specific (Isimd (SSE2 Mullo_i16))) + | W8 -> None + in + let vectorize_intop (intop : Simple_operation.integer_operation) = + match intop with + | Iadd -> Option.bind add_op (make_default ~arg_count ~res_count) + | Isub -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> Sub_i64 + | W32 -> Sub_i32 + | W16 -> Sub_i16 + | W8 -> Sub_i8 + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Imul -> Option.bind mul_op (make_default ~arg_count ~res_count) + | Imulh { signed } -> ( + match width_type with + | W128 -> None + | W64 -> None + | W32 -> None + | W16 -> + if signed + then + Operation.Specific (Isimd (SSE2 Mulhi_i16)) + |> make_default ~arg_count ~res_count + else + Operation.Specific (Isimd (SSE2 Mulhi_unsigned_i16)) + |> make_default ~arg_count ~res_count + | W8 -> None) + | Iand -> + Operation.Specific (Isimd (SSE2 And_bits)) + |> make_default ~arg_count ~res_count + | Ior -> + Operation.Specific (Isimd (SSE2 Or_bits)) + |> make_default ~arg_count ~res_count + | Ixor -> + Operation.Specific (Isimd (SSE2 Xor_bits)) + |> make_default ~arg_count ~res_count + | Ilsl -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SLL_i64 + | W32 -> SLL_i32 + | W16 -> SLL_i16 + | W8 -> assert false + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Ilsr -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SRL_i64 + | W32 -> SRL_i32 + | W16 -> SRL_i16 + | W8 -> assert 
false + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Iasr -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> assert false + | W32 -> SRA_i32 + | W16 -> SRA_i16 + | W8 -> assert false + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Icomp (Isigned intcomp) -> ( + match intcomp with + | Ceq -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SSE41 Cmpeq_i64 + | W32 -> SSE2 Cmpeq_i32 + | W16 -> SSE2 Cmpeq_i16 + | W8 -> SSE2 Cmpeq_i8 + in + Operation.Specific (Isimd sse_op) |> make_default ~arg_count ~res_count + | Cgt -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SSE42 Cmpgt_i64 + | W32 -> SSE2 Cmpgt_i32 + | W16 -> SSE2 Cmpgt_i16 + | W8 -> SSE2 Cmpgt_i8 + in + Operation.Specific (Isimd sse_op) |> make_default ~arg_count ~res_count + | Cne | Clt | Cle | Cge -> + None + (* These instructions seem to not have a simd counterpart yet, could + also implement as a combination of other instructions if needed in + the future *)) + | Idiv | Imod | Iclz _ | Ictz _ | Ipopcnt | Icomp (Iunsigned _) -> None + in + match List.hd cfg_ops with + | Move -> Operation.Move |> make_default ~arg_count ~res_count + | Const_int _ -> + let extract_const_int (op : Operation.t) = + match op with + | Const_int n -> Int64.of_nativeint n + | Move | Load _ | Store _ | Intop _ | Intop_imm _ | Specific _ | Alloc _ + | Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_float32 _ + | Const_float _ | Const_symbol _ | Const_vec128 _ | Stackoffset _ + | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ | Opaque + | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll -> + assert false + in + assert (arg_count = 0 && res_count = 1); + let consts = List.map extract_const_int cfg_ops in + create_const_vec consts + | Load { memory_chunk; addressing_mode; mutability; is_atomic } -> + if not (same_width 
memory_chunk) + then None + else + let num_args_addressing = Arch.num_args_addressing addressing_mode in + assert (arg_count = num_args_addressing && res_count = 1); + let operation = + Operation.Load + { memory_chunk = Onetwentyeight_unaligned; + addressing_mode; + mutability; + is_atomic + } + in + Some + [ { operation; + arguments = + Array.init num_args_addressing (fun i -> + Vectorize_utils.Vectorized_instruction.Original i); + results = [| Result 0 |] + } ] + | Store (memory_chunk, addressing_mode, is_assignment) -> + if not (same_width memory_chunk) + then None + else + let num_args_addressing = Arch.num_args_addressing addressing_mode in + assert (arg_count = num_args_addressing + 1 && res_count = 0); + let operation = + Operation.Store + (Onetwentyeight_unaligned, addressing_mode, is_assignment) + in + Some + [ { operation; + arguments = + Array.append + [| Vectorize_utils.Vectorized_instruction.Argument 0 |] + (Array.init num_args_addressing (fun i -> + Vectorize_utils.Vectorized_instruction.Original (i + 1))); + results = [||] + } ] + | Intop intop -> vectorize_intop intop + | Intop_imm (intop, _) -> ( + let extract_intop_imm_int (op : Operation.t) = + match op with + | Intop_imm (_, n) -> Int64.of_int n + | Move | Load _ | Store _ | Intop _ | Specific _ | Alloc _ + | Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_int _ + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ + | Opaque | Begin_region | End_region | Name_for_debugger _ | Dls_get + | Poll -> + assert false + in + let consts = List.map extract_intop_imm_int cfg_ops in + match create_const_vec consts, vectorize_intop intop with + | Some [const_instruction], Some [intop_instruction] -> + if Array.length const_instruction.results = 1 + && Array.length intop_instruction.arguments = 2 + then ( + assert (arg_count = 1 && res_count = 1); + const_instruction.results.(0) + <- 
Vectorize_utils.Vectorized_instruction.New 0; + intop_instruction.arguments.(1) + <- Vectorize_utils.Vectorized_instruction.New 0; + Some [const_instruction; intop_instruction]) + else None + | _ -> None) + | Specific op -> ( + match op with + | Ilea addressing_mode -> ( + let extract_scale_displ (op : Operation.t) = + match op with + | Specific spec_op -> ( + match spec_op with + | Ilea addressing_mode -> ( + match addressing_mode with + | Iindexed displ -> None, Some displ + | Iindexed2 displ -> None, Some displ + | Iscaled (scale, displ) -> Some scale, Some displ + | Iindexed2scaled (scale, displ) -> Some scale, Some displ + | Ibased _ -> None, None) + | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ + | Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence + | Imfence | Ipause | Isimd _ | Iprefetch _ | Icldemote _ -> + assert false) + | Move | Load _ | Store _ | Intop _ | Intop_imm _ | Alloc _ + | Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_int _ + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _ + | Probe_is_enabled _ | Opaque | Begin_region | End_region + | Name_for_debugger _ | Dls_get | Poll -> + assert false + in + let get_scale op = + match extract_scale_displ op with + | Some scale, _ -> scale |> Int64.of_int + | _ -> assert false + in + let get_displ op = + match extract_scale_displ op with + | _, Some displ -> displ |> Int64.of_int + | _ -> assert false + in + let make_move arg res = + { Vectorize_utils.Vectorized_instruction.operation = Move; + arguments = [| arg |]; + results = [| res |] + } + in + let make_binary_operation arg_0 arg_1 res operation = + { Vectorize_utils.Vectorized_instruction.operation; + arguments = [| arg_0; arg_1 |]; + results = [| res |] + } + in + let make_const res consts = + match create_const_vec consts with + | Some [const_instruction] -> + assert ( + Array.length const_instruction.arguments = 0 + && Array.length 
const_instruction.results = 1); + const_instruction.results.(0) <- res; + const_instruction + | _ -> assert false + in + match addressing_mode with + | Iindexed _ -> ( + match add_op with + | Some add -> + assert (arg_count = 1 && res_count = 1); + let displs = List.map get_displ cfg_ops in + (* reg + displ *) + Some + [ make_move (Argument 0) (Result 0); + make_const (New 0) displs; + make_binary_operation (Result 0) (New 0) (Result 0) add ] + | None -> None) + | Iindexed2 _ -> ( + match add_op with + | Some add -> + assert (arg_count = 2 && res_count = 1); + let displs = List.map get_displ cfg_ops in + (* reg + reg + displ *) + Some + [ make_move (Argument 0) (Result 0); + make_binary_operation (Result 0) (Argument 1) (Result 0) add; + make_const (New 0) displs; + make_binary_operation (Result 0) (New 0) (Result 0) add ] + | None -> None) + | Iscaled _ -> ( + match add_op, mul_op with + | Some add, Some mul -> + assert (arg_count = 1 && res_count = 1); + let scales = List.map get_scale cfg_ops in + let displs = List.map get_displ cfg_ops in + (* reg * scale + displ *) + Some + [ make_move (Argument 0) (Result 0); + make_const (New 0) scales; + make_binary_operation (Result 0) (New 0) (Result 0) mul; + make_const (New 1) displs; + make_binary_operation (Result 0) (New 1) (Result 0) add ] + | _ -> None) + | Iindexed2scaled _ -> ( + match add_op, mul_op with + | Some add, Some mul -> + assert (arg_count = 2 && res_count = 1); + let scales = List.map get_scale cfg_ops in + let displs = List.map get_displ cfg_ops in + (* reg + reg * scale + displ *) + Some + [ make_move (Argument 1) (Result 0); + make_const (New 0) scales; + make_binary_operation (Result 0) (New 0) (Result 0) mul; + make_binary_operation (Result 0) (Argument 0) (Result 0) add; + make_const (New 1) displs; + make_binary_operation (Result 0) (New 1) (Result 0) add ] + | _ -> None) + | Ibased _ -> None) + | Isextend32 -> ( + match width_type with + | W128 -> None + | W64 -> + Operation.Specific (Isimd 
(SSE41 I32_sx_i64)) + |> make_default ~arg_count ~res_count + | W32 -> + None + (* If the upper bits of the original register containing the smaller + register is determined to be unused without relying on this file, + these can also be vectorized to be a move *) + | W16 -> None + | W8 -> None) + | Izextend32 -> ( + match width_type with + | W128 -> None + | W64 -> + Operation.Specific (Isimd (SSE41 I32_zx_i64)) + |> make_default ~arg_count ~res_count + | W32 -> None (* See previous comment *) + | W16 -> None + | W8 -> None) + | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | Irdtsc + | Irdpmc | Ilfence | Isfence | Imfence | Ipause | Isimd _ | Iprefetch _ + | Icldemote _ -> + None) + | Alloc _ | Reinterpret_cast _ | Static_cast _ | Spill | Reload + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ + | Opaque | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll -> + None diff --git a/backend/amd64/vectorize_specific.ml b/backend/amd64/vectorize_specific.ml new file mode 100644 index 00000000000..2f535ca2834 --- /dev/null +++ b/backend/amd64/vectorize_specific.ml @@ -0,0 +1,53 @@ +(* Keep in sync with [Arch.operation_is_pure], [Arch.operation_can_raise], + [Arch.operation_allocates]. 
*) +module Memory_access = Vectorize_utils.Memory_access + +let memory_access : Arch.specific_operation -> Memory_access.t option = + fun op -> + let create ?first_memory_arg_index desc = + Some (Memory_access.create ?first_memory_arg_index desc) + in + match op with + | Istore_int (_n, addressing_mode, is_assignment) -> + let desc = + Memory_access.Write + { width_in_bits = W64; + addressing_mode; + init_or_assign = + (if is_assignment then Assignment else Initialization) + } + in + create ~first_memory_arg_index:0 desc + | Ifloatarithmem (float_width, _float_op, addressing_mode) -> + let width_in_bits : Vectorize_utils.Width_in_bits.t = + match float_width with Float64 -> W64 | Float32 -> W32 + in + let is_mutable = + (* CR-someday gyorsh: conservative, propagate mutability of Ifloatarithmem + from selection to make it precise. *) + true + in + let desc = + Memory_access.Read + { width_in_bits; addressing_mode; is_mutable; is_atomic = false } + in + create ~first_memory_arg_index:1 desc + | Ioffset_loc (_n, addressing_mode) -> + let desc = + Memory_access.Read_and_write + { width_in_bits = W64; addressing_mode; is_atomic = false } + in + create desc + | Iprefetch { is_write = _; locality = _; addr = _ } -> + (* Conservative, to prevent reordering anything around this instruction. + Using [addressing_mode] is tricky because it need not be the start of the + prefetch cache line and the interval would depend on cache line size. *) + create Memory_access.Arbitrary + | Icldemote _ | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | Ipause -> + (* Conservative, don't reorder around timing or ordering instructions. *) + create Memory_access.Arbitrary + | Isimd op -> + (* Conservative. we don't have any simd operations with memory operations at + the moment. 
*) + if Simd.is_pure op then None else create Memory_access.Arbitrary + | Ilea _ | Ibswap _ | Isextend32 | Izextend32 -> None diff --git a/backend/arm64/arch.ml b/backend/arm64/arch.ml index 8b83c064fc6..d8a4a1adafb 100644 --- a/backend/arm64/arch.ml +++ b/backend/arm64/arch.ml @@ -213,6 +213,9 @@ let equal_specific_operation left right = | Imuladd | Imulsub | Inegmulf | Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _), _ -> false +let isomorphic_specific_operation op1 op2 = + equal_specific_operation op1 op2 + (* Recognition of logical immediate arguments *) (* An automaton to recognize ( 0+1+0* | 1+0+1* ) @@ -333,32 +336,12 @@ let operation_allocates = function | Ibswap _ -> false (* See `amd64/arch.ml`. *) - -let compare_addressing_mode_without_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = +let equal_addressing_mode_without_displ (addressing_mode_1: addressing_mode) + (addressing_mode_2 : addressing_mode) = match addressing_mode_1, addressing_mode_2 with - | Iindexed _, Iindexed _ -> 0 - | Iindexed _ , _ -> -1 - | _, Iindexed _ -> 1 - | Ibased (var1, _), Ibased (var2, _) -> String.compare var1 var2 + | Iindexed _, Iindexed _ -> true + | Ibased (var1, _), Ibased (var2, _) -> String.equal var1 var2 + | (Iindexed _ | Ibased _), _ -> false -let compare_addressing_mode_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = - match addressing_mode_1, addressing_mode_2 with - | Iindexed n1, Iindexed n2 -> Some (Int.compare n1 n2) - | Ibased (var1, n1), Ibased (var2, n2) -> - if String.compare var1 var2 = 0 then Some (Int.compare n1 n2) else None - | Iindexed _ , _ -> None - | Ibased _ , _ -> None - -let addressing_offset_in_bytes (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = None - -let can_cross_loads_or_stores (specific_operation : specific_operation) = - match specific_operation with - | Ifar_poll _ | Ifar_alloc _ | Ishiftarith _ 
| Imuladd | Imulsub | Inegmulf | Imuladdf - | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _ -> - true - -let may_break_alloc_freshness (specific_operation : specific_operation) = - match specific_operation with - | Ifar_poll _ | Ifar_alloc _ | Ishiftarith _ | Imuladd | Imulsub | Inegmulf | Imuladdf - | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _ -> - false +let addressing_offset_in_bytes _ _ ~arg_offset_in_bytes:_ _ _ = + None (* conservative *) diff --git a/backend/arm64/arch.mli b/backend/arm64/arch.mli index d4292c59b24..e542b5df5dd 100644 --- a/backend/arm64/arch.mli +++ b/backend/arm64/arch.mli @@ -116,14 +116,15 @@ val operation_allocates : specific_operation -> bool val operation_can_raise : specific_operation -> bool -(* See `amd64/arch.mli`. *) - -val compare_addressing_mode_without_displ : addressing_mode -> addressing_mode -> int - -val compare_addressing_mode_displ : addressing_mode -> addressing_mode -> int option +val isomorphic_specific_operation : specific_operation -> specific_operation -> bool -val addressing_offset_in_bytes : addressing_mode -> addressing_mode -> int option - -val can_cross_loads_or_stores : specific_operation -> bool - -val may_break_alloc_freshness : specific_operation -> bool +(* See `amd64/arch.mli`. *) +val equal_addressing_mode_without_displ : addressing_mode -> addressing_mode -> bool + +val addressing_offset_in_bytes + : addressing_mode + -> addressing_mode + -> arg_offset_in_bytes:('a -> 'a -> int option) + -> 'a array + -> 'a array + -> int option diff --git a/backend/arm64/simd_selection.ml b/backend/arm64/simd_selection.ml index 5fae7825965..3e18e247129 100644 --- a/backend/arm64/simd_selection.ml +++ b/backend/arm64/simd_selection.ml @@ -19,3 +19,11 @@ let select_operation _ = None let pseudoregs_for_operation _ arg res = arg, res + +(* See `amd64/simd_selection.ml`. 
*) + +let vector_width_in_bits = 128 + +let vectorize_operation _ ~arg_count:_ ~res_count:_ (_ : Operation.t list) : + Vectorize_utils.Vectorized_instruction.t list option = + None diff --git a/backend/arm64/vectorize_specific.ml b/backend/arm64/vectorize_specific.ml new file mode 100644 index 00000000000..5eb1ff3886e --- /dev/null +++ b/backend/arm64/vectorize_specific.ml @@ -0,0 +1,21 @@ +(* Keep in sync with [Arch.operation_is_pure], [Arch.operation_can_raise], + [Arch.operation_allocates]. *) +module Memory_access = Vectorize_utils.Memory_access + +let memory_access : Arch.specific_operation -> Memory_access.t option = + fun op -> + let create ?first_memory_arg_index desc = + Some (Memory_access.create ?first_memory_arg_index desc) + in + match op with + | Ifar_poll _ -> + (* Conservative, don't reorder across poll instructions. In practice, there + are not many poll instructions present at this stage, because poll + insertion pass currently happens after vectorize. *) + create Arbitrary + | Ifar_alloc _ -> create Alloc + | Ishiftarith _ | Imuladd | Imulsub | Inegmulf | Imuladdf | Inegmuladdf + | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _ -> + (* Conservative. we don't have any specific operations with memory + operations at the moment. 
*) + if Arch.operation_is_pure op then None else create Memory_access.Arbitrary diff --git a/backend/cfg/cfg_polling.ml b/backend/cfg/cfg_polling.ml index 4e9cf43df9f..d148cef743c 100644 --- a/backend/cfg/cfg_polling.ml +++ b/backend/cfg/cfg_polling.ml @@ -194,7 +194,10 @@ let instr_cfg_with_layout : let cfg = Cfg_with_layout.cfg cfg_with_layout in Cfg_loop_infos.EdgeSet.fold (fun { Cfg_loop_infos.Edge.src; dst } added_poll -> - let needs_poll = exists_unsafe_path cfg ~safe_map ~from:dst ~to_:src in + let needs_poll = + (not (Label.Tbl.find safe_map src)) + && exists_unsafe_path cfg ~safe_map ~from:dst ~to_:src + in if needs_poll then ( let after = Cfg.get_block_exn cfg src in diff --git a/backend/cfg/cfg_with_layout.ml b/backend/cfg/cfg_with_layout.ml index 92612c422ac..7815c31bc41 100644 --- a/backend/cfg/cfg_with_layout.ml +++ b/backend/cfg/cfg_with_layout.ml @@ -131,8 +131,11 @@ let dump ppf t ~msg = let print_block label = let block = Label.Tbl.find t.cfg.blocks label in fprintf ppf "\n%a:\n" Label.format label; - DLL.iter ~f:(fprintf ppf "%a\n" Cfg.print_basic) block.body; - Cfg.print_terminator ppf block.terminator; + let pp_with_id ppf ~pp (instr : _ Cfg.instruction) = + fprintf ppf "(id:%d) %a\n" instr.id pp instr + in + DLL.iter ~f:(pp_with_id ppf ~pp:Cfg.print_basic) block.body; + pp_with_id ppf ~pp:Cfg.print_terminator block.terminator; fprintf ppf "\npredecessors:"; Label.Set.iter (fprintf ppf " %a" Label.format) block.predecessors; fprintf ppf "\nsuccessors:"; diff --git a/backend/cfg/vectorize.ml b/backend/cfg/vectorize.ml index 10eede1b022..a225018a57f 100644 --- a/backend/cfg/vectorize.ml +++ b/backend/cfg/vectorize.ml @@ -2,62 +2,216 @@ (* Finds independent scalar operations within the same basic block and tries to use vector operations if possible *) -(* CR-soon tip: add documentation *) +(* CR gyorsh: how does the info from [reg_map] flow between blocks? 
*) module DLL = Flambda_backend_utils.Doubly_linked_list -let ( << ) f g x = f (g x) +module State : sig + type t + + type live_regs = Reg.Set.t + + val create : Format.formatter -> Cfg_with_layout.t -> t + + val next_available_instruction : t -> int + + val liveness : t -> int -> live_regs + + val dump_debug : t -> ('a, Format.formatter, unit) format -> 'a + + val dump : t -> ('a, Format.formatter, unit) format -> 'a + + val extra_debug : bool + + val fun_name : t -> string + + val fun_dbg : t -> Debuginfo.t +end = struct + type t = + { ppf_dump : Format.formatter; + mutable max_instruction_id : int; + cfg_with_infos : Cfg_with_infos.t Lazy.t; + cfg_with_layout : Cfg_with_layout.t + } + + type live_regs = Reg.Set.t + + let fun_name t = Cfg.fun_name (Cfg_with_layout.cfg t.cfg_with_layout) + + let fun_dbg t = (Cfg_with_layout.cfg t.cfg_with_layout).fun_dbg + + let next_available_instruction t = + let id = t.max_instruction_id + 1 in + t.max_instruction_id <- id; + id + + let init_instructon_max_id cl = + (* CR gyorsh: Duplicated from backend/regalloc/regalloc_utils.ml. Should + probably move it to Cfg or Cfg_with_infos. 
*) + let max_id = ref Int.min_int in + let update_max_id (instr : _ Cfg.instruction) : unit = + max_id := Int.max !max_id instr.id + in + Cfg_with_layout.iter_instructions cl ~instruction:update_max_id + ~terminator:update_max_id; + !max_id + + let create ppf_dump cl = + (* CR-someday tip: the function may someday take a cfg_with_infos instead of + creating a new one *) + { ppf_dump; + max_instruction_id = init_instructon_max_id cl; + cfg_with_layout = cl; + cfg_with_infos = lazy (Cfg_with_infos.make cl) + } + + let liveness t id = + Cfg_with_infos.(liveness_find (Lazy.force t.cfg_with_infos) id).before + + let extra_debug = true + + let dump_if c t = + if c && !Flambda_backend_flags.dump_vectorize + then Format.fprintf t.ppf_dump + else Format.ifprintf t.ppf_dump + + let dump_debug t = dump_if extra_debug t + + let dump t = dump_if true t +end + +module Substitution : sig + (* CR-someday gyorsh: should be factored out with + [Regalloc_utils.Substitution]. *) + type t + + val create : int -> t + + val get_reg_exn : t -> Reg.t -> Reg.t + + val get_reg_opt : t -> Reg.t -> Reg.t option + + (* CR gyorsh: for SSA *) + (* (** [fresh_reg t r typ] assumes that [r] is not mapped, creates a fresh register [r'] of + * type [typ] and maps [r] to [r']. *) + * val fresh_reg : t -> Reg.t -> Cmm.machtype_component -> Reg.t *) -let vector_width_in_bytes = 16 + (** [fresh_reg_for_pack t pack typ] assumes that none of the registers in [pack] are + mapped, creates a fresh register [r'] of type [typ] and maps all registers in [pack] + to [r'].
*) + val fresh_reg_for_pack : t -> Reg.t list -> Cmm.machtype_component -> unit +end = struct + type t = Reg.t Reg.Tbl.t + + let create n = Reg.Tbl.create n + + let get_reg_exn t (reg : Reg.t) = Reg.Tbl.find t reg + + let get_reg_opt t (reg : Reg.t) = Reg.Tbl.find_opt t reg + + let fresh_reg t reg machtype_component = + match get_reg_opt t reg with + | None -> + let new_reg = Reg.create machtype_component in + Reg.Tbl.add t reg new_reg; + new_reg + | Some old_reg -> + Misc.fatal_errorf "register %a is already mapped to %a" Printreg.reg reg + Printreg.reg old_reg + + let fresh_reg_for_pack t regs machtype_component = + match regs with + | [] -> + Misc.fatal_error "State.fresh_reg_for_group: expects a non-empty group." + | hd :: tl -> + let new_reg = fresh_reg t hd machtype_component in + let add reg = + match get_reg_opt t reg with + | None -> Reg.Tbl.add t reg new_reg + | Some old_reg -> + if Reg.same new_reg old_reg + then + (* same register may appear multiple times in the group, this is + fine here but may not be desirable in the client code and should + be checked for there. *) + Misc.fatal_errorf + "fresh_reg_for_group: duplicate register %a in the group" + Printreg.reg reg + else + Misc.fatal_errorf + "fresh_reg_for_group: register %a is already mapped to %a" + Printreg.reg reg Printreg.reg old_reg + in + List.iter add tl +end module Instruction : sig (* CR-someday tip: consider moving this to cfg or at least have something similar there *) + (* CR gyorsh: We don't really need the terminator when vectorizing basic + blocks only, and removing it would simplify the code and reduce + allocation. 
*) module Id : sig type t include Identifiable.S with type t := t - - val to_int : t -> int end - type t = - | Basic of Cfg.basic Cfg.instruction - | Terminator of Cfg.terminator Cfg.instruction + type t + + val basic : Cfg.basic Cfg.instruction -> t + + val terminator : Cfg.terminator Cfg.instruction -> t val id : t -> Id.t + val equal_id : t -> t -> bool + val arguments : t -> Reg.t Array.t val results : t -> Reg.t Array.t val destroyed : t -> Reg.t Array.t - val print : Format.formatter -> t -> unit + val op : t -> Operation.t option - val is_store : t -> bool + val have_isomorphic_op : t -> t -> bool - val is_alloc : t -> bool + val stack_offset : t -> int + + val print : Format.formatter -> t -> unit - val can_cross_loads_or_stores : t -> bool + val print_id : Format.formatter -> t -> unit - val may_break_alloc_freshness : t -> bool + val copy : + Cfg.basic Cfg.instruction -> + arg:Reg.t array -> + res:Reg.t array -> + id:int -> + desc:Cfg.basic -> + Cfg.basic Cfg.instruction end = struct module Id = struct include Numbers.Int - let to_int t = t + let print ppf t = Format.fprintf ppf "(id:%d)" t end type t = | Basic of Cfg.basic Cfg.instruction | Terminator of Cfg.terminator Cfg.instruction + let basic i = Basic i + + let terminator i = Terminator i + let id (instruction : t) : Id.t = match instruction with | Basic instruction -> instruction.id | Terminator instruction -> instruction.id + let equal_id t1 t2 = Id.equal (id t1) (id t2) + let arguments (instruction : t) : Reg.t Array.t = match instruction with | Basic instruction -> instruction.arg @@ -73,808 +227,2857 @@ end = struct | Basic instruction -> Proc.destroyed_at_basic instruction.desc | Terminator instruction -> Proc.destroyed_at_terminator instruction.desc - let print ppf (instruction : t) : unit = + let stack_offset (instruction : t) : int = match instruction with - | Basic i -> Cfg.print_basic ppf i - | Terminator i -> Cfg.print_terminator ppf i + | Basic instruction -> instruction.stack_offset + 
| Terminator instruction -> instruction.stack_offset - let is_store (instruction : t) = + let op (instruction : t) : Operation.t option = match instruction with | Basic basic_instruction -> ( let desc = basic_instruction.desc in match desc with - | Op op -> ( - match op with - | Store _ -> true - | Load _ | Alloc _ | Move | Reinterpret_cast _ | Static_cast _ | Spill - | Reload | Const_int _ | Const_float32 _ | Const_float _ - | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _ - | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ - | Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _ - | Dls_get | Poll -> - false) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> false - ) - | Terminator _ -> false + | Op op -> Some op + | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> None) + | Terminator _ -> None + + let copy (i : Cfg.basic Cfg.instruction) ~arg ~res ~id ~desc = + { i with desc; arg; res; id } + + (* [op_isomorphic] is used to identify candidate operations that may be + grouped into a vector operation. It returns true if two operations have the + same opcode but allows different constant arguments or different constant + displacement in the address calculation. Subsequent checks ensure that the + corresponding vector instruction is supported by the target architecture, + where additional constraints on the constants may be needed (see + [Operation.is_adjacent] and [Simd_selection.vectorize_operation]. It is + safe to return [false] conservatively, but will prevent vectorization. + [Arch.isomorphic_specific_operation] may also conservatively return false + even if some operations are isomorphic. [Arch.equal_specific_operation] can + be used as a conservative approximation of [Arch.isomorphic_specific + operation], for example for SIMD operations with different constants, such + as [Simd.Clmul_64] in amd64. 
*) + let op_isomorphic (op1 : Operation.t) (op2 : Operation.t) = + match op1, op2 with + | Move, Move | Spill, Spill | Reload, Reload -> true + | Const_int _, Const_int _ + | Const_float32 _, Const_float32 _ + | Const_float _, Const_float _ + | Const_symbol _, Const_symbol _ + | Const_vec128 _, Const_vec128 _ -> + true + | ( Load + { memory_chunk = memory_chunk1; + addressing_mode = addressing_mode1; + mutability = mutability1; + is_atomic = is_atomic1 + }, + Load + { memory_chunk = memory_chunk2; + addressing_mode = addressing_mode2; + mutability = mutability2; + is_atomic = is_atomic2 + } ) -> + Cmm.equal_memory_chunk memory_chunk1 memory_chunk2 + && Arch.equal_addressing_mode_without_displ addressing_mode1 + addressing_mode2 + && mutability1 = mutability2 && is_atomic1 = is_atomic2 + | ( Store (memory_chunk1, addressing_mode1, is_assignment1), + Store (memory_chunk2, addressing_mode2, is_assignment2) ) -> + Cmm.equal_memory_chunk memory_chunk1 memory_chunk2 + && Arch.equal_addressing_mode_without_displ addressing_mode1 + addressing_mode2 + && is_assignment1 = is_assignment2 + | Intop intop1, Intop intop2 -> + Simple_operation.equal_integer_operation intop1 intop2 + | Intop_imm (intop1, _), Intop_imm (intop2, _) -> + Simple_operation.equal_integer_operation intop1 intop2 + | Floatop (width1, floatop1), Floatop (width2, floatop2) -> + Simple_operation.equal_float_width width1 width2 + && Simple_operation.equal_float_operation floatop1 floatop2 + | Specific specific_operation1, Specific specific_operation2 -> + Arch.isomorphic_specific_operation specific_operation1 specific_operation2 + | Move, _ + | Spill, _ + | Reload, _ + | Const_int _, _ + | Const_float32 _, _ + | Const_float _, _ + | Const_symbol _, _ + | Const_vec128 _, _ + | Stackoffset _, _ + | Load _, _ + | Store _, _ + | Intop _, _ + | Intop_imm _, _ + | Intop_atomic _, _ + | Floatop _, _ + | Csel _, _ + | Reinterpret_cast _, _ + | Static_cast _, _ + | Probe_is_enabled _, _ + | Opaque, _ + | Begin_region, 
_ + | End_region, _ + | Specific _, _ + | Name_for_debugger _, _ + | Dls_get, _ + | Poll, _ + | Alloc _, _ -> + false + + let have_isomorphic_op instruction1 instruction2 = + match op instruction1, op instruction2 with + | Some op1, Some op2 -> op_isomorphic op1 op2 + | _ -> false + + let print ppf t = + match t with + | Basic i -> Cfg.print_basic ppf i + | Terminator i -> Cfg.print_terminator ppf i - let is_alloc (instruction : t) = - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Alloc _ -> true - | Load _ | Store _ | Move | Reinterpret_cast _ | Static_cast _ | Spill - | Reload | Const_int _ | Const_float32 _ | Const_float _ - | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _ - | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ - | Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _ - | Dls_get | Poll -> - false) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> false - ) - | Terminator _ -> false - - let can_cross_loads_or_stores (instruction : t) = - (* CR-someday tip: some instructions may or may not cause issues for going - across a load or a store, for simplicity's sake, let's just return false - and not let them go across for now, but better handling can be added in - the future. 
Also, loads from an immuntable block has no coeffects and may - have less restrictions *) - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Load _ | Store _ | Intop_atomic _ | Alloc _ | Poll | Opaque - | Begin_region | End_region -> - false - | Specific specific_operation -> - Arch.can_cross_loads_or_stores specific_operation - | Move | Reinterpret_cast _ | Static_cast _ | Spill | Reload - | Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ - | Const_vec128 _ | Stackoffset _ | Intop _ | Intop_imm _ | Floatop _ - | Csel _ | Probe_is_enabled _ | Name_for_debugger _ | Dls_get -> - true) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> true) - | Terminator _ -> false - - let may_break_alloc_freshness (instruction : t) = - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Load _ | Store _ -> true - | Specific specific_operation -> - Arch.may_break_alloc_freshness specific_operation - | Alloc _ | Move | Reinterpret_cast _ | Static_cast _ | Spill | Reload - | Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ - | Const_vec128 _ | Stackoffset _ | Intop _ | Intop_imm _ - | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ | Opaque - | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll -> - false) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> false - ) - | Terminator _ -> false + let print ppf t = Format.fprintf ppf "%a %a" Id.print (id t) print t + + let print_id ppf t = Format.fprintf ppf "%a" Id.print (id t) end -module Dependency_graph : sig - (* The dependency graph shows dependencies between instructions within the - same basic block *) +module Block : sig type t - val from_block : Cfg.basic_block -> t + val create : Cfg.basic_block -> State.t -> t - val get_all_dependencies_of_arg : 
- t -> Instruction.Id.t -> arg_i:int -> Instruction.Id.Set.t + val body : t -> Cfg.basic_instruction_list - val dump : Format.formatter -> t -> Cfg.basic_block -> unit -end = struct - module Node = struct - module Reg_node = struct - type t = - { reg : Reg.t; - direct_dependency : Instruction.Id.t option - (* the most recent instruction in this basic block that may change - the value of the argument *) - } + val terminator : t -> Instruction.t - let init reg : t = { reg; direct_dependency = None } - end + (** original size of the block before vectorization *) + val size : t -> int - type t = - { instruction : Instruction.t; - reg_nodes : Reg_node.t array; - direct_dependencies : Instruction.Id.Set.t; - (* direct dependencies of all arguments of this instruction *) - all_dependencies : Instruction.Id.Set.t; - (* direct dependencies of this instruction and all dependencies of - each direct dependency of this instruction *) - is_direct_dependency_of : Instruction.Id.Set.t - (* all instructions that have this instruction as a direct - dependency *) - } + val find : t -> Instruction.Id.t -> Instruction.t - let init instruction : t = - let arguments = Instruction.arguments instruction in - { instruction; - reg_nodes = - Array.init (Array.length arguments) (fun i -> - arguments.(i) |> Reg_node.init); - direct_dependencies = Instruction.Id.Set.empty; - all_dependencies = Instruction.Id.Set.empty; - is_direct_dependency_of = Instruction.Id.Set.empty - } - end + (** [find_last_instruction t instrs] returns instruction [i] + from [instrs] such that [i] appears after + all other instructions from [instrs] according to the order of instructions + in this basic block. Raises if [instrs] is empty. 
*) + val find_last_instruction : t -> Instruction.Id.t list -> Instruction.t + + val get_live_regs_before_terminator : t -> State.live_regs + + val state : t -> State.t + + val reg_map : t -> Substitution.t + + val start : t -> Label.t + + (** [pos t id] returns the original position of [id] instruction within the body of [t]. + Raises if [id] is not in the body. *) + val pos : t -> Instruction.Id.t -> int +end = struct + type t = + { block : Cfg.basic_block; + id_to_instructions : Instruction.t Instruction.Id.Tbl.t; + id_to_body_pos : int Instruction.Id.Tbl.t; + size : int; + state : State.t; + reg_map : Substitution.t + } + + let state t = t.state - type t = Node.t Instruction.Id.Tbl.t + let body t = t.block.body - let add = Instruction.Id.Tbl.add + let terminator t = Instruction.terminator t.block.terminator - let find = Instruction.Id.Tbl.find + let size t = t.size - let replace = Instruction.Id.Tbl.replace + let start (t : t) = t.block.start - let init ~size : t = Instruction.Id.Tbl.create size + let reg_map t = t.reg_map - let get_all_dependencies dependency_graph id = - let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in - node.all_dependencies + let find t id = Instruction.Id.Tbl.find t.id_to_instructions id - let get_all_dependencies_of_arg dependency_graph id ~arg_i = - let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in - match node.reg_nodes.(arg_i).direct_dependency with - | None -> Instruction.Id.Set.empty - | Some direct_dependency -> - get_all_dependencies dependency_graph direct_dependency - |> Instruction.Id.Set.add direct_dependency + let pos t id = Instruction.Id.Tbl.find t.id_to_body_pos id - let from_block (block : Cfg.basic_block) = - let dependency_graph = init ~size:(DLL.length block.body) in - let is_changed_in instruction reg = - Array.exists (Reg.same reg) (Instruction.results instruction) - || Array.exists (Reg.same reg) (Instruction.destroyed instruction) + let create (block : Cfg.basic_block) state = 
+ let size = DLL.length block.body + 1 in + let id_to_instructions = Instruction.Id.Tbl.create size in + let add i = + Instruction.Id.Tbl.add id_to_instructions (Instruction.id i) i in - let latest_change ~(current : Instruction.Id.t) (reg : Reg.t) = - let starting_cell = - match - DLL.find_cell_opt block.body ~f:(fun instruction -> - Basic instruction |> Instruction.id - |> Instruction.Id.equal current) - with - | None -> DLL.last_cell block.body - | Some current_cell -> DLL.prev current_cell - in - let rec find_latest_change cell_option = + DLL.iter block.body ~f:(fun i -> add (Instruction.basic i)); + add (Instruction.terminator block.terminator); + let id_to_body_pos = Instruction.Id.Tbl.create size in + DLL.iteri block.body ~f:(fun pos i -> + Instruction.Id.Tbl.add id_to_body_pos + (Instruction.id (Instruction.basic i)) + pos); + let reg_map = Substitution.create 3 in + { block; size; id_to_instructions; id_to_body_pos; state; reg_map } + + let get_live_regs_before_terminator t = + State.liveness t.state t.block.terminator.id + + let find_last_instruction t instructions = + let instruction_set = Instruction.Id.Set.of_list instructions in + let terminator = terminator t in + if Instruction.Id.Set.mem (Instruction.id terminator) instruction_set + then terminator + else + let body = t.block.body in + let rec find_last cell_option = match cell_option with - | None -> None + | None -> + Misc.fatal_errorf "Vectorizer.find_last_instruction in block %a" + Label.print t.block.start () | Some cell -> - let instruction = Instruction.Basic (DLL.value cell) in - if is_changed_in instruction reg - then Some instruction - else find_latest_change (DLL.prev cell) + let current_instruction = Instruction.basic (DLL.value cell) in + let current_instruction_id = Instruction.id current_instruction in + if Instruction.Id.Set.exists + (Instruction.Id.equal current_instruction_id) + instruction_set + then current_instruction + else find_last (DLL.prev cell) in - find_latest_change 
starting_cell - in - let add_arg_dependency instruction arg_i arg = - let id = Instruction.id instruction in - let dependency = latest_change ~current:id arg in - let node = find dependency_graph id in - let reg_node = node.reg_nodes.(arg_i) in - node.reg_nodes.(arg_i) - <- { reg_node with - direct_dependency = - Option.fold ~none:None - ~some:(Option.some << Instruction.id) - dependency - } - in - let add_arg_dependencies (instruction : Instruction.t) = - Array.iteri - (add_arg_dependency instruction) - (Instruction.arguments instruction) - in - let add_dependencies (instruction : Instruction.t) = - let id = Instruction.id instruction in - add dependency_graph id (Node.init instruction); - add_arg_dependencies instruction; - let arg_indices = - Instruction.arguments instruction |> Array.mapi (fun arg_i _ -> arg_i) + find_last (DLL.last_cell body) +end + +(* CR-someday gyorsh: Dependencies computed below can be used for other + optimizations, not only vectorization. For example, peephole optimizations + within a basic block can reorder or replace instructions that are not + consecutive, as long as the transformation preserves dependencies. *) + +(** + Construct an overapproximation of transitive dependencies between instructions, and + use this information to identify instructions that can run in parallel and can be + reordered. + + Approach: + ========= + + Record a direct dependency from instruction [i] to [j] if one of the following + conditions holds: + + (a) Dependency via registers: [i] may read from a register [r] a value that was + previously written to [r] by instruction [j]. + + (b) Dependency via memory: [i] may read from a memory location that [j] may + write to (RAW memory dependency). + + (c) Ordering via memory: [i] and [j] may access the same memory location, + and one of the accesses is a "write" (this covers WAR and WAW dependencies, + as well as dependencies of reads and writes on allocation).
+ + Vectorizable computation + ======================== + TODO add citation + + The method for constructing vectorizable computations. + - Group: a sequence of scalar instructions that are independent of each other, and have + an equivalent vector instruction. Most groups correspond to one vector instruction, + but sometimes a sequence of vector instructions is needed, for example scalar addition + of a register and a constant. + - Current heuristic for identifying vectorizable instructions requires that memory + accesses of scalar instructions are adjacent and have the same base address and + addressing mode. + - Seed: a group of store instructions. + - Vectorizable computation is a set of groups of scalar instructions that can be replaced + with equivalent vector instructions. + - Find vectorizable computations: starting from a seed group, traverse the dependencies + backwards to construct groups for each of the arguments. Stop at load instructions (and + constant arguments). + - Use (a) during the backward traversal to find scalar instructions needed to form + a group. + - Use both (a) and (b) dependencies to determine whether scalar instructions can run in + parallel and therefore can form a group. + - Use (c) to determine the placement of vector instructions for each group in the + block, relative to other instructions. Order constraints determine when scalar + instructions can be executed. + - Key: conservatively choose the position to the last scalar instruction of the group + to place the vector instructions for the group. + - Conservatively require that all ordering constraints from a scalar instruction in a + vectorizable computation are to instructions that appear earlier in the block, + before the first scalar instruction of the computation. + - Conservatively require that nothing depends (via a,b,c) + on scalar instructions in the computation.
+ + Heuristics + ========== + Identify disjoint memory accesses to improve precision of memory dependency and + ordering overapproximation. Use the following observations: + + (1) A freshly-allocated memory block is disjoint from all previously-allocated blocks. + + A crude version of points-to analysis with allocation site abstraction is sufficient to + track this information. + + (2) Different offsets from the same base address. + + If both memory accesses use the same base address, and offsets from the base address + can statically be shown to refer to non-overlapping address intervals. + + Two memory accesses have the same base address if they use the same register for the + base address, and the register contains the same value, i.e., there are no writes to + this register between the two memory accesses. + + (3) Two valid ocaml blocks are disjoint, except for closure blocks. + + A register points to a valid ocaml block iff [Reg.typ] is [Val]. + + For two different registers that point to valid ocaml blocks, one of the following + conditions holds: + - the registers point to the same block, or + - the registers point to disjoint blocks, or + - the registers point to closure blocks. + + To rule out closure blocks, we use the following observation. + + Closures cannot be modified after initialization. All stores to closure blocks emitted + by the ocaml compiler are initializing stores. Therefore, if a register that points to + a valid ocaml block is used in an address computation of a non-initializing store, then + the block it points to is not a closure block. + + The type system guarantees that there is no pointer arithmetic between valid ocaml + blocks: an address computed as an offset from a register that points to a valid ocaml + block cannot point to a different valid ocaml block, except for closure blocks. 
+ + If two registers (a) point to valid ocaml blocks, (b) one of the blocks is not a + closure block, and (c) a memory access uses one of the registers as base address, + then a memory access that uses the other base address is disjoint if the accesses + refer to non-overlapping address intervals (similarly to (2)). + + Overview of the implementation + ============================== + + - Partition: represents a set of memory blocks that are disjoint from memory blocks + represented by other partitions. A memory block need not be on the ocaml heap. + - Points-to graph: for each partition, track the set of partitions it may point to. + - Initial partition [unknown] represents all other memory blocks, previously allocated on + the ocaml heap or elsewhere, and it may point to itself. + - Fresh allocations: Track that a partition represents exactly one ocaml block. Such a + partition is a result of an allocation instruction, and known to be disjoint from all + other partitions, and initially has no valid pointers to or from other partitions. + - Aliases: For each register, track the set of partitions it may point to. + - Memory accesses: For each instruction, identify the set of partitions that the + instruction may access for read or write. + - For each partition, track the set of instructions that may read from it or write + to it. + - For each instruction [i] that may write to partition [p], and each instruction [j] that + appears before [i] in the basic block and may access the same partition [p] for read + or write, if heuristic conditions (2) and (3) above do not hold, then add a direct + dependency from [i] to [j]. Note that it will add a dependency from Load to Store. + + Computing aliases and points-to graph + ===================================== + + - Initially, all registers may point to [unknown] partition.
+ - If a register is clobbered, its partition is reset to empty, representing the fact + that it does not contain a valid pointer and it is illegal for the program to access + memory that this register may point to. + - If a register is assigned a static constant, its partition is reset to [unknown], + representing the fact that a constant may point to statically allocated memory, but it + is illegal to use it to access any partitions freshly allocated on the OCaml heap. + - If a register is used as RHS of a store (i.e., the value to store), add edges + between the corresponding partitions: the partitions of the LHS of the store (i.e., the + address arguments of the store) may point to the partitions of RHS. + - If register [r] is used as a LHS of a load (i.e., result of the load), then after the + load [r] may point to partition [p'] if [p] is an RHS of this load (i.e., address + arguments of the load) may point to, and [p] may point to [p']. + - For all other assignments, propagate points-to information from arguments to results. + - Edges between partitions can be added but not removed (i.e., no strong update + of the points-to graph). Strong updates of aliases within a basic block are safe. + + Complexity + ======== + - The number of partitions is n+1 where n is the number of allocation instruction in + the block. + - Tracking of memory accesses of each partition is quadratic in [n] in the worst case, + but in practice should be much smaller. + - Single pass on the block's body to construct memory dependencies. 
+ +*) +module Dependencies : sig + (** Dependencies between basic instructions within the same basic block *) + type t + + val from_block : Block.t -> t + + val get_direct_dependency_of_reg : + t -> Instruction.Id.t -> Reg.t -> Instruction.Id.t option + + val get_direct_dependency_of_arg : + t -> Instruction.Id.t -> arg_i:int -> Instruction.Id.t option + + val get_direct_dependencies : t -> Instruction.Id.t -> Instruction.Id.Set.t + + val independent : t -> Instruction.t -> Instruction.t -> bool + + (** [all_independent t l] returns true when all instructions in [l] are pairwise + independent. *) + val all_independent : t -> Instruction.t list -> bool + + (** [all_adjacent t l] raises if [l] are not all memory operations. *) + val all_adjacent : t -> Instruction.t list -> bool + + val for_all_memory_dependencies : + t -> f:(Instruction.Id.t -> Instruction.Id.t -> bool) -> bool + + (* (** [width_in_bits t i] raises if [i] is not a memory operation or width is not known, + * e.g., Arbitrary or Alloc operations. *) + * val width_in_bits : t -> Instruction.t -> int *) + + (* CR gyorsh: find a better way to pass the state around. it's mostly used for + debug printing, so we could just pass the [ppf] around, but not only - the + other two use is for reg_map. It's not yet needed across blocks, so need + not be passed around. Maybe we should split printing util out of state and + pass ppf around, then pass the state to only the places that actually need + it. *) + val state : t -> State.t + + module Memory : sig + module Operation : sig + type t + + val first_memory_arg_index : t -> int + end + end + + val get_memory_operation : t -> Instruction.t -> Memory.Operation.t option + + (* CR gyorsh: output assorted dependency graphs and points-to graph in dot + format, it'll be very useful for debugging. Not clear how to control it on + a per-block basis. 
*) +end = struct + module Reaching_definitions : sig + type t + + val from_block : Block.t -> t + + (** [get t cur_id reg] For register [r], return [id] of instruction that defines [r] + and there is no other instruction that defines [r] or destroys/clobbers [r] on any + path from [id] to the current program point, i.e., the program point immediately + before [cur_id]. An instruction defines [r] means that [r] is a "result" register + that the instruction writes to, not clobbers it. Returns [None] if there is no + definition of [r] in the block, or the definition of [r] is clobbered + prior to the current program point. *) + val get : t -> Instruction.Id.t -> Reg.t -> Instruction.Id.t option + + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + module D = struct + type t = Instruction.Id.t Reg.Map.t + + let get t reg = Reg.Map.find_opt reg t + + let dump ppf t = + let open Format in + Reg.Map.iter + (fun reg id -> + fprintf ppf "%a defined by instruction %a@." Printreg.reg reg + Instruction.Id.print id) + t + end + + type t = D.t Instruction.Id.Tbl.t + + let from_block block = + let t = Block.size block |> Instruction.Id.Tbl.create in + let f map instruction = + (* record current value *) + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + Instruction.Id.Tbl.add t id map; + (* transform *) + let remove map reg_array = + Array.fold_left (fun m r -> Reg.Map.remove r m) map reg_array + in + let add map reg_array id = + Array.fold_left (fun m r -> Reg.Map.add r id m) map reg_array + in + let map = remove map (Instruction.destroyed instruction) in + let map = add map (Instruction.results instruction) id in + map in - let direct_dependencies = - Array.fold_left - (fun dependencies arg_i -> - Option.fold ~none:dependencies - ~some:(fun dependency -> - Instruction.Id.Set.add dependency dependencies) - (find dependency_graph id).reg_nodes.(arg_i).direct_dependency) - Instruction.Id.Set.empty arg_indices + 
let init = + (* ignore definitions outside the block *) + Reg.Map.empty in - let all_dependencies = - Instruction.Id.Set.fold - (fun new_id old_indirect_dependencies -> - let node = Instruction.Id.Tbl.find dependency_graph new_id in - Instruction.Id.Set.union node.direct_dependencies - old_indirect_dependencies) - direct_dependencies direct_dependencies + let map = DLL.fold_left (Block.body block) ~init ~f in + let terminator_id = Block.terminator block |> Instruction.id in + Instruction.Id.Tbl.add t terminator_id map; + t + + let get t id reg = + let d = Instruction.Id.Tbl.find t id in + D.get d reg + + let dump ppf ~(block : Block.t) (t : t) = + let open Format in + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + fprintf ppf "Reaching definitions after instruction %a:@.%a@." + Instruction.Id.print id D.dump + (Instruction.Id.Tbl.find t id)) + end + + (* [Reaching_definitions] wouldn't be needed here if we had SSA + representation. Converting to SSA even at a basic block level would require + some change in emit because instruction selection (a) relies on sharing of + registers between arguments and results to emit shorter encodings of + instructions such as Add and Shift, (b) forces the use of certain hardware + registers, for example Div and Bswap. 
*) + module Reg_defined_at_instruction : sig + type t + + val create : Reg.t -> Instruction.t -> Reaching_definitions.t -> t + + val equal : t -> t -> bool + + val is_val : t -> bool + + val get_offset : t -> t -> Block.t -> Reaching_definitions.t -> int option + + val print : Format.formatter -> t -> unit + end = struct + type t = + { reg : Reg.t; + def : Instruction.Id.t option + } + + let print ppf t = + let pp ppf def = + match def with + | None -> Format.fprintf ppf "(unknown)" + | Some id -> Instruction.Id.print ppf id in - let node = find dependency_graph id in - replace dependency_graph id - { node with direct_dependencies; all_dependencies } - in - let add_all_dependencies () = - DLL.iter block.body ~f:(fun instruction -> - add_dependencies (Basic instruction)); - add_dependencies (Terminator block.terminator) - in - let set_is_dependency_of instruction_id dependency_id = - let dependency = find dependency_graph dependency_id in - replace dependency_graph dependency_id - { dependency with - is_direct_dependency_of = - Instruction.Id.Set.add instruction_id - dependency.is_direct_dependency_of - } - in - let set_is_dependency_of_plural (instruction : Instruction.t) = + Format.fprintf ppf "%a at %a" Printreg.reg t.reg pp t.def + + let init reg id reaching_definitions = + let def = Reaching_definitions.get reaching_definitions id reg in + { reg; def } + + let create reg instruction reaching_definitions = let id = Instruction.id instruction in - let node = find dependency_graph id in - Instruction.Id.Set.iter (set_is_dependency_of id) node.direct_dependencies - in - let set_all_is_dependency_of () = - DLL.iter block.body ~f:(fun instruction -> - set_is_dependency_of_plural (Basic instruction)); - set_is_dependency_of_plural (Terminator block.terminator) - in - add_all_dependencies (); - set_all_is_dependency_of (); - dependency_graph + init reg id reaching_definitions - let dump ppf (t : t) (block : Cfg.basic_block) = - let open Format in - let print_reg_node 
arg_i (reg_node : Node.Reg_node.t) = - let dependency = - Option.fold ~none:"none" - ~some:(sprintf "instruction %d" << Instruction.Id.to_int) - reg_node.direct_dependency + let equal { reg = r1; def = d1 } { reg = r2; def = d2 } = + Reg.same r1 r2 && Option.equal Instruction.Id.equal d1 d2 + + let is_val t = Cmm.is_val t.reg.typ + + let get_offset t1 t2 block reaching_definitions = + (* Heuristic to identify some very simple relations of the form [t2 = t1 + + N]. This heuristic is relatively cheap, because it only follows a + single use-def chain, backwards from [t2] until it finds [t1], and + returns the accumulated offset N such that [t2 = t1 + N]. *) + let add n acc = + (* [acc = None] indicates that the accumulator is not initialized. *) + match acc with None -> Some n | Some n' -> Some (n + n') in - fprintf ppf "argument %d, %a depends on %s\n" arg_i Printreg.reg - reg_node.reg dependency - in - let print_node (instruction : Instruction.t) = - let id = Instruction.id instruction in - let node = find t id in - fprintf ppf "\n%d:\n" - (Instruction.id node.instruction |> Instruction.Id.to_int); - Instruction.print ppf instruction; - fprintf ppf "\ndirect dependencies:\n"; - Instruction.Id.Set.iter - (fprintf ppf "%d " << Instruction.Id.to_int) - node.direct_dependencies; - fprintf ppf "\nall dependencies:\n"; - Instruction.Id.Set.iter - (fprintf ppf "%d " << Instruction.Id.to_int) - node.all_dependencies; - fprintf ppf "\nis direct dependency of:\n"; - Instruction.Id.Set.iter - (fprintf ppf "%d " << Instruction.Id.to_int) - node.is_direct_dependency_of; - fprintf ppf "\narg dependencies:\n"; - Array.iteri print_reg_node node.reg_nodes; - fprintf ppf "\n" - in - fprintf ppf "\ndependency graph:\n"; - DLL.iter block.body ~f:(fun instruction -> print_node (Basic instruction)); - print_node (Terminator block.terminator); - fprintf ppf "\n" -end + let rec loop ~cur ~dst acc = + (* If [cur] does not depend on [dst], or an operation is encountered + that cannot be 
expressed as a constant offset, return None *) + if equal cur dst + then add 0 acc + else + match cur.def with + | None -> + (* [cur] defined outside the block, so it is not defined as an + offset of [dst]. *) + None + | Some id -> ( + let next = + let instruction = Block.find block id in + match Instruction.arguments instruction with + | [| reg |] -> ( + match Instruction.op instruction with + | None -> None + | Some op -> ( + match[@warning "-fragile-match"] op with + | Move | Spill | Reload -> Some (reg, 0) + | Intop_imm (Iadd, n) -> Some (reg, n) + | Intop_imm (Isub, n) -> Some (reg, -n) + | _ -> None)) + | _ -> None + in + match next with + | None -> None + | Some (reg, n) -> + let acc = add n acc in + let cur = init reg id reaching_definitions in + loop ~cur ~dst acc) + in + (* CR-someday gyorsh: add the symmetric loop r with a negative offset? *) + (* The backward traversal starts from [t2]. *) + loop ~cur:t2 ~dst:t1 None + end -module Memory_accesses : sig - module Memory_operation : sig + (* CR-someday gyorsh: [Memory] can be merged with [Reaching_definitions] but + let's do it separately first for simplicity. 
*) + module Memory : sig type t - val instruction : t -> Instruction.t + module Operation : sig + type t - val is_adjacent : t -> t -> bool + val first_memory_arg_index : t -> int + end - val width : t -> int + module Dependencies : sig + type t - val dump : Format.formatter -> t -> unit - end + val get : t -> Instruction.Id.t -> Instruction.Id.Set.t - type t + val for_all : + t -> f:(Instruction.Id.t -> Instruction.Id.t -> bool) -> bool - val stores : t -> Instruction.Id.t list + val dump : Format.formatter -> block:Block.t -> t -> unit + end - val get_memory_operation_exn : t -> Instruction.Id.t -> Memory_operation.t + val from_block : Block.t -> Reaching_definitions.t -> t * Dependencies.t - val from_block : Cfg.basic_block -> t + (* val is_memory_operation : t -> Instruction.t -> bool *) - val can_cross : t -> Instruction.t -> Instruction.t -> bool + val get_memory_operation : t -> Instruction.t -> Operation.t option - val dump : Format.formatter -> t -> unit -end = struct - module Memory_operation = struct - type op = - | Load - | Store + val is_adjacent : + t -> + Instruction.t -> + Instruction.t -> + Block.t -> + Reaching_definitions.t -> + bool - type t = - { op : op; - memory_chunk : Cmm.memory_chunk; - addressing_mode : Arch.addressing_mode; - instruction : Instruction.t; - dependent_allocs : Instruction.Id.Set.t; - unsure_allocs : Instruction.Id.Set.t - } + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + module Partition : sig + include Identifiable.S - let instruction t = t.instruction + (** Abstraction of all previously allocated blocks of memory. 
*) + val unknown : t - let init (instruction : Instruction.t) : t option = - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Load { memory_chunk; addressing_mode; _ } -> - Some - { op = Load; - memory_chunk; - addressing_mode; - instruction; - dependent_allocs = Instruction.Id.Set.empty; - unsure_allocs = Instruction.Id.Set.empty - } - | Store (memory_chunk, addressing_mode, _) -> - Some - { op = Store; - memory_chunk; - addressing_mode; - instruction; - dependent_allocs = Instruction.Id.Set.empty; - unsure_allocs = Instruction.Id.Set.empty - } - | Specific _ -> - None - (* CR-someday tip: may need to rewrite a lot of code to handle loads - and stores inside [Specific] in the future *) - | Move | Reinterpret_cast _ | Static_cast _ | Spill | Reload - | Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ - | Const_vec128 _ | Stackoffset _ | Intop _ | Intop_imm _ - | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ | Opaque - | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll - | Alloc _ -> - None) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> - None) - | Terminator _ -> None + (** Represents a block of memory that was freshly allocated at the + [allocation_site]. *) + val create : allocation_site:Instruction.Id.t -> t - let memory_arguments (t : t) = - let arguments = Instruction.arguments t.instruction in - match t.op with - | Load -> arguments - | Store -> Array.sub arguments 1 (Array.length arguments - 1) + (* (** [init] is the singleton set of [unknown] partition. 
*) + * val init : Set.t *) + module Set : sig + include module type of Set - let width (t : t) = Cmm.width_in_bytes t.memory_chunk + val unknown : t + end + end = struct + module S = struct + type t = Instruction.Id.t option - let print_memory_chunk ppf (t : t) = - Format.fprintf ppf "%s (length %d)" - (Printcmm.chunk t.memory_chunk) - (Cmm.width_in_bytes t.memory_chunk) + let compare t1 t2 = Option.compare Instruction.Id.compare t1 t2 - let dump ppf (t : t) = - let open Format in - let instruction = t.instruction in - let print_set ppf set = - Instruction.Id.Set.iter - (fun id -> fprintf ppf "%d " (Instruction.Id.to_int id)) - set - in - fprintf ppf - "\n\ - Instruction %d: %a (%a, %a)\n\ - \ dependent allocs: %a\n\ - \ unsure_allocs: %a" - (Instruction.id instruction |> Instruction.Id.to_int) - Instruction.print instruction print_memory_chunk t - (Arch.print_addressing Printreg.reg t.addressing_mode) - (memory_arguments t) print_set t.dependent_allocs print_set - t.unsure_allocs - - let compare_arguments (t1 : t) (t2 : t) = - let arguments_1 = memory_arguments t1 in - let arguments_2 = memory_arguments t2 in - Array.combine arguments_1 arguments_2 - |> Array.fold_left - (fun result ((arg1, arg2) : Reg.t * Reg.t) -> - if result = 0 then Reg.compare arg1 arg2 else result) - 0 - - let compare_addressing_modes_and_arguments (t1 : t) (t2 : t) = - let addressing_mode_comparison = - Arch.compare_addressing_mode_without_displ t1.addressing_mode - t2.addressing_mode - in - if addressing_mode_comparison = 0 - then - let arguments_comparison = compare_arguments t1 t2 in - arguments_comparison - else addressing_mode_comparison + let equal t1 t2 = compare t1 t2 = 0 - let offset_in_bytes (t1 : t) (t2 : t) = - let addressing_mode_and_arguments_comparison = - compare_addressing_modes_and_arguments t1 t2 - in - if addressing_mode_and_arguments_comparison = 0 - then Arch.addressing_offset_in_bytes t1.addressing_mode t2.addressing_mode - else None - - let is_adjacent (t1 : t) (t2 : 
t) = - let res = - if Cmm.equal_memory_chunk t1.memory_chunk t2.memory_chunk - then - let width = Cmm.width_in_bytes t1.memory_chunk in - let offset_option = offset_in_bytes t1 t2 in - match offset_option with - | None -> false - | Some offset -> width = offset - else false - in - res + let hash t = + match t with None -> 0 | Some id -> Instruction.Id.hash id - let index_offset t = match t.op with Load -> 0 | Store -> 1 - end + let print ppf t = + match t with + | None -> Format.fprintf ppf "unknown" + | Some id -> + Format.fprintf ppf "alloc_site=%a" Instruction.Id.print id - type t = - { loads : Instruction.Id.t list; - stores : Instruction.Id.t list; - memory_operations : Memory_operation.t Instruction.Id.Tbl.t - } + let output oc t = Printf.fprintf oc "%s" (Format.asprintf "%a" print t) + end - let stores t = t.stores + include S + include Identifiable.Make (S) - let get_memory_operation_exn t id = - Instruction.Id.Tbl.find t.memory_operations id + let unknown = None - type alloc_tracker = - { loads : Instruction.Id.t list; - stores : Instruction.Id.t list; - fresh_allocs : Instruction.Id.Set.t; - stored_allocs : Instruction.Id.Set.t; - unsure_allocs : Instruction.Id.Set.t - } + let create ~allocation_site = Some allocation_site - let from_block (block : Cfg.basic_block) : t = - (* A heuristic to avoid treating the same "fresh" allocation which address - stored and loaded into a different register as different, has room for - improvement. 
Assumption: if x depends on a fresh allocation, and it is - certain that y does not depend on that fresh allocation, then they point - to disjoint addresses *) - (* At each load or store instruction, it keeps track of all allocs up to - this point in this basic block and puts them in one of 3 categories: - [fresh_allocs]: nothing that depends on the address of the fresh alloc - has been saved as a value; [stored_allocs]: something that depends on the - address of the fresh alloc has been saved as a value, but nothing has - been loaded till this point; [unsure_allocs]: something that depends on - the address of the fresh alloc has been saved as a value, and something - has been loaded till this point. For each memory operation, we will save - its dependent allocs and unsure allocs *) - let dependency_graph = Dependency_graph.from_block block in - let id_to_instructions = - DLL.to_list block.body - |> List.map (fun basic_instruction -> - let instruction = Instruction.Basic basic_instruction in - Instruction.id instruction, instruction) - |> Instruction.Id.Tbl.of_list - in - let memory_operations = Instruction.Id.Tbl.create (DLL.length block.body) in - let ({ loads; stores; _ } : alloc_tracker) = - DLL.fold_left block.body - ~f: - (fun { loads; stores; fresh_allocs; stored_allocs; unsure_allocs } - basic_instruction -> - let instruction = Instruction.Basic basic_instruction in - let id = Instruction.id instruction in - if Instruction.is_alloc instruction - then - { loads; - stores; - fresh_allocs = Instruction.Id.Set.add id fresh_allocs; - stored_allocs; - unsure_allocs - } - else - let memory_operation = Memory_operation.init instruction in - match memory_operation with - | None -> - if Instruction.may_break_alloc_freshness instruction - then - { loads; - stores; - fresh_allocs = Instruction.Id.Set.empty; - stored_allocs = Instruction.Id.Set.empty; - unsure_allocs = - Instruction.Id.Set.union fresh_allocs stored_allocs - |> Instruction.Id.Set.union unsure_allocs + 
module Set = struct + include Set + + let unknown = singleton unknown + end + end + + module Memory_access = Vectorize_utils.Memory_access + + module Operation : sig + type t + + (* Reaching_definitions is used here to disambiguate register names. It + wouldn't be needed here if we had SSA representation. *) + val create : Instruction.t -> Reaching_definitions.t -> t option + + val desc : t -> Memory_access.desc + + val address_args : t -> Reg.t array + + val non_address_args : t -> Reg.t array + + val first_memory_arg_index : t -> int + + val get_instruction_id : t -> Instruction.Id.t + + (** [is_adjacent t1 t2] assumes that [t1] and [t2] have isomorphic operations, + and conservatively returns [false] unless it can prove that [t1] and [t2] + are adjacent (i.e., the accesses are disjoint but the intervals of addresses + have no gap between them). *) + val is_adjacent : t -> t -> Block.t -> Reaching_definitions.t -> bool + + (** [is_disjoint t1 t2] assumes that [t1] and [t2] have isomorphic operations, + and conservatively returns [false] unless it can prove that [t1] and [t2] + access disjoint memory addresses. *) + val is_disjoint : t -> t -> Block.t -> Reaching_definitions.t -> bool + + val dump : Format.formatter -> t -> unit + end = struct + type t = + { memory_access : Memory_access.t; + instruction : Instruction.t; + (* CR-soon gyorsh: the instruction is only for validation and + debugging. We don't actually need to keep the little subarrays of + args around, we can define iterators on the original arguments + array instead, at least for all current uses. Not worth optimizing + these little allocations at this point. [address_args] and + [address_args_defined_at] also have redundant information. 
*) + address_args_defined_at : Reg_defined_at_instruction.t array; + address_args : Reg.t array; + non_address_args : Reg.t array + } + + let desc t = Memory_access.desc t.memory_access + + let non_address_args t = t.non_address_args + + let address_args t = t.address_args + + let first_memory_arg_index t = + Memory_access.first_memory_arg_index t.memory_access + + let get_instruction_id t = Instruction.id t.instruction + + let memory_access (instruction : Instruction.t) : Memory_access.t option = + let width_in_bits c = Vectorize_utils.Width_in_bits.of_memory_chunk c in + let create ?first_memory_arg_index desc = + Some (Memory_access.create ?first_memory_arg_index desc) + in + match Instruction.op instruction with + | None -> + (* conservative *) + create Arbitrary + | Some op -> ( + match op with + | Load { memory_chunk; addressing_mode; mutability; is_atomic } -> + let desc = + Memory_access.Read + { width_in_bits = width_in_bits memory_chunk; + addressing_mode; + is_mutable = + (match mutability with + | Mutable -> true + | Immutable -> false); + is_atomic } - else { loads; stores; fresh_allocs; stored_allocs; unsure_allocs } - | Some memory_operation -> ( - let get_dependent_allocs_of_arg arg_i = - Dependency_graph.get_all_dependencies_of_arg dependency_graph id - ~arg_i - |> Instruction.Id.Set.filter - (Instruction.is_alloc - << Instruction.Id.Tbl.find id_to_instructions) - in - let rec get_dependent_allocs arg_i = - if arg_i < 0 - then Instruction.Id.Set.empty - else - Instruction.Id.Set.union - (get_dependent_allocs_of_arg - (arg_i + Memory_operation.index_offset memory_operation)) - (get_dependent_allocs (arg_i - 1)) - in - let dependent_allocs = - get_dependent_allocs - (Array.length - (Memory_operation.memory_arguments memory_operation) - - 1) - in - Instruction.Id.Tbl.add memory_operations id - { memory_operation with dependent_allocs; unsure_allocs }; - match memory_operation.op with - | Load -> - { loads = id :: loads; - stores; - fresh_allocs; - 
stored_allocs = Instruction.Id.Set.empty; - unsure_allocs = - Instruction.Id.Set.union stored_allocs unsure_allocs + in + create ~first_memory_arg_index:0 desc + | Store (memory_chunk, addressing_mode, is_assign) -> + let desc = + Memory_access.Write + { width_in_bits = width_in_bits memory_chunk; + addressing_mode; + init_or_assign = + (if is_assign + then Memory_access.Init_or_assign.Assignment + else Memory_access.Init_or_assign.Initialization) } - | Store -> - let new_stored_allocs = - Instruction.Id.Set.diff - (get_dependent_allocs_of_arg 0) - unsure_allocs - in - { loads; - stores = id :: stores; - fresh_allocs = - Instruction.Id.Set.diff fresh_allocs new_stored_allocs; - stored_allocs = - Instruction.Id.Set.union stored_allocs new_stored_allocs; - unsure_allocs - })) - ~init: - { loads = []; - stores = []; - fresh_allocs = Instruction.Id.Set.empty; - stored_allocs = Instruction.Id.Set.empty; - unsure_allocs = Instruction.Id.Set.empty - } - in - { loads = List.rev loads; stores = List.rev stores; memory_operations } + in + create desc ~first_memory_arg_index:1 + | Intop_atomic { op; size; addr } -> + let desc = + Memory_access.Read_and_write + { width_in_bits = + Vectorize_utils.Width_in_bits.of_atomic_bitwidth size; + addressing_mode = addr; + is_atomic = true + } + in + let first_memory_arg_index = + match op with Compare_and_swap -> 2 | Fetch_and_add -> 1 + in + create ~first_memory_arg_index desc + | Specific s -> Vectorize_specific.memory_access s + | Begin_region | End_region -> + (* conservative, don't reorder around region begin/end. *) + create Arbitrary + | Name_for_debugger _ | Dls_get | Poll | Opaque | Probe_is_enabled _ + -> + (* conservative, don't reorder around this instruction. *) + (* CR-someday gyorsh: Poll insertion pass is after the vectorizer. + Currently, it inserts instruction at the end of a block, so it + would not have affected vectorizer decisions. 
Consider soundness + of it carefully if the vectorizer is extended beyond basic blocks + (also in arm64 backend). Note that some poll instructions can + also be inserted before the vectorizer by the user or during + selection. *) + (* CR-soon gyorsh: Update [Name_for_debugger.regs] if relevant + instructions are vectorized. Currently, the debug info would be + out of sync. *) + create Arbitrary + | Spill | Reload -> + Misc.fatal_error + "Unexpected instruction Spill or Reload during vectorize" + | Move | Reinterpret_cast _ | Static_cast _ | Const_int _ + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop _ | Intop_imm _ | Floatop _ | Csel _ | Alloc _ + -> + None) - let can_cross (t : t) (instruction_1 : Instruction.t) - (instruction_2 : Instruction.t) = - let get_memory_operation instruction = - Instruction.Id.Tbl.find_opt t.memory_operations - (Instruction.id instruction) - in - match - get_memory_operation instruction_1, get_memory_operation instruction_2 - with - | None, _ | _, None -> - (* Make sure that they are not both "dangerous", ie. 
thinigs like allocs - or specific stores *) - Instruction.can_cross_loads_or_stores instruction_1 - || Instruction.can_cross_loads_or_stores instruction_2 - | Some memory_operation_1, Some memory_operation_2 -> ( - match memory_operation_1.op, memory_operation_2.op with - | Load, Load -> true - | Load, Store | Store, Load | Store, Store -> - if Memory_operation.compare_addressing_modes_and_arguments - memory_operation_1 memory_operation_2 - = 0 - then - let check_direct_separation left_memory_operation - right_memory_operation = - match - Memory_operation.offset_in_bytes left_memory_operation - right_memory_operation - with - | None -> false - | Some offset -> - offset - >= (left_memory_operation.Memory_operation.memory_chunk - |> Cmm.width_in_bytes) + let create (instruction : Instruction.t) reaching_definitions : t option = + match memory_access instruction with + | None -> None + | Some memory_access -> + let arguments = Instruction.arguments instruction in + let address_args, non_address_args = + match Memory_access.first_memory_arg_index memory_access with + | 0 -> + (* avoid copy *) + arguments, [||] + | i -> + ( Array.sub arguments i (Array.length arguments - i), + Array.sub arguments 0 i ) in - check_direct_separation memory_operation_1 memory_operation_2 - || check_direct_separation memory_operation_2 memory_operation_1 - else - (* Assumption: If memory operation 1 definitely depends on an - allocation and memory operation 2 definitely does not depend on it, - then they are disjoint *) - Instruction.Id.Set.is_empty - (Instruction.Id.Set.diff memory_operation_1.dependent_allocs - (Instruction.Id.Set.union memory_operation_2.dependent_allocs - memory_operation_2.unsure_allocs)) - || Instruction.Id.Set.is_empty - (Instruction.Id.Set.diff memory_operation_2.dependent_allocs - (Instruction.Id.Set.union memory_operation_1.dependent_allocs - memory_operation_1.unsure_allocs))) - - let dump ppf ({ loads; stores; memory_operations } : t) = - let open Format in - 
let print_list list = - List.iter - (fun id -> - let address = Instruction.Id.Tbl.find memory_operations id in - Memory_operation.dump ppf address) - list - in - fprintf ppf "\nmemory accesses (loads):\n"; - print_list loads; - fprintf ppf "\nmemory accesses (stores):\n"; - print_list stores; - fprintf ppf "\n" -end - -module Seed : sig - type t - - val from_block : Cfg.basic_block -> t list + let address_args_defined_at = + Array.map + (fun reg -> + Reg_defined_at_instruction.create reg instruction + reaching_definitions) + address_args + in + Some + { memory_access; + instruction; + address_args; + address_args_defined_at; + non_address_args + } - val dump : Format.formatter -> t list -> unit -end = struct - type t = Memory_accesses.Memory_operation.t list - - let can_cross memory_accesses instruction_1 instruction_2 = - let reg_array_to_set = Reg.Set.of_list << Array.to_list in - let argument_set = reg_array_to_set << Instruction.arguments - and affected_set instruction = - Reg.Set.union - (Instruction.results instruction |> reg_array_to_set) - (Instruction.destroyed instruction |> reg_array_to_set) - in - let arguments_1 = argument_set instruction_1 - and affected_1 = affected_set instruction_1 - and arguments_2 = argument_set instruction_2 - and affected_2 = affected_set instruction_2 in - if Reg.Set.disjoint affected_1 affected_2 - && Reg.Set.disjoint arguments_1 affected_2 - && Reg.Set.disjoint affected_1 arguments_2 - then Memory_accesses.can_cross memory_accesses instruction_1 instruction_2 - else false - - let from_block (block : Cfg.basic_block) : t list = - (* For each store instruction, it tries to form a seed with the closest - stores after it, it will go down the DLL of instructions and tries to - move the store instructions across the non-store instructions until all - the store instructions are together *) - let memory_accesses = Memory_accesses.from_block block in - let stores = Memory_accesses.stores memory_accesses in - List.filter_map - (fun 
store_id -> - let starting_cell = + let to_bits bytes = bytes * 8 + + let get_addressing_mode t = + match Memory_access.desc t.memory_access with + | Read { addressing_mode; _ } + | Write { addressing_mode; _ } + | Read_and_write { addressing_mode; _ } -> + Some addressing_mode + | Arbitrary | Alloc -> None + + let get_width_in_bits t = + match Memory_access.desc t.memory_access with + | Read { width_in_bits; _ } + | Write { width_in_bits; _ } + | Read_and_write { width_in_bits; _ } -> + Some width_in_bits + | Arbitrary | Alloc -> None + + let get_width_in_bits_exn t = + t |> get_width_in_bits |> Option.get + |> Vectorize_utils.Width_in_bits.to_int + + let offset_in_bytes ~arg_offset_in_bytes (t1 : t) (t2 : t) = + match get_addressing_mode t1, get_addressing_mode t2 with + | Some addressing_mode_1, Some addressing_mode_2 -> + Arch.addressing_offset_in_bytes addressing_mode_1 addressing_mode_2 + t1.address_args_defined_at t2.address_args_defined_at + ~arg_offset_in_bytes + | (Some _ | None), _ -> None + + let actual_arg_offset_in_bytes block reaching_definitions r1 r2 = + (* Heuristic for handling array accesses where the scale register is not + the same, but a simple equation. *) + Reg_defined_at_instruction.get_offset r1 r2 block reaching_definitions + + (* CR-someday gyorsh: this can be extended in the future to handle + "scatter-gather" accesses. *) + let is_adjacent (t1 : t) (t2 : t) block reaching_definitions = + let arg_offset_in_bytes = + actual_arg_offset_in_bytes block reaching_definitions + in + assert (Instruction.have_isomorphic_op t1.instruction t2.instruction); + match offset_in_bytes t1 t2 ~arg_offset_in_bytes with + | None -> false + | Some offset_in_bytes -> + to_bits offset_in_bytes = get_width_in_bits_exn t1 + + (** [is_before t1 t2] returns true if we can prove that t1 and t2 are disjoint + intervals within the same block, and t1 is before t2. 
*) + let is_before ~arg_offset_in_bytes t1 t2 block = + let res = + match offset_in_bytes ~arg_offset_in_bytes t1 t2 with + | None -> false + | Some offset_in_bytes -> + State.dump_debug (Block.state block) + "offset_in_bytes = %d, get_width_in_bits_exn t1 = %d \n" + (to_bits offset_in_bytes) (get_width_in_bits_exn t1); + to_bits offset_in_bytes >= get_width_in_bits_exn t1 + in + State.dump_debug (Block.state block) "is_before (%a) (%a) = %b \n" + Instruction.print t1.instruction Instruction.print t2.instruction res; + res + + (* [is_adjacent] implies [is_disjoint + ~arg_offset_in_bytes:actual_arg_offset_in_bytes] *) + let is_disjoint ~arg_offset_in_bytes t1 t2 block = + let res = + is_before ~arg_offset_in_bytes t1 t2 block + || is_before ~arg_offset_in_bytes t2 t1 block + in + State.dump_debug (Block.state block) "is_disjoint (%a) (%a) = %b \n" + Instruction.print t1.instruction Instruction.print t2.instruction res; + res + + (* CR gyorsh: The middle end has more accurate information about + expressions that evaluate to closures. We can propagate it to the + backend in the future and have more precise Reg.typ for example, + instead of the current heuristic. *) + let maybe_closure_block t = + (* Closure blocks are not mutated after initialization. *) + match Memory_access.desc t.memory_access with + | Read { is_mutable = true; _ } -> + (* CR gyorsh: is it sound to return [false] here? Are reads from + closure emitted as immutable? Can immutability of closure blocks be + lost through optimizations such as CSE in the middle end? 
*) + false + | Read { is_mutable = false; _ } -> true + | Read_and_write _ -> false + | Write { init_or_assign = Assignment; _ } -> false + | Write { init_or_assign = Initialization; _ } -> true + | Arbitrary -> true + | Alloc -> true + + let points_to_ocaml_block t block = + let val_args = + t.address_args_defined_at |> Array.to_seq + |> Seq.filter Reg_defined_at_instruction.is_val + |> List.of_seq + in + State.dump_debug (Block.state block) + "points_to_ocaml_block: %a val_args = %a\n" Instruction.print + t.instruction + (Format.pp_print_list Reg_defined_at_instruction.print) + val_args; + match val_args with [base] -> Some base | [] | _ -> None + + let points_to_ocaml_block_not_closure_block t block = + if maybe_closure_block t then None else points_to_ocaml_block t block + + let is_disjoint t1 t2 block reaching_definitions = + let equiv () = + (* heuristic to detect that either (a) r1 and r2 point to the same + ocaml block, or (b) r1 and r2 point to disjoint ocaml blocks, but + not to partially overlapping. 
*) match - DLL.find_cell_opt block.body ~f:(fun instruction -> - Basic instruction |> Instruction.id - |> Instruction.Id.equal store_id) + ( points_to_ocaml_block_not_closure_block t1 block, + points_to_ocaml_block_not_closure_block t2 block ) with - | Some current_cell -> DLL.next current_cell - | None -> assert false + | Some base1, Some base2 -> + State.dump_debug (Block.state block) + "Found equiv: base 1 = %a, base2 = %a\n" + Reg_defined_at_instruction.print base1 + Reg_defined_at_instruction.print base2; + Some (base1, base2) + | (Some _ | None), _ -> + State.dump_debug (Block.state block) + "Not found equiv for (%a) and (%a)\n" Instruction.print_id + t1.instruction Instruction.print_id t2.instruction; + None in - let starting_memory_operation = - Memory_accesses.get_memory_operation_exn memory_accesses store_id + let actual_arg_offset_in_bytes r1 r2 = + actual_arg_offset_in_bytes block reaching_definitions r1 r2 in - let items_in_vector = - vector_width_in_bytes - / Memory_accesses.Memory_operation.width starting_memory_operation + let arg_offset_in_bytes r1 r2 = + State.dump_debug (Block.state block) "arg_offset_in_bytes (%a) (%a)\n" + Reg_defined_at_instruction.print r1 Reg_defined_at_instruction.print + r2; + match actual_arg_offset_in_bytes r1 r2 with + | Some _ as res -> res + | None -> ( + match equiv () with + | None -> None + | Some (base1, base2) -> + if Reg_defined_at_instruction.equal base1 r1 + && Reg_defined_at_instruction.equal base2 r2 + || Reg_defined_at_instruction.equal base1 r2 + && Reg_defined_at_instruction.equal base2 r1 + then ( + (* pretend that the registers r1 and r2 are the same. 
*) + State.dump_debug (Block.state block) + "arg_offset_in_bytes (%a) (%a) = Some 0\n" + Reg_defined_at_instruction.print r1 + Reg_defined_at_instruction.print r2; + Some 0) + else None) in - let can_cross_chunk seed instruction = - List.fold_left - (fun can memory_operation -> - can - && can_cross memory_accesses - (Memory_accesses.Memory_operation.instruction - memory_operation) - instruction) - true seed - in - let rec find_seed n seed cell_option = - if n = 0 - then Some seed + is_disjoint ~arg_offset_in_bytes t1 t2 block + + let dump_desc ppf (t : t) = + let open Format in + let print_bool msg ppf b = + if b then fprintf ppf " (%s=true)" msg else () + in + let print_addr ppf addr = + Arch.print_addressing Printreg.reg addr ppf t.address_args + in + let pr (init_or_assign : Memory_access.Init_or_assign.t) = + match init_or_assign with + | Initialization -> "init" + | Assignment -> "assign" + in + match desc t with + | Alloc -> fprintf ppf "Alloc" + | Arbitrary -> fprintf ppf "Arbitrary" + | Read { width_in_bits; addressing_mode; is_mutable; is_atomic } -> + fprintf ppf "Read%a [%a]%a%a" Vectorize_utils.Width_in_bits.print + width_in_bits print_addr addressing_mode (print_bool "is_mutable") + is_mutable (print_bool "is_atomic") is_atomic + | Write { width_in_bits; addressing_mode; init_or_assign } -> + fprintf ppf "Write%a [%a] %s" Vectorize_utils.Width_in_bits.print + width_in_bits print_addr addressing_mode (pr init_or_assign) + | Read_and_write { width_in_bits; addressing_mode; is_atomic } -> + fprintf ppf "Read_and_write%a [%a]%a" + Vectorize_utils.Width_in_bits.print width_in_bits print_addr + addressing_mode (print_bool "is_atomic") is_atomic + + let dump ppf t = + let dump_index = + let ind = first_memory_arg_index t in + if Int.equal ind 0 + then "" + else ", first_memory_arg_index=" ^ string_of_int ind + in + Format.fprintf ppf "%a%s\n" dump_desc t dump_index + end + + module Partitions : sig + (** Graph where nodes represent disjoint memory areas 
(partitions) and an edge from + partition A to B means that A may point to B (i.e., a memory location in A may + contain the address of a memory location in B). *) + type t + + val init : t + + val add_node : t -> Partition.t -> t + + val add_edges : t -> src:Partition.Set.t -> dst:Partition.Set.t -> t + + val get_successors : t -> Partition.Set.t -> Partition.Set.t + + val all : t -> Partition.Set.t + + (* val fold : f:(Partition.t -> 'a -> 'a) -> init:'a -> t -> 'a *) + end = struct + type t = Partition.Set.t Partition.Map.t + + let init = + (* [unknown] partition may point to itself *) + Partition.Map.singleton Partition.unknown Partition.Set.unknown + + let add_node t p = Partition.Map.add p Partition.Set.empty t + + let add_edges t ~src ~dst = + Partition.Set.fold + (fun src_p acc -> + let old = Partition.Map.find src_p acc in + Partition.Map.add src_p (Partition.Set.union dst old) acc) + src t + + let get_successors t set = + let init = Partition.Set.empty in + Partition.Set.fold + (fun src acc -> + let dst = Partition.Map.find src t in + Partition.Set.union dst acc) + set init + + let all t = + t |> Partition.Map.to_seq |> Seq.map fst |> Partition.Set.of_seq + + (* let fold ~f ~init t = Partition.Map.fold (fun p _ acc -> f p acc) t + init *) + end + + module Accesses : sig + type t + + val empty : t + + val add : t -> Partition.t -> Operation.t -> t + + val add_all : t -> Partition.Set.t -> Operation.t -> t + + val fold : + f:(Partition.t -> Operation.t list -> 'a -> 'a) -> init:'a -> t -> 'a + end = struct + (** The order of operations in the list is the reverse of the order they appear in the + block (relative to each other). 
*) + type t = Operation.t list Partition.Map.t + + let empty = Partition.Map.empty + + let get t p = + match Partition.Map.find_opt p t with None -> [] | Some s -> s + + let add t p op = Partition.Map.add p (op :: get t p) t + + let add_all t set op = + Partition.Set.fold (fun p acc -> add acc p op) set t + + let fold ~f ~init t = Partition.Map.fold f t init + end + + module Aliases : sig + type t + + val empty : t + + val replace_regs : Reg.t array -> Partition.Set.t -> t -> t + + val remove_regs : Reg.t array -> t -> t + + val get_regs : Reg.t array -> t -> Partition.Set.t + end = struct + (** for each p, a set of partitions that p may point to, including p itself + explicitly. *) + type t = Partition.Set.t Reg.Map.t + + let empty = Reg.Map.empty + + let find t reg = + match Reg.Map.find_opt reg t with + | None -> + (* reg is implicitly associated with [Partition.unknown] *) + Partition.Set.unknown + | Some old_partitions -> old_partitions + + let get_regs regs t = + Array.fold_left + (fun set reg -> + let p = find t reg in + Partition.Set.union set p) + Partition.Set.empty regs + + let replace reg partitions t = Reg.Map.add reg partitions t + + let replace_regs regs partitions t = + Array.fold_left (fun acc reg -> replace reg partitions acc) t regs + + let remove_regs regs t = replace_regs regs Partition.Set.empty t + end + + module Points_to : sig + type t + + val from_block : Block.t -> Reaching_definitions.t -> t + + (* val transform : t -> Instruction.t -> t *) + (* val partitions : t -> Partitions.t *) + val operations : t -> Operation.t Instruction.Id.Map.t + + val accesses : t -> Accesses.t + end = struct + (* CR gyorsh: not sure if the extra layer of abstraction (Aliases and + Partitions modules) is useful. *) + type t = + { partitions : Partitions.t; + (** For each partition, the set of partitions it may point. *) + aliases : Aliases.t; + (** For each register, the set of partitions it may point to. 
*) + accesses : Accesses.t; + (** For each partition, the list of instructions that may access it. *) + operations : Operation.t Instruction.Id.Map.t + (** Mapping from instruction id to the corresponding memory operations. Instructions + that do no access memory are not in the map. *) + } + + let init = + { partitions = Partitions.init; + aliases = Aliases.empty; + accesses = Accesses.empty; + operations = Instruction.Id.Map.empty + } + + (* let partitions t = t.partitions *) + + let accesses t = t.accesses + + let operations t = t.operations + + let update_aliases t instruction result_may_point_to_partitions = + let aliases = + (* Kill: set the points-to of clobbered registers to empty because + using these registers is illegal. Gen: set the points-to of + results. *) + t.aliases + |> Aliases.remove_regs (Instruction.destroyed instruction) + |> Aliases.replace_regs + (Instruction.results instruction) + result_may_point_to_partitions + in + { t with aliases } + + let update t ~may_access_partitions ~may_point_to_partitions ~is_atomic + instruction op = + let may_access_partitions = + if is_atomic + then + (* Should not be reordered with respect to any other + instructions. 
*) + Partitions.all t.partitions + else may_access_partitions + in + let accesses = Accesses.add_all t.accesses may_access_partitions op in + let result_may_point_to_partitions = + Partition.Set.union may_point_to_partitions + (Partitions.get_successors t.partitions may_access_partitions) + in + let t = { t with accesses } in + update_aliases t instruction result_may_point_to_partitions + + let transform_write t ~is_atomic ~may_access_any_partition instruction op + = + let addr_args = Operation.address_args op in + let may_access_partitions = + if may_access_any_partition + then Partitions.all t.partitions + else Aliases.get_regs addr_args t.aliases + in + let value_to_store = Operation.non_address_args op in + let may_point_to_partitions = + Aliases.get_regs value_to_store t.aliases + in + let partitions = + Partitions.add_edges t.partitions ~src:may_access_partitions + ~dst:may_point_to_partitions + in + let t = { t with partitions } in + update t ~may_access_partitions ~may_point_to_partitions ~is_atomic + instruction op + + let transform t instruction ~reaching_definitions = + match Operation.create instruction reaching_definitions with + | None -> + (* propagates from args to res *) + let args_may_point_to_partitions = + Aliases.get_regs (Instruction.arguments instruction) t.aliases + in + update_aliases t instruction args_may_point_to_partitions + | Some op -> ( + let id = Instruction.id instruction in + let t = + { t with operations = Instruction.Id.Map.add id op t.operations } + in + match Operation.desc op with + | Alloc -> + let fresh_partition = Partition.create ~allocation_site:id in + let t = + { t with + partitions = Partitions.add_node t.partitions fresh_partition; + accesses = Accesses.add t.accesses fresh_partition op + } + in + update_aliases t instruction + (Partition.Set.singleton fresh_partition) + | Read { is_atomic; _ } -> + let non_address_args = Operation.non_address_args op in + let may_point_to_partitions = + Aliases.get_regs 
non_address_args t.aliases + in + let addr_args = Operation.address_args op in + let may_access_partitions = Aliases.get_regs addr_args t.aliases in + update t ~may_access_partitions ~may_point_to_partitions ~is_atomic + instruction op + | Write _ -> + assert (Array.length (Instruction.results instruction) = 0); + transform_write t ~is_atomic:false ~may_access_any_partition:false + instruction op + | Read_and_write { is_atomic; _ } -> + transform_write t ~is_atomic ~may_access_any_partition:false + instruction op + | Arbitrary -> + transform_write t ~is_atomic:true ~may_access_any_partition:true + instruction op) + + let from_block block reaching_definitions = + DLL.fold_left (Block.body block) ~init ~f:(fun acc i -> + transform acc (Instruction.basic i) ~reaching_definitions) + end + + (* CR-someday gyorsh: Merge with the dependency graph for regs. Both can be + computed alongside reaching definitions and points_to. *) + module Dependencies : sig + type t + + val from_block : Accesses.t -> Block.t -> Reaching_definitions.t -> t + + val get : t -> Instruction.Id.t -> Instruction.Id.Set.t + + val for_all : + t -> f:(Instruction.Id.t -> Instruction.Id.t -> bool) -> bool + + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + (** [i] is mapped to [j] if [i] may directly depend on [j] via memory, i.e., [i] may + read and [j] may write the same memory location. 
*) + type t = Instruction.Id.Set.t Instruction.Id.Map.t + + let get t id = + let default = Instruction.Id.Set.empty in + Instruction.Id.Map.find_opt id t |> Option.value ~default + + let for_all t ~f = + Instruction.Id.Map.for_all + (fun id dsts -> Instruction.Id.Set.for_all (f id) dsts) + t + + type dependency_kind = + | No_direct_dependency + | Data_dependency + | Order_constraint + + let dep_kind_to_string d = + match d with + | No_direct_dependency -> "No direct dependency" + | Data_dependency -> "Data dependency" + | Order_constraint -> "Order constraint" + + let get_dependency_kind ~src ~dst block reaching_definitions = + (* [get_dependency_kind ~src ~dst] conservatively answers the question: + does [src] directly depend on [dst]? *) + let if_not_disjoint m1 m2 dep_kind = + if Operation.is_disjoint m1 m2 block reaching_definitions + then No_direct_dependency + else ( + State.dump_debug (Block.state block) "%s: %a->%a\n" + (dep_kind_to_string dep_kind) + Instruction.Id.print + (Operation.get_instruction_id m1) + Instruction.Id.print + (Operation.get_instruction_id m2); + dep_kind) + in + (* CR-someday gyorsh: this big match can be simplified/split by checking + the following simple conditions: (a) if one of the operations is + atomic or one of the operations does not have an addressing_mode, + then no disjointness check needed. (b) if [src] may read and [dst] + may write, then this is a [Data_dependencies], otherwise it is an + [Order_constraint]. *) + match Operation.desc src, Operation.desc dst with + | Alloc, _ -> + (* Currently, Alloc always starts a new partition, so it is the first + operation in the list of accesses of its partition. 
*) + Misc.fatal_error "Unexpected Alloc" + | (Read _ | Write _ | Read_and_write _ | Arbitrary), Alloc -> + Order_constraint + | Read { is_atomic = true; _ }, (Write _ | Read_and_write _ | Arbitrary) + -> + (* Treat atomic instructions as memory barriers, don't reorder + anything around an atomic instruction, and conservatively assume + that an atomic read may access anywhere in memory. *) + Data_dependency + | Read { is_atomic = true; _ }, Read _ + | Read { is_atomic = false; _ }, Read { is_atomic = true; _ } + | (Write _ | Read_and_write _ | Arbitrary), Read { is_atomic = true; _ } + -> + Order_constraint + | Read { is_atomic = false; _ }, Read { is_atomic = false; _ } -> + No_direct_dependency + | Write _, Write _ | Write _, Read { is_atomic = false; _ } -> + if_not_disjoint src dst Order_constraint + | Read { is_atomic = false; _ }, Write _ -> + if_not_disjoint src dst Data_dependency + | Arbitrary, Read { is_atomic = false; _ } -> Order_constraint + | ( Arbitrary, + (Write _ | Read_and_write { is_atomic = false; _ } | Arbitrary) ) + | (Read _ | Read_and_write _), Arbitrary -> + Data_dependency + | Arbitrary, Read_and_write { is_atomic = true; _ } -> Data_dependency + | Write _, Arbitrary -> Order_constraint + | ( Read_and_write { is_atomic = false; _ }, + Read_and_write { is_atomic = false; _ } ) + | Read_and_write { is_atomic = false; _ }, Write _ + | Read { is_atomic = false; _ }, Read_and_write { is_atomic = false; _ } + -> + if_not_disjoint src dst Data_dependency + | Read_and_write { is_atomic = false; _ }, Read { is_atomic = false; _ } + -> + if_not_disjoint src dst Order_constraint + | ( ( Read { is_atomic = false; _ } + | Read_and_write { is_atomic = false; _ } ), + Read_and_write { is_atomic = true; _ } ) -> + Data_dependency + | Write _, Read_and_write { is_atomic = true; _ } -> Order_constraint + | Read_and_write { is_atomic = true; _ }, Read _ -> Order_constraint + | Read_and_write { is_atomic = true; _ }, Write _ -> Data_dependency + | 
Read_and_write { is_atomic = true; _ }, Read_and_write _ -> + Data_dependency + | Write _, Read_and_write { is_atomic = false; _ } -> + if_not_disjoint src dst Order_constraint + + let from_block accesses block reaching_definitions = + (* [create] is quadratic in the number of memory operations in the + block. *) + let add t ~src ~dst = + let src_id = Operation.get_instruction_id src in + let dst_id = Operation.get_instruction_id dst in + let f set = + match set with + | None -> Some (Instruction.Id.Set.singleton dst_id) + | Some set -> Some (Instruction.Id.Set.add dst_id set) + in + Instruction.Id.Map.update src_id f t + in + let add_dep t ~src ~dst = + match get_dependency_kind ~src ~dst block reaching_definitions with + | No_direct_dependency -> t + | Data_dependency | Order_constraint -> + (* CR-someday gyorsh: we don't currently use the distinction between + data dependencies and order constraints, but it can be useful for + future optimizations. *) + add t ~src ~dst + in + let rec add_deps t ~src operations = + match operations with + | [] -> t + | hd :: tl -> + let t = add_dep t ~src ~dst:hd in + add_deps t ~src tl + in + let rec add_operations t operations = + match operations with + | [] -> t + | hd :: tl -> + let t = add_deps t ~src:hd tl in + add_operations t tl + in + Accesses.fold accesses ~init:Instruction.Id.Map.empty + ~f:(fun _p operations acc -> add_operations acc operations) + + let dump ppf ~block t = + (* print dependencies in the order they appear in the block. 
*) + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let src = Instruction.id instruction in + let dsts = get t src in + Instruction.Id.Set.iter + (fun dst -> + Format.fprintf ppf "memory dep: %a->%a\n" Instruction.Id.print + src Instruction.Id.print dst) + dsts) + end + + type t = Operation.t Instruction.Id.Map.t + + let from_block block reaching_definitions = + let pts = Points_to.from_block block reaching_definitions in + let operations = Points_to.operations pts in + let deps = + Dependencies.from_block (Points_to.accesses pts) block + reaching_definitions + in + operations, deps + + let get_memory_operation t instruction = + Instruction.Id.Map.find_opt (Instruction.id instruction) t + + let get_memory_operation_exn t instruction = + Instruction.Id.Map.find (Instruction.id instruction) t + + let is_adjacent (t : t) i1 i2 block reaching_definitions : bool = + Operation.is_adjacent + (get_memory_operation_exn t i1) + (get_memory_operation_exn t i2) + block reaching_definitions + + let dump ppf ~block t = + (* print dependencies in the order they appear in the block. 
*) + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + match Instruction.Id.Map.find_opt id t with + | None -> () + | Some op -> + Format.fprintf ppf "memory op for %a: %a\n" Instruction.Id.print id + Operation.dump op) + end + + module Dependency_graph : sig + type t + + val from_block : + Block.t -> Reaching_definitions.t -> Memory.Dependencies.t -> t + + val independent : t -> Instruction.Id.t -> Instruction.Id.t -> bool + + val get_direct_dependency_of_arg : + t -> Instruction.Id.t -> arg_i:int -> Instruction.Id.t option + + val get_direct_dependencies : t -> Instruction.Id.t -> Instruction.Id.Set.t + + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + module Node = struct + type t = + { direct_dependencies_of_args : Instruction.Id.t option array; + (** instruction that defines the argument *) + direct_dependencies : Instruction.Id.Set.t; + (** direct dependencies of all register arguments of this instruction (does + not include memory dependencies). *) + all_dependencies : Instruction.Id.Set.t + (** transitive reflexive dependencies of this instruction, covers register and + memory dependencies (but not order constraints). 
*) + } + + let init instruction reaching_definitions : t = + let arguments = Instruction.arguments instruction in + let id = Instruction.id instruction in + let direct_dependencies_of_args = + Array.map (Reaching_definitions.get reaching_definitions id) arguments + in + let direct_dependencies = + Array.fold_left + (fun acc dep -> + match dep with + | None -> acc + | Some dep -> Instruction.Id.Set.add dep acc) + Instruction.Id.Set.empty direct_dependencies_of_args + in + { direct_dependencies_of_args; + direct_dependencies; + (* initialized later: *) + all_dependencies = Instruction.Id.Set.empty + } + end + + type t = Node.t Instruction.Id.Tbl.t + + let get_direct_dependency_of_arg dependency_graph id ~arg_i = + let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in + node.direct_dependencies_of_args.(arg_i) + + let get_all_dependencies dependency_graph id = + let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in + node.all_dependencies + + let get_direct_dependencies dependency_graph id = + let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in + node.direct_dependencies + + let independent t i1 i2 = + let all_deps_of_i1 = get_all_dependencies t i1 in + let all_deps_of_i2 = get_all_dependencies t i2 in + not + (Instruction.Id.Set.mem i1 all_deps_of_i2 + || Instruction.Id.Set.mem i2 all_deps_of_i1) + + (* CR-soon gyorsh: we don't need to keep [Points_to] and table of + [Reaching_definitions.t] per instruction after we computed direct + dependencies between instructions. For now it's easier to keep them for + debugging and probably not too expensive as there is a lot of sharing in + the representation of the values in these tables, because they are + constructed incrementally. 
*) + let from_block (block : Block.t) reaching_definitions mem_deps = + let body = Block.body block in + let terminator = Block.terminator block in + let t = Block.size block |> Instruction.Id.Tbl.create in + let add_dependencies (instruction : Instruction.t) = + (* Add direct dependencies *) + let node = Node.init instruction reaching_definitions in + (* Add transitive dependencies *) + let id = Instruction.id instruction in + let all_dependencies = + let init = + (* data dependencies via registers and memory, and order + constraints. *) + Instruction.Id.Set.union node.direct_dependencies + (Memory.Dependencies.get mem_deps id) + in + Instruction.Id.Set.fold + (fun new_id acc -> + let (new_node : Node.t) = Instruction.Id.Tbl.find t new_id in + Instruction.Id.Set.union new_node.all_dependencies acc) + init init + in + let node = { node with all_dependencies } in + Instruction.Id.Tbl.add t id node + in + DLL.iter body ~f:(fun i -> add_dependencies (Instruction.basic i)); + (* CR gyorsh: not sure we need dependencies before terminator. 
*) + add_dependencies terminator; + t + + let dump ppf ~(block : Block.t) (t : t) = + let open Format in + let print_node (instruction : Instruction.t) = + let args = Instruction.arguments instruction in + let print_reg arg_i dep = + let pp ppf o = + match o with + | None -> fprintf ppf "none" + | Some id -> fprintf ppf "instruction %a" Instruction.Id.print id + in + let reg = args.(arg_i) in + fprintf ppf "argument %d, %a depends on %a\n" arg_i Printreg.reg reg + pp dep + in + let id = Instruction.id instruction in + let node = Instruction.Id.Tbl.find t id in + fprintf ppf "\n%a\n" Instruction.print instruction; + fprintf ppf "\ndirect dependencies:\n"; + Instruction.Id.Set.iter + (fprintf ppf "%a " Instruction.Id.print) + node.direct_dependencies; + fprintf ppf "\nall dependencies:\n"; + Instruction.Id.Set.iter + (fprintf ppf "%a " Instruction.Id.print) + node.all_dependencies; + fprintf ppf "\nis direct dependency of:\n"; + fprintf ppf "\narg dependencies:\n"; + Array.iteri print_reg node.direct_dependencies_of_args; + fprintf ppf "\n" + in + fprintf ppf "\ndependency graph:\n"; + DLL.iter (Block.body block) ~f:(fun instruction -> + print_node (Instruction.basic instruction)); + print_node (Block.terminator block); + fprintf ppf "\n" + end + + (* CR-soon gyorsh: make [t] lazy, and only construct when needed, for + performance reasons. For now, for testing purposes, it's better to always + construct to increase coverage. *) + type t = + { reaching_definitions : Reaching_definitions.t; + dependency_graph : Dependency_graph.t; + memory_operations : Memory.t; + mem_deps : Memory.Dependencies.t; + block : Block.t + } + + let state t = Block.state t.block + + let independent t instruction_1 instruction_2 = + (* This covers register and memory dependencies and order constraints, + because of the way we constructed the graph. 
*) + let id1 = Instruction.id instruction_1 in + let id2 = Instruction.id instruction_2 in + Dependency_graph.independent t.dependency_graph id1 id2 + + let all_independent t instructions = + (* All pairs of instructions in the group are independent. *) + let rec check t instructions = + match instructions with + | [] -> true + | hd :: tl -> + let b = List.for_all (independent t hd) tl in + if b + then ( + State.dump_debug (state t) "Group.all_independent: %a\n" + Instruction.print_id hd; + check t tl) + else false + in + check t instructions + + let all_adjacent t instructions = + (* CR-soon gyorsh: shuffles are not supported yet, but this is the place to + identify them, for example: i<-[a];j<-[a+8];[b]<-j;[b+8]<-i *) + let rec check_adjacent hd1 tl1 = + match tl1 with + | [] -> true + | hd2 :: tl2 -> + if Memory.is_adjacent t.memory_operations hd1 hd2 t.block + t.reaching_definitions + then check_adjacent hd2 tl2 + else false + in + check_adjacent (List.hd instructions) (List.tl instructions) + + let for_all_memory_dependencies t ~f = + Memory.Dependencies.for_all ~f t.mem_deps + + (* let is_memory_operation t i = Memory.is_memory_operation + t.memory_operations i *) + + (* let width_in_bits t i = Memory.width_in_bits t.memory_operation i *) + + let get_direct_dependency_of_arg t = + Dependency_graph.get_direct_dependency_of_arg t.dependency_graph + + let get_direct_dependencies t id = + Dependency_graph.get_direct_dependencies t.dependency_graph id + + let get_direct_dependency_of_reg t id reg = + Reaching_definitions.get t.reaching_definitions id reg + + let get_memory_operation t instructon = + Memory.get_memory_operation t.memory_operations instructon + + let from_block block = + let state = Block.state block in + let reaching_definitions = Reaching_definitions.from_block block in + State.dump_debug state "%a@." 
+ (Reaching_definitions.dump ~block) + reaching_definitions; + let memory_operations, mem_deps = + Memory.from_block block reaching_definitions + in + State.dump_debug state "%a@." (Memory.dump ~block) memory_operations; + State.dump_debug state "%a@." (Memory.Dependencies.dump ~block) mem_deps; + let dependency_graph = + Dependency_graph.from_block block reaching_definitions mem_deps + in + State.dump_debug state "%a@." + (Dependency_graph.dump ~block) + dependency_graph; + { reaching_definitions; + dependency_graph; + memory_operations; + mem_deps; + block + } +end + +module Computation : sig + type t + + module Group : sig + type t + + val scalar_instructions : t -> Instruction.t list + + val vector_instructions : t -> Vectorize_utils.Vectorized_instruction.t list + + val iter_vectorizable_args : t -> f:(arg_i:int -> unit) -> unit + end + + module Seed : sig + type t + + val from_block : Block.t -> Dependencies.t -> t list + + val dump : Format.formatter -> t list -> unit + end + + val from_seed : Block.t -> Dependencies.t -> Seed.t -> t option + + val dump : Format.formatter -> block:Block.t -> t -> unit + + val dump_one_line_stat : Format.formatter -> t -> unit + + val dump_all : Format.formatter -> block:Block.t -> t list -> unit + + (** [contains t i] returns true iff instruction [i] belongs to some + group in [t]. [i] need not be the key instruction of the group. *) + val contains : t -> Instruction.t -> bool + + (** [find_group t key] returns the group of [key] instruction, or + None if [key] instruction is not the key of any group in [t]. *) + val find_group : t -> key:Instruction.t -> Group.t option + + (** Selects disjoint computations from the input list of computations + and returns their union. *) + val select_and_join : t list -> Block.t -> Dependencies.t -> t option + + val cost : t -> int + + val num_groups : t -> int +end = struct + module Group : sig + (** Represents scalar instructions and the corresponding + vector instructions. 
*) + type t + + (** guaranteed to return a list with at least 2 instructions. *) + val scalar_instructions : t -> Instruction.t list + + (** guaranteed to return a non-empty list. *) + val vector_instructions : t -> Vectorize_utils.Vectorized_instruction.t list + + (** maps over the indexes of arguments that need to be considered when vectorizing + dependencies. Currently skips over arguments that are used for memory address + calculation. [init] ensures that the memory address arguments have the same values + for all instructions in the group. The result list is in reverse order. *) + val map_vectorizable_args : t -> f:(arg_i:int -> 'a) -> 'a list + + val iter_vectorizable_args : t -> f:(arg_i:int -> unit) -> unit + + val for_all_non_vectorizable_args : t -> f:(arg_i:int -> bool) -> bool + + (** [init width_in_bits instructions] checks that [instructions] + are supported isomorphic scalar instructions that are + inter-independent and if they have memory accesses, + the accesses must be adjacent. *) + val init : + width_in_bits:Vectorize_utils.Width_in_bits.t -> + Instruction.t list -> + Dependencies.t -> + t option + + val equal : t -> t -> bool + + val dump : Format.formatter -> t -> unit + end = struct + type t = + { vector_instructions : Vectorize_utils.Vectorized_instruction.t list; + instructions : Instruction.t list; + non_address_arg_count : int; + arg_count : int + } + + let scalar_instructions t = t.instructions + + let vector_instructions t = t.vector_instructions + + let equal t1 t2 = + List.equal Instruction.equal_id t1.instructions t2.instructions + + let get_arg_count i = Instruction.arguments i |> Array.length + + let get_res_count i = Instruction.results i |> Array.length + + let map_vectorizable_args t ~f = + (* Currently, the code assumes that the (variable number of) address args + are always at the end of the array of arguments of an instruction. 
The + non-address args therefore start from 0, conveniently, and we don't + need to know if there are any address args or not, so we don't need to + know if it's a memory operation or not. *) + List.init t.non_address_arg_count (fun arg_i -> f ~arg_i) + + let iter_vectorizable_args t ~f = + (* see [map_vectorizable_args] *) + for arg_i = 0 to t.non_address_arg_count - 1 do + f ~arg_i + done + + let for_all_non_vectorizable_args t ~f = + (* see [map_vectorizable_args] *) + let rec loop arg_i = + if arg_i = t.arg_count + then true + else if f ~arg_i + then loop (arg_i + 1) + else false + in + loop t.non_address_arg_count + + let same_stack_offset instructions = + match instructions with + | [] -> true + | hd :: tl -> + let stack_offset = Instruction.stack_offset hd in + List.for_all + (fun i -> Int.equal stack_offset (Instruction.stack_offset i)) + tl + + let have_isomorphic_op instructions = + match instructions with + | [] -> true + | hd :: tl -> + let is_isomorphic i = + Instruction.have_isomorphic_op hd i + && Int.equal (get_arg_count hd) (get_arg_count i) + && Int.equal (get_res_count hd) (get_res_count i) + in + List.for_all is_isomorphic tl + + let independent instructions deps = + let res = Dependencies.all_independent deps instructions in + State.dump_debug (Dependencies.state deps) "Group.independent: res=%b\n" + res; + res + + (** Returns true if all memory accesses performed by the [instructions] are + vectorizable. Current implementation assumes that all instructions have + isomorphic operations. It returns true if the operations do not access + memory, or if all memory accesses are adjacent; otherwise returns false. In + the future, this can be extended to support shuffles. 
*) + let can_vectorize_memory_accesses mem_op instructions deps = + match mem_op with + | None -> true + | Some _ -> + let res = Dependencies.all_adjacent deps instructions in + State.dump_debug (Dependencies.state deps) + "Group.all_adjacent: res=%b\n" res; + res + + let init ~width_in_bits instructions deps = + assert (List.length instructions > 1); + assert ( + Vectorize_utils.Width_in_bits.to_int width_in_bits + * List.length instructions + = Simd_selection.vector_width_in_bits); + Format.( + State.dump_debug (Dependencies.state deps) "Group.init\n%a\n" + (pp_print_list ~pp_sep:pp_print_newline Instruction.print_id) + instructions); + match instructions with + | [] -> assert false + | instruction :: _ -> ( + let arg_count = get_arg_count instruction in + let res_count = get_res_count instruction in + let mem_op = Dependencies.get_memory_operation deps instruction in + if not + (same_stack_offset instructions + && have_isomorphic_op instructions + && independent instructions deps + && can_vectorize_memory_accesses mem_op instructions deps) + then None + else + let cfg_ops = + List.map (fun i -> i |> Instruction.op |> Option.get) instructions + in + let vector_instructions = + Simd_selection.vectorize_operation width_in_bits ~arg_count + ~res_count cfg_ops + in + match vector_instructions with + | None -> None + | Some vector_instructions -> + let non_address_arg_count = + match mem_op with + | None -> arg_count + | Some mem_op -> + Dependencies.Memory.Operation.first_memory_arg_index mem_op + in + assert (List.length vector_instructions > 0); + Some + { vector_instructions; + instructions; + non_address_arg_count; + arg_count + }) + + (* Load: At the moment, we do not vectorize dependencies of load + instructions, and don't support vectorizing load instructions that have + interesting vectorizable address dependencies, such as scatter-gather. 
*) + (* Store: It must be the seed group, because store instruction has no + "result", so it can't be a dependency of another group in the tree. We + have already checked that seed consists of adjacent memory accesses that + are independent. The only remaining dependency to check is the new value + stored. It's always the argument at index 0. *) + + let dump ppf t = + let open Format in + let spp ppf l = + pp_print_list ~pp_sep:pp_print_newline Instruction.print ppf l + in + let vpp ppf l = + pp_print_list ~pp_sep:pp_print_newline + Vectorize_utils.Vectorized_instruction.print ppf l + in + fprintf ppf "Group:\nScalar:\n%a\nVector:\n%a\n" spp t.instructions vpp + t.vector_instructions + end + + module Seed : sig + (** A seed is a group of inter-independent store instructions that access adjacent + memory addresses. *) + type t + + val lane_width_in_bits : t -> Vectorize_utils.Width_in_bits.t + + val group : t -> Group.t + + val from_block : Block.t -> Dependencies.t -> t list + + val dump : Format.formatter -> t list -> unit + + val exists_address_dependency : + t -> f:(Instruction.Id.t -> Reg.t -> bool) -> bool + end = struct + type t = + { group : Group.t; + width_in_bits : Vectorize_utils.Width_in_bits.t + } + + let init ~width_in_bits instructions deps = + match Group.init ~width_in_bits instructions deps with + | None -> None + | Some group -> Some { group; width_in_bits } + + (* [take ~n list] returns the first [n] items of [list], or None if list has + fewer than [n] items. *) + let take ~n ~width_in_bits l = + let rec loop n l acc = + if n <= 0 + then Some (List.rev acc) + else + match l with + | [] -> None + | (w, hd) :: tl -> + if Vectorize_utils.Width_in_bits.equal w width_in_bits + then loop (n - 1) tl (hd :: acc) + else None + in + loop n l [] + + let lane_width_in_bits t = t.width_in_bits + + let group t = t.group + + (* [is_store i] if [i] is a Store, return the width in bits, otherwise + return None. 
*) + let is_store instruction = + match Instruction.op instruction with + | None -> None + | Some op -> ( + match op with + | Store (chunk, _, _) -> Some chunk + | Alloc _ | Load _ | Move | Reinterpret_cast _ | Static_cast _ | Spill + | Reload | Const_int _ | Const_float32 _ | Const_float _ + | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _ + | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ + | Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _ + | Dls_get | Poll -> + None) + + let from_block (block : Block.t) deps : t list = + (* For each store instruction, tries to form a seed with the closest + stores after it, skipping other instructions. This is a heuristic that + considers only [Store] instructions, not other intructions that write + to memory. The goal is to quickly filter out blocks without candidate + seed, and only use [deps] when needed. *) + (* CR-someday gyorsh: try to reorder independent stores to the same base + address to create adjacent accesses that can be vectorized, for + example: [b+8] <- x; [b]<-y *) + (* CR-someday gyorsh: find seeds with any store, not necessarily the + closest stores. This requires proving that stores that are skipped are + independent, but at the moment we can only do it if they are to the + same address which limits the effectiveness of looking further ahead. 
+ It would only add cases of the form: [b] <- x0; [b+16] <-x2 [b+8] <-x1; + [b+24] <- x3 *) + let body = Block.body block in + let all_stores = + DLL.fold_right body ~init:[] ~f:(fun i acc -> + let i = Instruction.basic i in + match is_store i with + | Some chunk -> + (Vectorize_utils.Width_in_bits.of_memory_chunk chunk, i) :: acc + | None -> acc) + in + Format.( + let pp_pair ppf (x, y) = + fprintf ppf "(%a, %a)" Vectorize_utils.Width_in_bits.print x + Instruction.print y + in + State.dump_debug (Block.state block) + "Seeds.from_block: all_stores=\n(%a)\n" + (pp_print_list ~pp_sep:pp_print_newline pp_pair) + all_stores); + let rec loop stores acc = + match stores with + | [] -> List.rev acc + | (width_in_bits, _store) :: tl -> ( + let n = + Simd_selection.vector_width_in_bits + / Vectorize_utils.Width_in_bits.to_int width_in_bits + in + if n <= 1 + then + (* nothing to vectorize, the store's access is at least + vector-length. *) + loop tl acc else - match cell_option with - | None -> None - | Some cell -> - let instruction = Instruction.Basic (DLL.value cell) in - if Instruction.is_store instruction - then - let new_store = - Instruction.id instruction - |> Memory_accesses.get_memory_operation_exn memory_accesses - in - if Memory_accesses.Memory_operation.is_adjacent (List.hd seed) - new_store - then find_seed (n - 1) (new_store :: seed) (DLL.next cell) - else None - else if can_cross_chunk seed instruction - then find_seed n seed (DLL.next cell) - else None - in - find_seed (items_in_vector - 1) - [starting_memory_operation] - starting_cell - |> Option.map List.rev) - stores - - let dump ppf (seeds : t list) = + match take ~n ~width_in_bits stores with + | None -> + (* not enough instructions in [stores] to make a group of [n] + instructions. 
*) + loop tl acc + | Some instructions -> ( + Format.( + State.dump_debug (Block.state block) + "Seeds.from_block: instructions=\n(%a)\n" + (pp_print_list Instruction.print_id) + instructions); + match init ~width_in_bits instructions deps with + | None -> loop tl acc + | Some t -> loop tl (t :: acc))) + in + loop all_stores [] |> List.rev + + let exists_address_dependency (t : t) ~f = + (* Seed construction guarantees that all instructions in the [seed] have + the same address arguments and isomorphic operations. *) + let instruction = Group.scalar_instructions t.group |> List.hd in + let args = Instruction.arguments instruction in + let len = Array.length args in + let id = Instruction.id instruction in + let rec exists i = + (* The first argument of a store is the new value, the rest are address + arguments. *) + if i < 1 then false else f id args.(i) || exists (i - 1) + in + exists (len - 1) + + let dump ppf (seeds : t list) = + let open Format in + fprintf ppf "\nSeeds:\n"; + List.iter (fun t -> fprintf ppf "(%a)\n" Group.dump t.group) seeds + end + + type t = + { groups : Group.t Instruction.Id.Map.t; + (* [all_instructions] is all the scalar instructions in the computations. + It is an optimization to cache this value here. It is used for ruling + out computations that are invalid or not implementable, and to estimate + cost/benefit of vectorized computations. *) + all_scalar_instructions : Instruction.Id.Set.t; + new_positions : int Instruction.Id.Map.t + } + + let num_groups t = Instruction.Id.Map.cardinal t.groups + + let num_vector_instructions t = + Instruction.Id.Map.fold + (fun _k g acc -> acc + List.length (Group.vector_instructions g)) + t.groups 0 + + let num_scalar_instructions t = + Instruction.Id.Set.cardinal t.all_scalar_instructions + + (** [cost t] returns an integer [n] describing the cost of + vectorized computation [t] instead of the original code: + negative [n] means vectorized computation is better than + the original code. 
The goal is to find [t] that minimizes cost(t). + + Currently, [cost] uses a naive measure of number of instructions, + i.e., the difference between the number of vector instructions + and the number of scalar instructions. *) + let cost t = num_vector_instructions t - num_scalar_instructions t + + let is_cost_effective t = cost t < 0 + + let dump_one_line_stat ppf t = + Format.fprintf ppf + "%d groups, %d scalar instructions, %d vector instructions, cost = %d" + (num_groups t) + (num_scalar_instructions t) + (num_vector_instructions t) + (cost t) + + let dump ppf ~(block : Block.t) t = + let open Format in + let print_group id group_option = + match group_option with + | None -> () + | Some (group : Group.t) -> + fprintf ppf "\nGroup key: %a\n%a\n" Instruction.Id.print id Group.dump + group + in + Format.fprintf ppf "Vectorized computation has %a:\n" dump_one_line_stat t; + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + Instruction.Id.Map.find_opt id t.groups |> print_group id) + + let dump_all ppf ~(block : Block.t) (trees : t list) = let open Format in - let print_seed seed = - List.iter - (fun (address : Memory_accesses.Memory_operation.t) -> - Memory_accesses.Memory_operation.dump ppf address) - seed + let print_trees ppf trees = + List.iter (fun tree -> fprintf ppf "(%a)\n" (dump ~block) tree) trees + in + fprintf ppf "Vectorized computations:\n(%a)\n" print_trees trees + (* The key is the id of the instruction where the group will be inserted, + which is the last instruction in the group for now, we can change that + later *) + + let find_group t ~key = + let key_id = Instruction.id key in + Instruction.Id.Map.find_opt key_id t.groups + + let contains_id t id = Instruction.Id.Set.mem id t.all_scalar_instructions + + let contains t instruction = + let id = Instruction.id instruction in + contains_id t id + + let is_dependency_of_the_rest_of_body t block deps = 
+ (* is there an instruction outside the computation that directly depends on + a register defined in the computation? *) + let res = + DLL.exists (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + if contains t instruction + then false + else + let id = Instruction.id instruction in + let direct_deps = Dependencies.get_direct_dependencies deps id in + let res = + not + (Instruction.Id.Set.disjoint direct_deps + t.all_scalar_instructions) + in + State.dump_debug (Block.state block) + "Computation.is_dependency_of_the_rest_of_body = %b %a\n" res + Instruction.print instruction; + res) + in + State.dump_debug (Block.state block) + "Computation.is_dependency_of_the_rest_of_body result = %b\n" res; + res + + (** [depends_on_computation t deps id reg] does the value of [reg] at [id] + depend on any instruction in [t]? *) + let depends_on_computation t deps id reg = + match Dependencies.get_direct_dependency_of_reg deps id reg with + | None -> false + | Some reaching_definition_id -> + Instruction.Id.Set.mem reaching_definition_id t.all_scalar_instructions + + let is_dependency_of_outside_body t block deps = + (* live registers before terminator represent dependencies outside of + body. *) + let terminator_id = Instruction.id (Block.terminator block) in + let live_before_terminator = Block.get_live_regs_before_terminator block in + let res = + (* live register before terminator are not defined by the computation. 
*) + Reg.Set.exists + (depends_on_computation t deps terminator_id) + live_before_terminator + in + State.dump_debug (Block.state block) + "Computation.is_valid: is_dependency_of_outside_body = %b\n" res; + res + + let seed_address_does_not_depend_on_tree t block deps seed = + let res = + (* the only way seed address can depend on the tree is if there is a + constant assignment *) + not + (Seed.exists_address_dependency seed ~f:(depends_on_computation t deps)) + in + State.dump_debug (Block.state block) + "Computation.is_valid seed_address_does_not_depend_on_tree = %b\n" res; + res + + let is_respected t block src dst = + let old_pos id = Block.pos block id in + let new_pos id = + match Instruction.Id.Map.find_opt id t.new_positions with + | None -> old_pos id + | Some pos -> pos + in + let appears get_pos src ~after:dst = get_pos src > get_pos dst in + (* [src] depends on [dst] implies that [src] appears after [dst] in the + block, i.e., [old_pos_src] > [old_pos_dst]. Check that [new_pos_src] > + [new_pos_dst]. *) + if not (appears old_pos src ~after:dst) + then + Misc.fatal_errorf + "Unexpected old position: %a (old pos %d) depends on %a (old pos %d)" + Instruction.Id.print src (old_pos src) Instruction.Id.print dst + (old_pos dst); + let res = appears new_pos src ~after:dst in + State.dump_debug (Block.state block) + "Computation.respects_memory_dependencies = %b: %a (old pos %d, new pos \ + %d) depends on %a (old pos %d, new pos %d)\n" + res Instruction.Id.print src (old_pos src) (new_pos src) + Instruction.Id.print dst (old_pos dst) (new_pos dst); + res + + let respects_memory_dependencies t block deps = + (* conservative: for each memory dependency (data or order), check that the + new order of instructions satisfies the dependency. This is needed to + ensure that memory deps between different groups are respected, and + memory deps between instructions in and out of the vectorized computation + are respected. 
The construction of the groups themselves guarantees that + there are no deps of any kind (reg, mem, or order) between instructions + in the same group. *) + let res = + Dependencies.for_all_memory_dependencies ~f:(is_respected t block) deps + in + State.dump_debug (Block.state block) + "Computation.respects_memory_dependencies result = %b\n" res; + res + + let respects_register_order_constraints t deps = + (* Check that read or write of register [r] is not reordered past another + write to [r] (similarly to respecting order constraints between memory + operations in [respects_memory_dependencies]). This check would not be + needed if we had basic-block-level SSA. *) + let is_valid_definition instruction ~arg_i ~new_pos = + let id = Instruction.id instruction in + let args = Instruction.arguments instruction in + let reg = args.(arg_i) in + let old_def = Dependencies.get_direct_dependency_of_reg deps id reg in + let new_def = + Dependencies.get_direct_dependency_of_reg deps new_pos reg + in + match old_def, new_def with + | None, None -> + (* The register is defined before the block, and not redefined within + the block. *) + true + | None, Some _ -> + (* The instruction uses register defined before the block, but the + register is redefined before the new position. 
*) + false + | Some _, None -> + Misc.fatal_errorf + "Use of clobbered register %a at %a, previously defined at %a" + Printreg.reg reg Instruction.Id.print new_pos Instruction.print + instruction + | Some old_def, Some new_def -> + (* cannot move past another definition point *) + Instruction.Id.equal old_def new_def in - let print_seeds seeds = - List.iter - (fun seed -> - fprintf ppf "("; - print_seed seed; - fprintf ppf "\n)\n") - seeds + Instruction.Id.Map.for_all + (fun key group -> + let scalar_instructions = Group.scalar_instructions group in + Group.for_all_non_vectorizable_args group ~f:(fun ~arg_i -> + List.for_all + (is_valid_definition ~arg_i ~new_pos:key) + scalar_instructions)) + t.groups + + let respects_register_dependencies t block deps = + (* Each computation is guaranteed by construction to respect register + dependencies, but after merging two computation, they may be violated if + one of the computations depends on another and the placement of vector + instructions does not respect the dependency. Conservatively reject such + a computation for now. This check should run at the same time as + [is_dependency_of_the_rest_of_body]. Better placement of vectorized + instructions can help in some cases, but not all and not yet + implemented. 
*) + let is_valid_dep i = + let id = Instruction.id i in + let set = Dependencies.get_direct_dependencies deps id in + Instruction.Id.Set.for_all + (fun dep_id -> + if contains_id t dep_id then is_respected t block id dep_id else true) + set in - fprintf ppf "\nseeds:\n"; - print_seeds seeds; - fprintf ppf "\n" + Instruction.Id.Map.for_all + (fun _key group -> + let scalar_instructions = Group.scalar_instructions group in + List.for_all is_valid_dep scalar_instructions) + t.groups + + (* CR gyorsh: [is_dependency_of_outside_body] condition can be weakened if we + propagate register substitution to instructions that depend on them outside + the tree (in the same block and other blocks), but may require additional + instructions to extract scalar values from vector registers. Same weakening + can be applied to [is_dependency_of_the_rest_of_body]. We check + [is_dependency_of_the_rest_of_body] later, after [select_and_merge], + because it allows us to vectorize computations that share some nodes. *) + let is_valid t block deps = + respects_memory_dependencies t block deps + && respects_register_order_constraints t deps + && not (is_dependency_of_outside_body t block deps) + + (** The key is the last instruction id, for now. This is the place + where the vectorized intructions will be inserted. *) + let get_key block instruction_ids = + let last_instruction = Block.find_last_instruction block instruction_ids in + Instruction.id last_instruction + + (** Returns the dependencies of arguments at position [arg_i] + of each instruction in [instruction_ids]. Returns None if + one of the instruction's dependencies is None for [arg_i]. 
*) + let get_deps deps ~arg_i instruction_ids = + Misc.Stdlib.List.map_option + (Dependencies.get_direct_dependency_of_arg deps ~arg_i) + instruction_ids + + let all_instructions map = + Instruction.Id.Map.fold + (fun _key (group : Group.t) acc -> + let instructions = Group.scalar_instructions group in + let seq = List.to_seq instructions |> Seq.map Instruction.id in + Instruction.Id.Set.add_seq seq acc) + map Instruction.Id.Set.empty + + let new_positions map block = + (* CR-someday gyorsh: improve instruction scheduling for vectorized + instructions to allow more code to be vectorized. Order constraints can + be used to choose the position of vectorized instructions within a block, + instead of the predefined position used now for [Group.key]. *) + let old_pos id = Block.pos block id in + Instruction.Id.Map.fold + (fun key group acc -> + let key_pos = old_pos key in + List.fold_left + (fun acc i -> + let id = Instruction.id i in + Instruction.Id.Map.add id key_pos acc) + acc + (Group.scalar_instructions group)) + map Instruction.Id.Map.empty + + let empty = + { groups = Instruction.Id.Map.empty; + all_scalar_instructions = Instruction.Id.Set.empty; + new_positions = Instruction.Id.Map.empty + } + + (* CR gyorsh: if same instruction belongs to two groups, is it handled + correctly? no, only same key is handled correctly. *) + (* CR gyorsh: handle same instruction multiple times in the same group + correctly, don't allow duplicating. Currently, this can only lead to a + valid tree if the duplicated instruction is [Const_*] *) + let rec build group map ~block ~deps ~width_in_bits = + (* Recursively builds the vectorized computation and returns the key of the + root, otherwise None. It consists of groups do not depend on instructions + outside the computation except for loads. 
*) + match group with + | None -> None + | Some (group : Group.t) -> ( + let instruction_ids = + Group.scalar_instructions group |> List.map Instruction.id + in + let key = get_key block instruction_ids in + (* Is there another group with the same key already in the tree? If the + key instruction of the group is already in another group, and the other + group is different from this group, we won't vectorize this for + simplicity's sake. *) + match Instruction.Id.Map.find_opt key map with + | Some (old_group : Group.t) -> + if Group.equal group old_group then Some map else None + | None -> + (* add to the map *) + let map = Instruction.Id.Map.add key group map in + (* try to create groups for all dependencies *) + let dep_groups = + Group.map_vectorizable_args group ~f:(fun ~arg_i -> + (* [arg_i] ranges over indexes of arguments that need to be + considered when vectorizing dependencies. Currently skips over + arguments that are used for memory address calculation. + [Group.init] ensures that the memory address arguments have the + same values for all instructions in the group. *) + (* CR-someday gyorsh: refer directly to [Reg.t] instead of + positional [arg_i]. Currently, the code assumes that address + args are always at the end. *) + match get_deps deps ~arg_i instruction_ids with + | None -> + (* At least one of the arguments has a dependency outside the + block. Currently, not supported. *) + None + | Some dep_ids -> + State.dump_debug (Block.state block) + "Computation.from_seed build deps arg_i=%d\n" arg_i; + let instructions = List.map (Block.find block) dep_ids in + Group.init ~width_in_bits instructions deps) + in + let missing_deps = List.exists Option.is_none dep_groups in + if missing_deps + then None + else + (* recurse to vectorize dependencies. 
*) + List.fold_left + (fun acc g -> Option.bind acc (build g ~block ~deps ~width_in_bits)) + (Some map) dep_groups) + + let from_seed (block : Block.t) deps seed = + let width_in_bits = Seed.lane_width_in_bits seed in + let root = Seed.group seed in + State.dump_debug (Block.state block) "Computation.from_seed root=\n%a\n" + Group.dump root; + let map = Instruction.Id.Map.empty in + match build (Some root) map ~block ~deps ~width_in_bits with + | None -> None + | Some map -> + let t = + { groups = map; + all_scalar_instructions = all_instructions map; + new_positions = new_positions map block + } + in + State.dump_debug (Block.state block) + "Computation.from_seed build finished\n%a\n" (dump ~block) t; + assert (seed_address_does_not_depend_on_tree t block deps seed); + if is_valid t block deps then Some t else None + + let join t1 t2 = + { groups = + Instruction.Id.Map.union + (fun key g1 g2 -> + if not (Group.equal g1 g2) + then + Misc.fatal_errorf + "Computation.join: illegal groups for key=%a group1=%a \ + group2=%a" + Instruction.Id.print key Group.dump g1 Group.dump g2; + Some g1) + t1.groups t2.groups; + all_scalar_instructions = + Instruction.Id.Set.union t1.all_scalar_instructions + t2.all_scalar_instructions; + new_positions = + Instruction.Id.Map.union + (fun key pos1 pos2 -> + if not (Int.equal pos1 pos2) + then + Misc.fatal_errorf + "Computation.join: illegal new_positions for key=%a pos1=%d \ + pos2=%d" + Instruction.Id.print key pos1 pos2; + Some pos1) + t1.new_positions t2.new_positions + } + + (** [compatible t t'] returns true if for every group [g] in [t], + and [g'] in [t'], [g] and [g'] are equal or have disjoint sets + of scalar instructions. 
*) + let compatible t t' = + if Instruction.Id.Set.disjoint t.all_scalar_instructions + t'.all_scalar_instructions + then true + else + let sub t1 t2 = + Instruction.Id.Map.for_all + (fun key g1 -> + match Instruction.Id.Map.find_opt key t2.groups with + | Some g2 -> + (* equal groups: if the key is in t2, then the corresponding + groups are equal. *) + (* CR gyorsh: don't need to repeat this check in the symmetric + case, but it's easier to understand the code this way. *) + Group.equal g1 g2 + | None -> + (* disjoint groups: if the key is not in t2, then all insts are + not in t2. *) + List.for_all + (fun i -> + not + (Instruction.Id.Set.mem (Instruction.id i) + t2.all_scalar_instructions)) + (Group.scalar_instructions g1)) + t1.groups + in + sub t t' && sub t' t + + let select_and_join trees block deps = + match trees with + | [] -> None + | trees -> + (* sort by cost, ascending *) + let compare_cost t1 t2 = Int.compare (cost t1) (cost t2) in + let trees = List.sort compare_cost trees in + let rec loop trees acc = + match trees with + | [] -> acc + | hd :: tl -> + if compatible hd acc + then + let new_acc = join hd acc in + if compare_cost new_acc acc < 0 + then loop tl new_acc + else + (* CR gyorsh: this case is not reachable with the current cost function. *) + (* skip [hd], try to add the rest of the tail *) + loop tl acc + else (* skip [hd], add the rest of the tail *) + loop tl acc + in + let res = loop trees empty in + (* CR gyorsh: does join respect memory order? 
*) + assert (is_valid res block deps); + State.dump_debug (Block.state block) "Computation.select_and_join %a\n" + (dump ~block) res; + if is_dependency_of_the_rest_of_body res block deps + || (not (respects_register_dependencies res block deps)) + || not (is_cost_effective res) + then None + else Some res end -let dump ppf cfg_with_layout ~msg = - let open Format in - let cfg = Cfg_with_layout.cfg cfg_with_layout in - fprintf ppf "\nvectorization extra information for %s\n" msg; - fprintf ppf "%s\n" (Cfg.fun_name cfg); - let block_count = Label.Tbl.length cfg.blocks in - fprintf ppf "blocks.length=%d\n" block_count; - let body_instruction_count = - Cfg.fold_body_instructions cfg ~f:(fun sum _ -> sum + 1) ~init:0 +let augment_reg_map reg_map group = + (* Make sure that [reg_map] contains all scalar registers of the [group] that + will be replaced by vector registers. It is not enough to map only scalar + registers that appear in [key_instruction] because another group's key + instruction may refer to different scalar registers in the same packed + register (i.e., the order of scalar instructions need not be the same as + block order, and the key is currently the last instruction in the block + order. + + For example, group [10;7] and group [13;14]: *) + + (* (id:5) V/63 := val [b:V/62 + 8] + * (id:6) V/64 := val [a:V/61 + 8] + * (id:7) Paddint:I/65 := V/64 + V/63 + -1 + * (id:8) V/66 := val [b:V/62] + * (id:9) V/67 := val [a:V/61] + * (id:10) Paddint:I/68 := V/67 + V/66 + -1 + * (id:13) val[V/69] := Paddint:I/68 (init) + * (id:14) val[V/69 + 8] := Paddint:I/65 (init) *) + + (* The key of [10;7] has result register [I/68] but the corresponding argument + register in the key of [13;14] is [I/65]. 
*) + let scalar_instructions = Computation.Group.scalar_instructions group in + let arg = List.map Instruction.arguments scalar_instructions in + let res = List.map Instruction.results scalar_instructions in + let augment index reg_arrays = + let pack = List.map (fun reg_array -> reg_array.(index)) reg_arrays in + match pack with + | [] -> () + | hd :: tl -> ( + match Substitution.get_reg_opt reg_map hd with + | None -> Substitution.fresh_reg_for_pack reg_map pack Vec128 + | Some old_reg_for_hd -> + (* other registers in the pack must be mapped in the same way as + [hd]. *) + List.iter + (fun reg -> + match Substitution.get_reg_opt reg_map reg with + | None -> + Misc.fatal_errorf + "augment_reg_map: %a is mapped to %a but %a is not mapped" + Printreg.reg hd Printreg.reg old_reg_for_hd Printreg.reg reg + | Some old_reg -> + if not (Reg.same old_reg_for_hd old_reg) + then + Misc.fatal_errorf + "augment_reg_map: %a is mapped to %a but %a is mapped to %a" + Printreg.reg hd Printreg.reg old_reg_for_hd Printreg.reg reg + Printreg.reg old_reg) + tl) in - fprintf ppf "body instruction count=%d\n" body_instruction_count; - fprintf ppf "terminator instruction count=%d\n" block_count; - fprintf ppf "body and terminator instruction count=%d\n" - (body_instruction_count + block_count); - fprintf ppf "@." + (* only some of the args are vectorizable, but all results are vectorizable. *) + (* CR gyorsh: get rid of positional interface, use registers directly. 
*) + Computation.Group.iter_vectorizable_args group ~f:(fun ~arg_i -> + augment arg_i arg); + Array.iteri (fun i _r -> augment i res) (List.hd res) + +let add_vector_instructions_for_group reg_map state group ~before:cell + old_instruction = + let vector_instructions = Computation.Group.vector_instructions group in + let key_instruction = Instruction.basic old_instruction in + let new_regs : Reg.t Numbers.Int.Tbl.t = Numbers.Int.Tbl.create 2 in + let get_new_reg n = + match Numbers.Int.Tbl.find_opt new_regs n with + | Some reg -> reg + | None -> + let new_reg = Reg.create Vec128 in + Numbers.Int.Tbl.add new_regs n new_reg; + new_reg + in + let create_instruction + (simd_instruction : Vectorize_utils.Vectorized_instruction.t) = + let get_register + (simd_reg : Vectorize_utils.Vectorized_instruction.register) = + match simd_reg with + | New n -> get_new_reg n + | Argument n -> + let original_reg = (Instruction.arguments key_instruction).(n) in + Substitution.get_reg_exn reg_map original_reg + | Result n -> + let original_reg = (Instruction.results key_instruction).(n) in + Substitution.get_reg_exn reg_map original_reg + | Original n -> + let original_reg = (Instruction.arguments key_instruction).(n) in + original_reg + in + let desc = Cfg.Op simd_instruction.operation in + let arg = Array.map get_register simd_instruction.arguments in + let res = Array.map get_register simd_instruction.results in + let id = State.next_available_instruction state in + Instruction.copy old_instruction ~desc ~arg ~res ~id + in + augment_reg_map reg_map group; + (* actually insert the vector instructions into the body of the block *) + List.iter + (fun simd_instruction -> + create_instruction simd_instruction |> DLL.insert_before cell) + vector_instructions + +let vectorize (block : Block.t) tree = + let reg_map = Block.reg_map block in + let state = Block.state block in + (* Add vector instructions. 
*) + let rec add_vector_instructions cell_option = + match cell_option with + | None -> () + | Some cell -> + (let old_instruction = DLL.value cell in + let instruction = Instruction.basic old_instruction in + match Computation.find_group tree ~key:instruction with + | None -> () + | Some group -> + add_vector_instructions_for_group reg_map state group ~before:cell + old_instruction); + DLL.next cell |> add_vector_instructions + in + let body = Block.body block in + DLL.hd_cell body |> add_vector_instructions; + (* Remove all instructions that were replaced by vector instructions. *) + DLL.filter_left body ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + not (Computation.contains tree instruction)) + +exception Cannot_reorder of Instruction.t * Instruction.t + +let can_reorder tree body deps = + (* Checks nodes can be grouped together. Modifies a copy of the block's body + to move the scalar instructions together to where they would be replaced by + the corresponding vector instructions. Does not actually emit vectorized + instructions. There is a faster/simpler/safer way to emit vectorized code. + + Used for validation only. Intended to work even after the vectorization + heuristics evolve. Can be expensive. *) + let reorder_instruction body instruction position = + match position with + | None -> assert false + | Some position -> ( + (* find the cell that holds the scalar [instruction] *) + let same_position i = + let p = DLL.value position in + Instruction.equal_id i p + in + if same_position instruction + then + (* the [instruction] is already at [position], no need to reorder. *) + DLL.prev position + else + let cell = + DLL.find_cell_opt body ~f:(Instruction.equal_id instruction) + |> Option.get + in + (* Traverse from [cell] to [position], making sure the [instruction] can + be moved across every [other_instruction] along the way. 
*) + let rec can_cross cur = + match DLL.next cur with + | None -> assert false + | Some next_cell -> + let other_instruction = DLL.value next_cell in + if not (Dependencies.independent deps instruction other_instruction) + then raise (Cannot_reorder (instruction, other_instruction)) + else if same_position other_instruction + then true + else can_cross next_cell + in + match can_cross cell with + | false -> None + | true -> + (* Insert the scalar [instruction] after [position] and return the new + cell. This duplicates the scalar instruction, including the key. *) + DLL.insert_after position instruction; + (* delete old scalar instructions *) + DLL.delete_curr cell; + Some position) + in + let reorder_group position : Instruction.t DLL.cell option = + let key = DLL.value position in + let group = Computation.find_group tree ~key in + match group with + | None -> DLL.prev position + | Some (group : Computation.Group.t) -> + let instructions = Computation.Group.scalar_instructions group in + (* traverse [instructions] backwards, moving each to [position], and + returns the cell before the group. *) + List.fold_right (reorder_instruction body) instructions (Some position) + in + let rec reorder cell_option = + match cell_option with + | None -> () + | Some cell -> reorder_group cell |> reorder + in + (* traverse the block backwards *) + DLL.last_cell body |> reorder + +let validate tree block deps = + (* CR-soon gyorsh: if placement changes, the check will be too strict. *) + let state = Block.state block in + let body = Block.body block in + (* copy body *) + let body = DLL.map ~f:Instruction.basic body in + (try can_reorder tree body deps + with Cannot_reorder (i1, i2) -> + let pp ppf body = + DLL.iter body ~f:(fun i -> Format.fprintf ppf "%a\n" Instruction.print i) + in + Misc.fatal_errorf "Cannot reorder %a %a in Block:\n%a\n" + Instruction.print_id i1 Instruction.print_id i2 pp body); + State.dump_debug state "Validated. 
Reordered block:\n"; + DLL.iter body ~f:(fun i -> State.dump_debug state "%a\n" Instruction.print i) + +let count block computation = + let counter = + Profile.Counters.create () + |> Profile.Counters.set "tried_to_vectorize_blocks" 1 + |> Profile.Counters.set "block_size" (Block.size block) + in + if Block.size block > !Flambda_backend_flags.vectorize_max_block_size + then counter |> Profile.Counters.set "block_too_big" 1 + else + match computation with + | None -> counter + | Some c -> + counter + |> Profile.Counters.set "vectorized_block" 1 + |> Profile.Counters.set "cost" (Computation.cost c) + |> Profile.Counters.set "num_groups" (Computation.num_groups c) + +let maybe_vectorize block = + let state = Block.state block in + let instruction_count = Block.size block in + let label = Block.start block in + State.dump state "\nBlock %a:\n" Label.print label; + if instruction_count > !Flambda_backend_flags.vectorize_max_block_size + then ( + State.dump state + "Skipping block %a with %d instructions (> %d = \ + max_block_size_to_vectorize).\n" + Label.print label instruction_count + !Flambda_backend_flags.vectorize_max_block_size; + None) + else + let deps = Dependencies.from_block block in + let seeds = Computation.Seed.from_block block deps in + State.dump_debug state "%a@." Computation.Seed.dump seeds; + let computations = + List.filter_map (Computation.from_seed block deps) seeds + in + State.dump_debug state "%a@." 
(Computation.dump_all ~block) computations; + match Computation.select_and_join computations block deps with + | None -> None + | Some computation -> + let scoped_name = + State.fun_dbg state |> Debuginfo.get_dbg |> Debuginfo.Dbg.to_list + |> List.map (fun dbg -> + Debuginfo.( + Scoped_location.string_of_scopes ~include_zero_alloc:false + dbg.dinfo_scopes)) + |> String.concat "," + in + State.dump state "**** Vectorize selected computation: %a (%s)\n" + Computation.dump_one_line_stat computation scoped_name; + State.dump_debug state "%a\n" (Computation.dump ~block) computation; + if State.extra_debug then validate computation block deps; + let dump_block msg block = + let size = DLL.length (Block.body block) in + State.dump state "Block %a in %s: %s body instruction count=%d\n" + Label.print (Block.start block) (State.fun_name state) msg size; + DLL.iter (Block.body block) ~f:(fun i -> + State.dump_debug state "%a\n" Instruction.print + (Instruction.basic i)) + in + dump_block "before vectorize" block; + (* This is the only function that changes the [block]. *) + vectorize block computation; + dump_block "after vectorize" block; + Some computation let cfg ppf_dump cl = - if !Flambda_backend_flags.dump_vectorize - then Format.fprintf ppf_dump "*** Vectorization@."; + let state = State.create ppf_dump cl in + State.dump state "*** Vectorize@."; let cfg = Cfg_with_layout.cfg cl in + (* Iterate in layout order instead of default block order to make debugging + easier. 
*) let layout = Cfg_with_layout.layout cl in DLL.iter layout ~f:(fun label -> - let block = Cfg.get_block_exn cfg label in - let instruction_count = DLL.length block.body in - Format.fprintf ppf_dump "\nBlock %a (%d basic instructions):\n" - Label.format label instruction_count; - if instruction_count > 1000 - then - Format.fprintf ppf_dump - "more than 1000 instructions in basic block, cannot vectorize\n" - else - let dependency_graph = Dependency_graph.from_block block in - if !Flambda_backend_flags.dump_vectorize - then Dependency_graph.dump ppf_dump dependency_graph block; - let memory_accesses = Memory_accesses.from_block block in - if !Flambda_backend_flags.dump_vectorize - then Memory_accesses.dump ppf_dump memory_accesses; - let seeds = Seed.from_block block in - if !Flambda_backend_flags.dump_vectorize then Seed.dump ppf_dump seeds); - if !Flambda_backend_flags.dump_vectorize then dump ppf_dump ~msg:"" cl; + let block = Block.create (Cfg.get_block_exn cfg label) state in + (Profile.record_with_counters ~counter_f:(count block) ~accumulate:true + "vectorize_block" maybe_vectorize block + : Computation.t option) + |> ignore); cl diff --git a/backend/cmm.ml b/backend/cmm.ml index 78e5dbd8930..f055944d92f 100644 --- a/backend/cmm.ml +++ b/backend/cmm.ml @@ -388,14 +388,6 @@ type phrase = Cfunction of fundecl | Cdata of data_item list -let width_in_bytes (memory_chunk : memory_chunk) : int = - match memory_chunk with - | Byte_unsigned | Byte_signed -> 1 - | Sixteen_unsigned | Sixteen_signed -> 2 - | Thirtytwo_unsigned | Thirtytwo_signed | Single _ -> 4 - | Word_int | Word_val | Double -> 8 - | Onetwentyeight_unaligned | Onetwentyeight_aligned -> 16 - let ccatch (i, ids, e1, e2, dbg, kind, is_cold) = Ccatch(Nonrecursive, [i, ids, e2, dbg, is_cold], e1, kind) @@ -746,3 +738,8 @@ let equal_integer_comparison left right = false let caml_flambda2_invalid = "caml_flambda2_invalid" + +let is_val (m: machtype_component) = + match m with + | Val -> true + | Addr | Int | 
Float | Vec128 | Float32 -> false diff --git a/backend/cmm.mli b/backend/cmm.mli index aec2331ceae..cfb84c1ba65 100644 --- a/backend/cmm.mli +++ b/backend/cmm.mli @@ -391,8 +391,6 @@ type phrase = Cfunction of fundecl | Cdata of data_item list -val width_in_bytes : memory_chunk -> int - val ccatch : Lambda.static_label * (Backend_var.With_provenance.t * machtype) list * expression * expression * Debuginfo.t * kind_for_unboxing @@ -436,3 +434,4 @@ val equal_memory_chunk : memory_chunk -> memory_chunk -> bool val equal_integer_comparison : integer_comparison -> integer_comparison -> bool val caml_flambda2_invalid : string +val is_val : machtype_component -> bool diff --git a/backend/dune b/backend/dune index 138cb1f09e9..a066b4b4b72 100644 --- a/backend/dune +++ b/backend/dune @@ -15,7 +15,9 @@ (rule (targets arch.ml arch.mli cfg_selection.ml CSE.ml proc.ml regalloc_stack_operands.ml reload.ml selection.ml selection_utils.ml simd.ml simd_selection.ml simd_reload.ml simd_proc.ml - stack_check.ml) + stack_check.ml + vectorize_specific.ml + ) (mode fallback) (deps (glob_files amd64/*.ml) (glob_files amd64/*.mli) diff --git a/backend/operation.mli b/backend/operation.mli index 1afbedd9e37..fe086e7d225 100644 --- a/backend/operation.mli +++ b/backend/operation.mli @@ -30,7 +30,6 @@ (* CR-soon xclerc for xclerc: consider whether `Simple_operation` and `Operation` should be merged into a single module. 
*) - type t = | Move | Spill diff --git a/backend/vectorize_utils.ml b/backend/vectorize_utils.ml new file mode 100644 index 00000000000..f119306bbe8 --- /dev/null +++ b/backend/vectorize_utils.ml @@ -0,0 +1,97 @@ +open Arch + +module Width_in_bits = struct + type t = + | W8 + | W16 + | W32 + | W64 + | W128 + + let of_memory_chunk (c : Cmm.memory_chunk) = + match c with + | Byte_unsigned | Byte_signed -> W8 + | Sixteen_unsigned | Sixteen_signed -> W16 + | Thirtytwo_unsigned | Thirtytwo_signed | Single _ -> W32 + | Word_int | Word_val | Double -> W64 + | Onetwentyeight_unaligned | Onetwentyeight_aligned -> W128 + + let of_atomic_bitwidth (b : Cmm.atomic_bitwidth) = + match b with Thirtytwo -> W32 | Sixtyfour -> W64 | Word -> W64 + + let to_int t = + match t with W128 -> 128 | W64 -> 64 | W32 -> 32 | W16 -> 16 | W8 -> 8 + + let equal t1 t2 = + match t1, t2 with + | W128, W128 -> true + | W64, W64 -> true + | W32, W32 -> true + | W16, W16 -> true + | W8, W8 -> true + | (W128 | W64 | W32 | W16 | W8), _ -> false + + let print ppf t = Format.fprintf ppf "%d" (to_int t) +end + +module Memory_access = struct + module Init_or_assign = struct + type t = + | Initialization + | Assignment + end + + type desc = + | Alloc + | Arbitrary + | Read of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_mutable : bool; + is_atomic : bool + } + | Write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + init_or_assign : Init_or_assign.t + } + | Read_and_write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_atomic : bool + } + + type t = + { desc : desc; + first_memory_arg_index : int + } + + let create ?(first_memory_arg_index = 0) desc = + { desc; first_memory_arg_index } + + let desc t = t.desc + + let first_memory_arg_index t = t.first_memory_arg_index +end + +module Vectorized_instruction = struct + type register = + | New of int + | Argument of int + | Result of int + | Original of int + + 
type t = + { operation : Operation.t; + arguments : register array; + results : register array + } + + let print ppf t = Format.fprintf ppf "%a " Cfg.dump_basic (Cfg.Op t.operation) + + let make_default ~arg_count ~res_count operation : t = + { operation; + arguments = Array.init arg_count (fun i -> Argument i); + results = Array.init res_count (fun i -> Result i) + } +end diff --git a/backend/vectorize_utils.mli b/backend/vectorize_utils.mli new file mode 100644 index 00000000000..43e6961f35a --- /dev/null +++ b/backend/vectorize_utils.mli @@ -0,0 +1,83 @@ +open Arch + +module Width_in_bits : sig + type t = + | W8 + | W16 + | W32 + | W64 + | W128 + + val of_memory_chunk : Cmm.memory_chunk -> t + + val of_atomic_bitwidth : Cmm.atomic_bitwidth -> t + + val to_int : t -> int + + val print : Format.formatter -> t -> unit + + val equal : t -> t -> bool +end + +module Memory_access : sig + module Init_or_assign : sig + type t = + | Initialization + | Assignment + end + + type desc = + | Alloc + | Arbitrary + | Read of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_mutable : bool; + is_atomic : bool + } + | Write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + init_or_assign : Init_or_assign.t + } + | Read_and_write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_atomic : bool + } + + type t + + val create : ?first_memory_arg_index:int -> desc -> t + + val desc : t -> desc + + val first_memory_arg_index : t -> int +end + +module Vectorized_instruction : sig + (** Registers used in vectorized instructions of one scalar instruction + group. 
*) + type register = + | New of int + (** The n-th new temporary register used in the vectorized instructions *) + | Argument of int + (** Vector version of the n-th argument's register of the scalar + instruction *) + | Result of int + (** Vector version of the n-th result's register of the scalar instruction *) + | Original of int + (** Keep the original instruction in the n-th argument/result (depending on whether + it is used in the argument or result of the vectorized instructions) of the + scalar instruction*) + + type t = + { operation : Operation.t; + arguments : register array; + results : register array + } + + val print : Format.formatter -> t -> unit + + val make_default : arg_count:int -> res_count:int -> Operation.t -> t +end diff --git a/driver/flambda_backend_args.ml b/driver/flambda_backend_args.ml index ef92c551d2e..44ade683f79 100644 --- a/driver/flambda_backend_args.ml +++ b/driver/flambda_backend_args.ml @@ -60,6 +60,11 @@ let mk_vectorize f = let mk_no_vectorize f = "-no-vectorize", Arg.Unit f, " Disable vectorizer (EXPERIMENTAL)" +let mk_vectorize_max_block_size f = + "-vectorize-max-block-size", Arg.Int f, + Printf.sprintf " Only CFG block with at most n IR instructions will be vectorized \ + (default %d)" Flambda_backend_flags.default_vectorize_max_block_size + let mk_dvectorize f = "-dvectorize", Arg.Unit f, " (undocumented)" ;; @@ -710,6 +715,7 @@ module type Flambda_backend_options = sig val vectorize : unit -> unit val no_vectorize : unit -> unit + val vectorize_max_block_size : int -> unit val dvectorize : unit -> unit val cfg_selection : unit -> unit @@ -847,6 +853,7 @@ struct mk_vectorize F.vectorize; mk_no_vectorize F.no_vectorize; + mk_vectorize_max_block_size F.vectorize_max_block_size; mk_dvectorize F.dvectorize; mk_cfg_selection F.cfg_selection; @@ -1015,6 +1022,8 @@ module Flambda_backend_options_impl = struct let vectorize = set' Flambda_backend_flags.vectorize let no_vectorize = clear' Flambda_backend_flags.vectorize + let 
vectorize_max_block_size n = + Flambda_backend_flags.vectorize_max_block_size := n let dvectorize = set' Flambda_backend_flags.dump_vectorize let cfg_selection = set' Flambda_backend_flags.cfg_selection @@ -1356,6 +1365,7 @@ module Extra_params = struct | "regalloc-param" -> add_string Flambda_backend_flags.regalloc_params | "regalloc-validate" -> set' Flambda_backend_flags.regalloc_validate | "vectorize" -> set' Flambda_backend_flags.vectorize + | "vectorize-max-block-size" -> set_int' Flambda_backend_flags.vectorize_max_block_size | "cfg-selection" -> set' Flambda_backend_flags.cfg_selection | "cfg-peephole-optimize" -> set' Flambda_backend_flags.cfg_peephole_optimize | "cfg-cse-optimize" -> set' Flambda_backend_flags.cfg_cse_optimize diff --git a/driver/flambda_backend_args.mli b/driver/flambda_backend_args.mli index 0cf27be21ed..6454287a392 100644 --- a/driver/flambda_backend_args.mli +++ b/driver/flambda_backend_args.mli @@ -36,6 +36,7 @@ module type Flambda_backend_options = sig val vectorize : unit -> unit val no_vectorize : unit -> unit + val vectorize_max_block_size : int -> unit val dvectorize : unit -> unit val cfg_selection : unit -> unit diff --git a/driver/flambda_backend_flags.ml b/driver/flambda_backend_flags.ml index 91ad2b4cdf9..5c371cc9d4b 100644 --- a/driver/flambda_backend_flags.ml +++ b/driver/flambda_backend_flags.ml @@ -24,6 +24,10 @@ let regalloc_validate = ref true (* -[no-]regalloc-validate *) let vectorize = ref false (* -[no-]vectorize *) let dump_vectorize = ref false (* -dvectorize *) +let default_vectorize_max_block_size = 100 +let vectorize_max_block_size = + ref default_vectorize_max_block_size (* -vectorize-max-block-size *) + let cfg_selection = ref false (* -[no-]cfg-selection *) let cfg_peephole_optimize = ref true (* -[no-]cfg-peephole-optimize *) diff --git a/driver/flambda_backend_flags.mli b/driver/flambda_backend_flags.mli index 486751cf7b3..a2b082dd77b 100644 --- a/driver/flambda_backend_flags.mli +++ 
b/driver/flambda_backend_flags.mli @@ -24,6 +24,8 @@ val regalloc_validate : bool ref val vectorize : bool ref val dump_vectorize : bool ref +val default_vectorize_max_block_size : int +val vectorize_max_block_size : int ref val cfg_selection : bool ref diff --git a/dune b/dune index 11fd6826617..b72e46d606b 100644 --- a/dune +++ b/dune @@ -527,6 +527,8 @@ linear_utils simplify_terminator vectorize + vectorize_utils + vectorize_specific ;; backend/regalloc regalloc_gi regalloc_gi_state diff --git a/flambda-backend/tests/backend/vectorizer/.ocamlformat b/flambda-backend/tests/backend/vectorizer/.ocamlformat new file mode 100644 index 00000000000..0262494e37d --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/.ocamlformat @@ -0,0 +1,18 @@ +# Please make a pull request to change this file. +disable=false +# There is an .ocamlformat-enable file in this directory. +# Keep the remainder of this file in sync with other .ocamlformat files in this repo. +assignment-operator=begin-line +cases-exp-indent=2 +doc-comments=before +dock-collection-brackets=false +exp-grouping=preserve +if-then-else=keyword-first +module-item-spacing=sparse +parens-tuple=multi-line-only +sequence-blank-line=compact +space-around-lists=false +space-around-variants=false +type-decl=sparse +wrap-comments=true +version=0.24.1 diff --git a/flambda-backend/tests/backend/vectorizer/dune b/flambda-backend/tests/backend/vectorizer/dune index c43d921d279..b20daea1c66 100644 --- a/flambda-backend/tests/backend/vectorizer/dune +++ b/flambda-backend/tests/backend/vectorizer/dune @@ -1,303 +1,9 @@ -; Test1 +(include dune.inc) -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_runner.exe) - (deps test1.ml) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - test1_runner.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - 
STACK_SLOTS_THRESHOLD:3072 - -no-vectorize))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_runner_simd.exe) - (deps test1_simd.ml stubs.c) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - test1_runner_simd.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -no-vectorize - -extension - simd))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_vectorized.ml) - (deps test1.ml) - (action - (run cp test1.ml test1_vectorized.ml))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_runner_vectorized.exe test1_vectorized.cmx.dump) - (deps test1_vectorized.ml) - (action - (run - %{bin:ocamlopt.opt} - test1_vectorized.ml - -o - test1_runner_vectorized.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -dvectorize - -vectorize))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (deps filter.sh test1_vectorized.cmx.dump) - (action - (with-outputs-to - grep_test1_vectorizer_cmx.out - (run ./filter.sh test1_vectorized.cmx.dump)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - test1.out - (run ./test1_runner.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - test1_simd.out - (run ./test1_runner_simd.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - test1_vectorized.out - (run ./test1_runner_vectorized.exe)))) - 
-(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (= %{architecture} "amd64") - (<> %{system} macosx))) - (action - (progn - (diff grep_test1_vectorizer_cmx.out grep_vectorizer_cmx.expected) - (diff test1.out test1.expected) - (diff test1_simd.out test1.expected) - (diff test1_vectorized.out test1.expected)))) - -; Examples - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_runner.exe) - (deps examples.ml) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - examples_runner.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -no-vectorize))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_runner_unrolled.exe) - (deps examples_unrolled.ml stubs.c) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - examples_runner_unrolled.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -no-vectorize))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_vectorized.ml) - (deps examples.ml) - (action - (run cp examples.ml examples_vectorized.ml))) +(rule (with-stdout-to dune.inc.gen (run ./gen/gen_dune.exe))) (rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_runner_vectorized.exe examples_vectorized.cmx.dump) - (deps examples_vectorized.ml) - (action - (run - %{bin:ocamlopt.opt} - examples_vectorized.ml - -o - examples_runner_vectorized.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - 
STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -dvectorize - -vectorize))) + (alias runtest) + (action (diff dune.inc dune.inc.gen))) -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (deps filter.sh examples_vectorized.cmx.dump) - (action - (with-outputs-to - grep_examples_vectorizer_cmx.out - (run ./filter.sh examples_vectorized.cmx.dump)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - examples.out - (run ./examples_runner.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - examples_unrolled.out - (run ./examples_runner_unrolled.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - examples_vectorized.out - (run ./examples_runner_vectorized.exe)))) -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (= %{architecture} "amd64") - (<> %{system} macosx))) - (action - (progn - (diff grep_examples_vectorizer_cmx.out grep_vectorizer_cmx.expected) - (diff examples.out examples.expected) - (diff examples_unrolled.out examples.expected) - (diff examples_vectorized.out examples.expected)))) diff --git a/flambda-backend/tests/backend/vectorizer/dune.inc b/flambda-backend/tests/backend/vectorizer/dune.inc new file mode 100644 index 00000000000..64efbe74dc1 --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/dune.inc @@ -0,0 +1,75 @@ + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (targets test1_runner.exe test1.cmx.dump) + (deps test1.mli test1.ml) + (action (run %{bin:ocamlopt.opt} %{deps} -S -O3 -g -dump-into-file -dcfg -dvectorize -dsel -dlinear -dlive -regalloc cfg -extension simd -no-vectorize -o test1_runner.exe))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (with-outputs-to + test1.output + (run ./test1_runner.exe)))) + 
+(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (diff test1.expected test1.output))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (copy test1.ml test1_vectorized.ml))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (copy test1.mli test1_vectorized.mli))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (targets test1_vectorized_runner.exe test1_vectorized.cmx.dump) + (deps test1_vectorized.mli test1_vectorized.ml) + (action (run %{bin:ocamlopt.opt} %{deps} -S -O3 -g -dump-into-file -dcfg -dvectorize -dsel -dlinear -dlive -regalloc cfg -extension simd -vectorize -o test1_vectorized_runner.exe))) + +(rule + (enabled_if (and (= %{context_name} "main") (= %{architecture} "amd64")) ) + (target test1_vectorized.cmx.dump.output) + (deps ./filter.sh test1_vectorized.cmx.dump) + (action + (with-outputs-to + %{target} + (run %{deps})))) + +(rule + (alias runtest) + (enabled_if (and (= %{context_name} "main") (= %{architecture} "amd64")) ) + (action + (diff test1_vectorized.cmx.dump.expected test1_vectorized.cmx.dump.output))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (with-outputs-to + test1_vectorized.output + (run ./test1_vectorized_runner.exe)))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (copy test1.expected test1_vectorized.expected))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (diff test1_vectorized.expected test1_vectorized.output))) diff --git a/flambda-backend/tests/backend/vectorizer/examples.ml b/flambda-backend/tests/backend/vectorizer/examples.ml deleted file mode 100644 index 213b7fa12d2..00000000000 --- a/flambda-backend/tests/backend/vectorizer/examples.ml +++ /dev/null @@ -1,9 +0,0 @@ -let big_n = 1024 -let a = Array.make (big_n * 2) 0 -let b = Array.make (big_n * 2) 0 -let c = Array.make (big_n * 2) 0 - -let example1 () = - 
for i = 0 to 255 do - Array.set a i (Array.get b i + Array.get c i) - done diff --git a/flambda-backend/tests/backend/vectorizer/examples_unrolled.ml b/flambda-backend/tests/backend/vectorizer/examples_unrolled.ml deleted file mode 100644 index e2c6d3a3aa8..00000000000 --- a/flambda-backend/tests/backend/vectorizer/examples_unrolled.ml +++ /dev/null @@ -1,10 +0,0 @@ -let big_n = 1024 -let a = Array.make (big_n * 2) 0 -let b = Array.make (big_n * 2) 0 -let c = Array.make (big_n * 2) 0 - -let example1 () = - for i = 0 to 127 do - Array.set a (i * 2) (Array.get b (i * 2) + Array.get c (i * 2)); - Array.set a (i * 2 + 1) (Array.get b (i * 2 + 1) + Array.get c (i * 2 + 1)) - done diff --git a/flambda-backend/tests/backend/vectorizer/filter.sh b/flambda-backend/tests/backend/vectorizer/filter.sh index 5b400eb2b9c..8ca64e193c1 100755 --- a/flambda-backend/tests/backend/vectorizer/filter.sh +++ b/flambda-backend/tests/backend/vectorizer/filter.sh @@ -1,3 +1,3 @@ #!/bin/bash -grep "*** Vectorization" $1 --line-regexp --max-count=1 +grep "\*\*\*\* Vectorize selected computation:" $1 diff --git a/flambda-backend/tests/backend/vectorizer/gen/dune b/flambda-backend/tests/backend/vectorizer/gen/dune new file mode 100644 index 00000000000..aaa783a5e0c --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/gen/dune @@ -0,0 +1,2 @@ +(executable + (name gen_dune)) diff --git a/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml b/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml new file mode 100644 index 00000000000..54784707a27 --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml @@ -0,0 +1,168 @@ +let enabled_if_main = {|(enabled_if (= %{context_name} "main"))|} + +let enabled_if_main_amd64 = + {|(enabled_if (and (= %{context_name} "main") (= %{architecture} "amd64")) )|} + +let flags = + "-S -O3 -g -dump-into-file -dcfg -dvectorize -dsel -dlinear -dlive -regalloc \ + cfg -extension simd" + +let runner name = name ^ "_runner.exe" + +let 
output name = name ^ ".output" + +let expected name = name ^ ".expected" + +let impl name = name ^ ".ml" + +let intf name = name ^ ".mli" + +let cmx_dump name = name ^ ".cmx.dump" + +let vectorized name = name ^ "_vectorized" + +let buf = Buffer.create 1000 + +let rule ~subst template = + Buffer.add_substitute buf subst template; + Buffer.output_buffer Out_channel.stdout buf; + Buffer.clear buf + +let compile ~enabled_if ~extra_flags name = + let subst = function + | "enabled_if" -> enabled_if + | "flags" -> flags + | "extra_flags" -> extra_flags + | "runner" -> runner name + | "deps" -> String.concat " " [intf name; impl name] + | "cmx_dump" -> cmx_dump name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (targets ${runner} ${cmx_dump}) + (deps ${deps}) + (action (run %{bin:ocamlopt.opt} %{deps} ${flags} ${extra_flags} -o ${runner}))) +|} + +let run ~enabled_if name = + let subst = function + | "enabled_if" -> enabled_if + | "runner" -> runner name + | "output" -> output name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (action + (with-outputs-to + ${output} + (run ./${runner})))) +|} + +let diff_output ~enabled_if name = + let subst = function + | "enabled_if" -> enabled_if + | "output" -> output name + | "expected" -> expected name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (action + (diff ${expected} ${output}))) +|} + +let copy_file ~enabled_if name new_name = + let subst = function + | "enabled_if" -> enabled_if + | "source" -> name + | "target" -> new_name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (action + (copy ${source} ${target}))) +|} + +let filter_dump ~enabled_if name = + let subst = function + | "enabled_if" -> enabled_if + | "dump" -> name |> cmx_dump + | "filtered" -> name |> cmx_dump |> output + | _ -> assert false + in + rule ~subst + {| +(rule + ${enabled_if} + (target 
${filtered}) + (deps ./filter.sh ${dump}) + (action + (with-outputs-to + %{target} + (run %{deps})))) +|} + +let copy_source_to_vectorize name = + copy_file ~enabled_if:enabled_if_main (name |> impl) + (name |> vectorized |> impl); + copy_file ~enabled_if:enabled_if_main (name |> intf) + (name |> vectorized |> intf) + +let compile_no_vectorizer name = + compile ~enabled_if:enabled_if_main ~extra_flags:"-no-vectorize" name + +let compile_with_vectorizer name = + compile ~enabled_if:enabled_if_main ~extra_flags:"-vectorize" + (vectorized name) + +let filter_vectorizer_dump ~enabled_if name = + filter_dump ~enabled_if (name |> vectorized) + +let diff_vectorizer_dump ~enabled_if name = + diff_output ~enabled_if (name |> vectorized |> cmx_dump) + +let run_no_vectorizer name = run ~enabled_if:enabled_if_main name + +let run_vectorized name = run ~enabled_if:enabled_if_main (name |> vectorized) + +let diff_output_no_vectorizer name = + diff_output ~enabled_if:enabled_if_main name + +let diff_output_vectorized name = + diff_output ~enabled_if:enabled_if_main (name |> vectorized) + +let copy_expected_output name = + copy_file ~enabled_if:enabled_if_main (name |> expected) + (name |> vectorized |> expected) + +let print_test name = + (* check expected test output is up to date *) + compile_no_vectorizer name; + run_no_vectorizer name; + diff_output_no_vectorizer name; + (* vectorizer *) + copy_source_to_vectorize name; + compile_with_vectorizer name; + filter_vectorizer_dump name ~enabled_if:enabled_if_main_amd64; + diff_vectorizer_dump name ~enabled_if:enabled_if_main_amd64; + run_vectorized name; + copy_expected_output name; + diff_output_vectorized name; + () + +let () = print_test "test1" diff --git a/flambda-backend/tests/backend/vectorizer/grep_vectorizer_cmx.expected b/flambda-backend/tests/backend/vectorizer/grep_vectorizer_cmx.expected deleted file mode 100644 index aaa82db1e7e..00000000000 --- a/flambda-backend/tests/backend/vectorizer/grep_vectorizer_cmx.expected 
+++ /dev/null @@ -1 +0,0 @@ -*** Vectorization diff --git a/flambda-backend/tests/backend/vectorizer/test1.expected b/flambda-backend/tests/backend/vectorizer/test1.expected index 0efd67f9502..4b1e5a753fe 100644 --- a/flambda-backend/tests/backend/vectorizer/test1.expected +++ b/flambda-backend/tests/backend/vectorizer/test1.expected @@ -1 +1,11 @@ -2 4 +add_pairs_immutable_record { d0 = 88 ; d1 = 110 } +add_fours_immutable_record { d0 = 34 ; d1 = 97; d2 = 88 ; d3 = 146 } +add_int_tuples (52, 102) +add_t2_to_t4 { d0 = 88 ; d1 = 110; d2 = 88 ; d3 = 110 } +add_t2_to_t4_reordered { d0 = 88 ; d1 = 110; d2 = 110 ; d3 = 88 } +copy_t2_to_t4_immutable_record { d0 = 8 ; d1 = 96; d2 = 8 ; d3 = 96 } +same_value_in_both_fields_immutable_record { d0 = 104 ; d1 = 104 } +copy_pairs_mutable_record { f0 = 42 ; f1 = 27 } +copy_pairs_mutable_record_return { f0 = 42 ; f1 = 27 } +copy_fours_mutable_record { f0 = 1 ; f1 = 2; f2 = 3 ; f3 = -4 } +add_fours_mutable_record { f0 = 43 ; f1 = 29; f2 = 426 ; f3 = 235 } diff --git a/flambda-backend/tests/backend/vectorizer/test1.ml b/flambda-backend/tests/backend/vectorizer/test1.ml index e6d41a69f52..0f7a77fdd1e 100644 --- a/flambda-backend/tests/backend/vectorizer/test1.ml +++ b/flambda-backend/tests/backend/vectorizer/test1.ml @@ -1,9 +1,143 @@ +[@@@ocaml.warnerror "+a-40-41-42"] -let add_pairs (a0, a1 : int64 * int64) (b0, b1 : int64 * int64) = - (Int64.add a0 b0, Int64.add a1 b1) -;; +(* Record with immutable int fields *) + +type t2 = + { d0 : int; + d1 : int + } + +let[@inline never] [@local never] add_pairs_immutable_record (a : t2) (b : t2) : + t2 = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1 } + +type t4 = + { d0 : int; + d1 : int; + d2 : int; + d3 : int + } + +let[@inline never] [@local never] add_fours_immutable_record (a : t4) (b : t4) : + t4 = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1; d2 = a.d2 + b.d2; d3 = a.d3 + b.d3 } + +(* Tuples *) + +let[@inline never] [@local never] add_int_tuples ((a0, a1) : int * int) + ((b0, b1) : int * int) = + 
a0 + b0, a1 + b1 + +let[@inline never] [@local never] add_t2_to_t4 (a : t2) (b : t2) : t4 = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1; d2 = a.d0 + b.d0; d3 = a.d1 + b.d1 } + +(* CR gyorsh: can't vectorize, requires a shuffle because the order of the first + vector access is not the same as the second. *) +let[@inline never] [@local never] add_t2_to_t4_reordered (a : t2) (b : t2) : t4 + = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1; d3 = a.d0 + b.d0; d2 = a.d1 + b.d1 } + +let[@inline never] [@local never] copy_t2_to_t4_immutable_record (a : t2) : t4 = + { d0 = a.d0; d1 = a.d1; d2 = a.d0; d3 = a.d1 } + +(* CR gyorsh: can't vectorize same load. *) +let[@inline never] [@local never] same_value_in_both_fields_immutable_record + (a : t2) : t2 = + let x = a.d0 in + let y = a.d1 in + let z = x + y in + { d0 = z; d1 = z } + +type s2 = + { mutable f0 : int; + mutable f1 : int + } + +let[@inline never] [@local never] copy_pairs_mutable_record (a : s2) (b : s2) : + unit = + b.f0 <- a.f0; + b.f1 <- a.f1; + () + +(* CR gyorsh: dependency outside computation should only look at reg not mem *) +let[@inline never] [@local never] copy_pairs_mutable_record_return (a : s2) + (b : s2) : s2 = + b.f0 <- a.f0; + b.f1 <- a.f1; + b + +type s4 = + { mutable f0 : int; + mutable f1 : int; + mutable f2 : int; + mutable f3 : int + } + +let[@inline never] [@local never] copy_fours_mutable_record (a : s4) (b : s4) : + unit = + a.f0 <- b.f0; + a.f1 <- b.f1; + a.f2 <- b.f2; + a.f3 <- b.f3; + () + +let[@inline never] [@local never] add_fours_mutable_record (a : s4) (b : s4) : + unit = + a.f0 <- b.f0 + a.f0; + a.f1 <- b.f1 + a.f1; + a.f2 <- b.f2 + a.f2; + a.f3 <- b.f3 + a.f3; + () + +(* CR-someday gyorsh: use expect test *) + +let print_t2 ppf (t2 : t2) = + Format.fprintf ppf "{ d0 = %d ; d1 = %d }" t2.d0 t2.d1 + +let print_t4 ppf (t4 : t4) = + Format.fprintf ppf "{ d0 = %d ; d1 = %d; d2 = %d ; d3 = %d }" t4.d0 t4.d1 + t4.d2 t4.d3 + +let print_s2 ppf (s2 : s2) = + Format.fprintf ppf "{ f0 = %d ; f1 = %d }" 
s2.f0 s2.f1 + +let print_s4 ppf (s4 : s4) = + Format.fprintf ppf "{ f0 = %d ; f1 = %d; f2 = %d ; f3 = %d }" s4.f0 s4.f1 + s4.f2 s4.f3 + +let print_pair ppf (x, y) = Format.fprintf ppf "(%d, %d)" x y let () = - let sum0, sum1 = add_pairs (0L, 1L) (2L, 3L) in - Printf.printf "%Lx %Lx\n" sum0 sum1 -;; + Format.printf "add_pairs_immutable_record %a\n" print_t2 + (add_pairs_immutable_record { d0 = 8; d1 = 96 } { d0 = 80; d1 = 14 }); + Format.printf "add_fours_immutable_record %a\n" print_t4 + (add_fours_immutable_record + { d0 = 9; d1 = 12; d2 = 16; d3 = 98 } + { d0 = 25; d1 = 85; d2 = 72; d3 = 48 }); + Format.printf "add_int_tuples %a\n" print_pair + (add_int_tuples (48, 31) (4, 71)); + Format.printf "add_t2_to_t4 %a\n" print_t4 + (add_t2_to_t4 { d0 = 8; d1 = 96 } { d0 = 80; d1 = 14 }); + Format.printf "add_t2_to_t4_reordered %a\n" print_t4 + (add_t2_to_t4_reordered { d0 = 8; d1 = 96 } { d0 = 80; d1 = 14 }); + Format.printf "copy_t2_to_t4_immutable_record %a\n" print_t4 + (copy_t2_to_t4_immutable_record { d0 = 8; d1 = 96 }); + Format.printf "same_value_in_both_fields_immutable_record %a\n" print_t2 + (same_value_in_both_fields_immutable_record { d0 = 8; d1 = 96 }); + Format.printf "copy_pairs_mutable_record %a\n" print_s2 + (let s2 = { f0 = 42; f1 = 27 } in + let s2' = { f0 = 1; f1 = -1 } in + copy_pairs_mutable_record s2 s2'; + s2); + Format.printf "copy_pairs_mutable_record_return %a\n" print_s2 + (copy_pairs_mutable_record_return { f0 = 42; f1 = 27 } { f0 = 0; f1 = -100 }); + Format.printf "copy_fours_mutable_record %a\n" print_s4 + (let s4 = { f0 = 42; f1 = 27; f2 = 423; f3 = 239 } in + let s4' = { f0 = 1; f1 = 2; f2 = 3; f3 = -4 } in + copy_fours_mutable_record s4 s4'; + s4); + Format.printf "add_fours_mutable_record %a\n" print_s4 + (let s4 = { f0 = 42; f1 = 27; f2 = 423; f3 = 239 } in + let s4' = { f0 = 1; f1 = 2; f2 = 3; f3 = -4 } in + add_fours_mutable_record s4 s4'; + s4) diff --git a/flambda-backend/tests/backend/vectorizer/test1.mli 
b/flambda-backend/tests/backend/vectorizer/test1.mli new file mode 100644 index 00000000000..5b909d90a8c --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/test1.mli @@ -0,0 +1 @@ +(* blank, make sure all the functions are called from top-level *) diff --git a/flambda-backend/tests/backend/vectorizer/test1_simd.ml b/flambda-backend/tests/backend/vectorizer/test1_simd.ml deleted file mode 100644 index 3ffad872fd4..00000000000 --- a/flambda-backend/tests/backend/vectorizer/test1_simd.ml +++ /dev/null @@ -1,60 +0,0 @@ -module Int64_u = struct - type t = int64# - - external to_int64 : t -> (int64[@local_opt]) = "%box_int64" [@@warning "-187"] - - external of_int64 : (int64[@local_opt]) -> t = "%unbox_int64" [@@warning "-187"] -end - -module Int64x2 = struct - type t = int64x2 - external add : t -> t -> t = "ocaml_simd_unreachable" "caml_sse2_int64x2_add" - [@@noalloc] [@@unboxed] [@@builtin] - - external low_of - : int64# - -> (t[@unboxed]) - = "ocaml_simd_unreachable" "caml_int64x2_low_of_int64" - [@@noalloc] [@@builtin] - - external low_to - : (t[@unboxed]) - -> int64# - = "ocaml_simd_unreachable" "caml_int64x2_low_to_int64" - [@@noalloc] [@@builtin] - - external high_64_to_low_64 - : onto:t - -> from:t - -> t - = "ocaml_simd_unreachable" "caml_sse_vec128_high_64_to_low_64" - [@@noalloc] [@@unboxed] [@@builtin] - - external low_64_to_high_64 - : onto:t - -> from:t - -> t - = "ocaml_simd_unreachable" "caml_sse_vec128_low_64_to_high_64" - [@@noalloc] [@@unboxed] [@@builtin] - - let[@inline always] set a b = - let a = low_of a in - let b = low_of b in - low_64_to_high_64 ~onto:a ~from:b - ;; - - let of_int64s a b = set (Int64_u.of_int64 a) (Int64_u.of_int64 b) - let low_int64 t = Int64_u.to_int64(low_to t) - let high_int64 t = Int64_u.to_int64(low_to (high_64_to_low_64 ~onto:t ~from:t)) -;; -end - -let add_pairs (a0, a1 : int64 * int64) (b0, b1 : int64 * int64) = - let sum_vector = (Int64x2.add (Int64x2.of_int64s a0 a1) (Int64x2.of_int64s b0 b1)) in - 
(Int64x2.low_int64 sum_vector, Int64x2.high_int64 sum_vector) -;; - -let () = - let sum0, sum1 = add_pairs (0L, 1L) (2L, 3L) in - Printf.printf "%Lx %Lx\n" sum0 sum1 -;; diff --git a/flambda-backend/tests/backend/vectorizer/test1_vectorized.cmx.dump.expected b/flambda-backend/tests/backend/vectorizer/test1_vectorized.cmx.dump.expected new file mode 100644 index 00000000000..bfbcfa3b662 --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/test1_vectorized.cmx.dump.expected @@ -0,0 +1,9 @@ +**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test1_vectorized.add_pairs_immutable_record) +**** Vectorize selected computation: 8 groups, 16 scalar instructions, 14 vector instructions, cost = -2 (Test1_vectorized.add_fours_immutable_record) +**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test1_vectorized.add_int_tuples) +**** Vectorize selected computation: 5 groups, 10 scalar instructions, 8 vector instructions, cost = -2 (Test1_vectorized.add_t2_to_t4) +**** Vectorize selected computation: 3 groups, 6 scalar instructions, 3 vector instructions, cost = -3 (Test1_vectorized.copy_t2_to_t4_immutable_record) +**** Vectorize selected computation: 2 groups, 4 scalar instructions, 2 vector instructions, cost = -2 (Test1_vectorized.copy_pairs_mutable_record) +**** Vectorize selected computation: 2 groups, 4 scalar instructions, 2 vector instructions, cost = -2 (Test1_vectorized.copy_pairs_mutable_record_return) +**** Vectorize selected computation: 4 groups, 8 scalar instructions, 4 vector instructions, cost = -4 (Test1_vectorized.copy_fours_mutable_record) +**** Vectorize selected computation: 8 groups, 16 scalar instructions, 14 vector instructions, cost = -2 (Test1_vectorized.add_fours_mutable_record) diff --git a/lambda/matching.ml b/lambda/matching.ml index 44beae72709..c794d044f51 100644 --- a/lambda/matching.ml +++ b/lambda/matching.ml @@ -104,18 +104,16 @@ 
exception Error of Location.t * error let dbg = false -let jkind_layout_default_to_value_and_check_not_void loc jkind = - let rec contains_void : Jkind.Layout.Const.t -> bool = function - | Any -> false +let sort_check_not_void loc sort = + let rec contains_void : Jkind.Sort.Const.t -> bool = function | Base Void -> true | Base (Value | Float64 | Float32 | Word | Bits8 | Bits16 | Bits32 | Bits64 | Vec128) -> false | Product [] -> - Misc.fatal_error "nil in jkind_layout_default_to_value_and_check_not_void" - | Product ts -> List.exists contains_void ts + Misc.fatal_error "nil in sort_check_not_void" + | Product ss -> List.exists contains_void ss in - let layout = Jkind.get_layout_defaulting_to_value jkind in - if contains_void layout then + if contains_void sort then raise (Error (loc, Void_layout)) ;; @@ -231,7 +229,7 @@ module Half_simple : sig type nonrec clause = pattern Non_empty_row.t clause val of_clause : - arg:lambda -> arg_sort:Jkind.sort -> General.clause -> clause + arg:lambda -> arg_sort:Jkind.Sort.Const.t -> General.clause -> clause end = struct include Patterns.Half_simple @@ -308,7 +306,7 @@ module Simple : sig val explode_or_pat : arg:lambda -> - arg_sort:Jkind.sort -> + arg_sort:Jkind.Sort.Const.t -> Half_simple.pattern -> mk_action:(vars:Ident.t list -> lambda) -> patbound_action_vars:Ident.t list -> @@ -1011,7 +1009,7 @@ end type 'row pattern_matching = { mutable cases : 'row list; - args : (lambda * let_kind * Jkind.sort * layout) list; + args : (lambda * let_kind * Jkind.Sort.Const.t * layout) list; (** args are not just Ident.t in at least the following cases: - when matching the arguments of a constructor, direct field projections are used (make_field_args) @@ -1822,7 +1820,7 @@ let make_line_matching get_expr_args head def = function } type 'a division = { - args : (lambda * let_kind * Jkind.sort * layout) list; + args : (lambda * let_kind * Jkind.Sort.Const.t * layout) list; cells : ('a * cell) list } @@ -1911,9 +1909,7 @@ let 
get_pat_args_constr p rem = match p with | { pat_desc = Tpat_construct (_, {cstr_args}, args, _) } -> List.iter2 - (fun { ca_jkind } arg -> - jkind_layout_default_to_value_and_check_not_void - arg.pat_loc ca_jkind) + (fun { ca_sort } arg -> sort_check_not_void arg.pat_loc ca_sort) cstr_args args; (* CR layouts v5: This sanity check will have to go (or be replaced with a void-specific check) when we have other non-value sorts *) @@ -1929,12 +1925,11 @@ let get_expr_args_constr ~scopes head (arg, _mut, sort, layout) rem = let loc = head_loc ~scopes head in (* CR layouts v5: This sanity check should be removed or changed to specifically check for void when we add other non-value sorts. *) - List.iter (fun { ca_jkind } -> - jkind_layout_default_to_value_and_check_not_void head.pat_loc ca_jkind) + List.iter (fun { ca_sort } -> sort_check_not_void head.pat_loc ca_sort) cstr.cstr_args; let ubr = Translmode.transl_unique_barrier (head.pat_unique_barrier) in let sem = add_barrier_to_read ubr Reads_agree in - let make_field_access binding_kind jkind ~field ~pos = + let make_field_access binding_kind sort ~field ~pos = let prim = match cstr.cstr_shape with | Constructor_uniform_value -> Pfield (pos, Pointer, sem) @@ -1956,7 +1951,6 @@ let get_expr_args_constr ~scopes head (arg, _mut, sort, layout) rem = let shape = Lambda.transl_mixed_product_shape shape in Pmixedfield (pos, read, shape, sem) in - let sort = Jkind.sort_of_jkind jkind in let layout = Typeopt.layout_of_sort head.pat_loc sort in (Lprim (prim, [ arg ], loc), binding_kind, sort, layout) in @@ -1967,15 +1961,15 @@ let get_expr_args_constr ~scopes head (arg, _mut, sort, layout) rem = match cstr.cstr_repr with | Variant_boxed _ -> List.mapi - (fun i { ca_jkind } -> - make_field_access str ca_jkind ~field:i ~pos:i) + (fun i { ca_sort } -> + make_field_access str ca_sort ~field:i ~pos:i) cstr.cstr_args @ rem | Variant_unboxed -> (arg, str, sort, layout) :: rem | Variant_extensible -> List.mapi - (fun i { ca_jkind } -> 
- make_field_access str ca_jkind ~field:i ~pos:(i+1)) + (fun i { ca_sort } -> + make_field_access str ca_sort ~field:i ~pos:(i+1)) cstr.cstr_args @ rem @@ -2001,7 +1995,7 @@ let get_expr_args_variant_nonconst ~scopes head (arg, _mut, _sort, _layout) let ubr = Translmode.transl_unique_barrier (head.pat_unique_barrier) in let field_prim = nonconstant_variant_field ubr 1 in let str = add_barrier_to_let_kind ubr Alias in - (Lprim (field_prim, [ arg ], loc), str, Jkind.Sort.for_variant_arg, + (Lprim (field_prim, [ arg ], loc), str, Jkind.Sort.Const.for_variant_arg, layout_variant_arg) :: rem @@ -2217,7 +2211,7 @@ let inline_lazy_force arg pos loc = let get_expr_args_lazy ~scopes head (arg, _mut, _sort, _layout) rem = let loc = head_loc ~scopes head in - (inline_lazy_force arg Rc_normal loc, Strict, Jkind.Sort.for_lazy_body, + (inline_lazy_force arg Rc_normal loc, Strict, Jkind.Sort.Const.for_lazy_body, layout_lazy_contents) :: rem let divide_lazy ~scopes head ctx pm = @@ -2252,7 +2246,7 @@ let get_expr_args_tuple ~scopes head (arg, _mut, _sort, _layout) rem = rem else (Lprim (Pfield (pos, Pointer, sem), [ arg ], loc), str, - Jkind.Sort.for_tuple_element, layout_tuple_element) + Jkind.Sort.Const.for_tuple_element, layout_tuple_element) :: make_args (pos + 1) in make_args 0 @@ -2262,6 +2256,7 @@ let get_expr_args_unboxed_tuple ~scopes shape head (arg, _mut, _sort, _layout) let loc = head_loc ~scopes head in let shape = List.map (fun (_, sort) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in sort, (* CR layouts v7.1: consider whether more accurate [Lambda.layout]s here would make a difference for later optimizations. 
*) @@ -2296,7 +2291,7 @@ let record_matching_line num_fields lbl_pat_list = List.iter (fun (_, lbl, pat) -> (* CR layouts v5: This void sanity check can be removed when we add proper void support (or whenever we remove `lbl_pos_void`) *) - jkind_layout_default_to_value_and_check_not_void pat.pat_loc lbl.lbl_jkind; + sort_check_not_void pat.pat_loc lbl.lbl_sort; patv.(lbl.lbl_pos) <- pat) lbl_pat_list; Array.to_list patv @@ -2330,11 +2325,9 @@ let get_expr_args_record ~scopes head (arg, _mut, sort, layout) rem = rem else let lbl = all_labels.(pos) in - jkind_layout_default_to_value_and_check_not_void - head.pat_loc lbl.lbl_jkind; + sort_check_not_void head.pat_loc lbl.lbl_sort; let ptr = Typeopt.maybe_pointer_type head.pat_env lbl.lbl_arg in - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - let lbl_layout = Typeopt.layout_of_sort lbl.lbl_loc lbl_sort in + let lbl_layout = Typeopt.layout_of_sort lbl.lbl_loc lbl.lbl_sort in let sem = if Types.is_mutable lbl.lbl_mut then Reads_vary else Reads_agree in @@ -2345,21 +2338,21 @@ let get_expr_args_record ~scopes head (arg, _mut, sort, layout) rem = | Record_boxed _ | Record_inlined (_, Constructor_uniform_value, Variant_boxed _) -> Lprim (Pfield (lbl.lbl_pos, ptr, sem), [ arg ], loc), - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_unboxed | Record_inlined (_, _, Variant_unboxed) -> arg, sort, layout | Record_float -> (* TODO: could optimise to Alloc_local sometimes *) Lprim (Pfloatfield (lbl.lbl_pos, sem, alloc_heap), [ arg ], loc), (* Here we are projecting a boxed float from a float record. *) - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_ufloat -> Lprim (Pufloatfield (lbl.lbl_pos, sem), [ arg ], loc), (* Here we are projecting an unboxed float from a float record. 
*) - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_inlined (_, Constructor_uniform_value, Variant_extensible) -> Lprim (Pfield (lbl.lbl_pos + 1, ptr, sem), [ arg ], loc), - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_inlined (_, Constructor_mixed _, Variant_extensible) -> (* CR layouts v5.9: support this *) fatal_error @@ -2386,7 +2379,7 @@ let get_expr_args_record ~scopes head (arg, _mut, sort, layout) rem = { value_prefix_len; flat_suffix } in Lprim (Pmixedfield (lbl.lbl_pos, read, shape, sem), [ arg ], loc), - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout in let str = if Types.is_mutable lbl.lbl_mut then StrictOpt else Alias in let str = add_barrier_to_let_kind ubr str in @@ -2408,7 +2401,7 @@ let get_expr_args_record_unboxed_product ~scopes head in let lbl_layouts = Array.map (fun lbl -> - Typeopt.layout_of_sort lbl.lbl_loc (Jkind.sort_of_jkind lbl.lbl_jkind) + Typeopt.layout_of_sort lbl.lbl_loc lbl.lbl_sort ) all_labels |> Array.to_list in @@ -2417,8 +2410,7 @@ let get_expr_args_record_unboxed_product ~scopes head rem else let lbl = all_labels.(pos) in - jkind_layout_default_to_value_and_check_not_void - head.pat_loc lbl.lbl_jkind; + sort_check_not_void head.pat_loc lbl.lbl_sort; let access = if Array.length all_labels = 1 then arg (* erase singleton unboxed records before lambda *) else @@ -2432,9 +2424,8 @@ let get_expr_args_record_unboxed_product ~scopes head else Alias in - let sort = Jkind.sort_of_jkind lbl.lbl_jkind in - let layout = Typeopt.layout_of_sort lbl.lbl_loc sort in - (access, str, sort, layout) :: make_args (pos + 1) + let layout = Typeopt.layout_of_sort lbl.lbl_loc lbl.lbl_sort in + (access, str, lbl.lbl_sort, layout) :: make_args (pos + 1) in make_args 0 @@ -2475,6 +2466,7 @@ let get_expr_args_array ~scopes kind head (arg, _mut, _sort, _layout) rem = | Array (am, arg_sort, len) -> am, arg_sort, len | _ -> assert false in + let arg_sort = Jkind.Sort.default_for_transl_and_get arg_sort in let loc = head_loc ~scopes 
head in let rec make_args pos = if pos >= len then @@ -3880,6 +3872,7 @@ and do_compile_matching ~scopes value_kind repr partial ctx pmh = (combine_constructor value_kind ploc arg ph.pat_env ph.pat_unique_barrier cstr partial) ctx pm | Array (_, elt_sort, _) -> + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in let kind = Typeopt.array_pattern_kind pomega elt_sort in compile_test (compile_match ~scopes value_kind repr partial) @@ -4094,7 +4087,7 @@ let for_trywith ~scopes ~return_layout loc param pat_act_list = It is important to *not* include location information in the reraise (hence the [_noloc]) to avoid seeing this silent reraise in exception backtraces. *) - compile_matching ~scopes ~arg_sort:Jkind.Sort.for_predef_value + compile_matching ~scopes ~arg_sort:Jkind.Sort.Const.for_predef_value ~arg_layout:layout_block ~return_layout loc ~failer:(Reraise_noloc param) None param pat_act_list Partial @@ -4211,12 +4204,12 @@ let assign_pat ~scopes body_layout opt nraise catch_ids loc pat pat_sort lam = opt := true; List.fold_left2 (fun acc (_, pat) lam -> - collect Jkind.Sort.for_tuple_element acc pat lam) + collect Jkind.Sort.Const.for_tuple_element acc pat lam) acc patl lams | Tpat_tuple patl, Lconst (Const_block (_, scl)) -> opt := true; let collect_const acc (_, pat) sc = - collect Jkind.Sort.for_tuple_element acc pat (Lconst sc) + collect Jkind.Sort.Const.for_tuple_element acc pat (Lconst sc) in List.fold_left2 collect_const acc patl scl | _ -> @@ -4293,7 +4286,7 @@ let for_tupled_function ~scopes ~return_layout loc paraml pats_act_list partial (* The arguments of a tupled function are always values since they must be tuple elements *) let args = - List.map (fun id -> (Lvar id, Strict, Jkind.Sort.for_tuple_element, + List.map (fun id -> (Lvar id, Strict, Jkind.Sort.Const.for_tuple_element, layout_tuple_element)) paraml in @@ -4393,12 +4386,12 @@ let do_for_multiple_match ~scopes ~return_layout loc paraml mode pat_act_list pa let sloc = 
Scoped_location.of_location ~scopes loc in Lprim (Pmakeblock (0, Immutable, None, mode), param_lambda, sloc) in - let arg_sort = Jkind.Sort.for_tuple in + let arg_sort = Jkind.Sort.Const.for_tuple in let handler = let partial = check_partial pat_act_list partial in let rows = map_on_rows (fun p -> (p, [])) pat_act_list in toplevel_handler ~scopes ~return_layout loc ~failer:Raise_match_failure - partial [ (arg, Strict, Jkind.Sort.for_tuple, layout_block) ] rows in + partial [ (arg, Strict, Jkind.Sort.Const.for_tuple, layout_block) ] rows in handler (fun partial pm1 -> let pm1_half = { pm1 with diff --git a/lambda/matching.mli b/lambda/matching.mli index cd87113064c..c1694124f5f 100644 --- a/lambda/matching.mli +++ b/lambda/matching.mli @@ -22,7 +22,7 @@ open Debuginfo.Scoped_location (* Entry points to match compiler *) val for_function: scopes:scopes -> - arg_sort:Jkind.sort -> arg_layout:layout -> return_layout:layout -> + arg_sort:Jkind.Sort.Const.t -> arg_layout:layout -> return_layout:layout -> Location.t -> int ref option -> lambda -> (pattern * lambda) list -> partial -> lambda @@ -31,12 +31,12 @@ val for_trywith: lambda -> (pattern * lambda) list -> lambda val for_let: - scopes:scopes -> arg_sort:Jkind.sort -> return_layout:layout -> + scopes:scopes -> arg_sort:Jkind.Sort.Const.t -> return_layout:layout -> Location.t -> lambda -> pattern -> lambda -> lambda val for_multiple_match: scopes:scopes -> return_layout:layout -> Location.t -> - (lambda * Jkind.sort * layout) list -> locality_mode -> + (lambda * Jkind.Sort.Const.t * layout) list -> locality_mode -> (pattern * lambda) list -> partial -> lambda @@ -57,7 +57,7 @@ val for_tupled_function: *) val for_optional_arg_default: scopes:scopes -> Location.t -> pattern -> param:Ident.t -> - default_arg:lambda -> default_arg_sort:Jkind.sort -> + default_arg:lambda -> default_arg_sort:Jkind.Sort.Const.t -> return_layout:layout -> lambda -> lambda exception Cannot_flatten diff --git 
a/lambda/transl_array_comprehension.ml b/lambda/transl_array_comprehension.ml index 440ae5779c0..4f6183380aa 100644 --- a/lambda/transl_array_comprehension.ml +++ b/lambda/transl_array_comprehension.ml @@ -444,7 +444,7 @@ let iterator ~transl_exp ~scopes ~loc : | Texp_comp_range { ident; pattern = _; start; stop; direction } -> let bound name value = Let_binding.make (Immutable Strict) layout_int name - (transl_exp ~scopes Jkind.Sort.for_predef_value value) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value value) in let start = bound "start" start in let stop = bound "stop" stop in @@ -462,7 +462,7 @@ let iterator ~transl_exp ~scopes ~loc : | Texp_comp_in { pattern; sequence = iter_arr_exp } -> let iter_arr = Let_binding.make (Immutable Strict) layout_any_value "iter_arr" - (transl_exp ~scopes Jkind.Sort.for_predef_value iter_arr_exp) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value iter_arr_exp) in let iter_arr_kind = (* CR layouts v4: [~elt_sort:None] here is not ideal and @@ -493,7 +493,7 @@ let iterator ~transl_exp ~scopes ~loc : for_dir = Upto; for_body = Matching.for_let ~scopes - ~arg_sort:Jkind.Sort.for_array_comprehension_element + ~arg_sort:Jkind.Sort.Const.for_array_comprehension_element ~return_layout:layout_int pattern.pat_loc (Lprim ( Parrayrefu @@ -550,7 +550,7 @@ let clause ~transl_exp ~scopes ~loc = function | Texp_comp_when cond -> fun body -> Lifthenelse - ( transl_exp ~scopes Jkind.Sort.for_predef_value cond, + ( transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, body, lambda_unit, layout_unit ) @@ -874,8 +874,8 @@ let comprehension ~transl_exp ~scopes ~loc ~(array_kind : Lambda.array_kind) (* CR layouts v4: Ensure that the [transl_exp] here can cope with non-values. 
*) ~body: - (transl_exp ~scopes Jkind.Sort.for_array_comprehension_element - comp_body)), + (transl_exp ~scopes + Jkind.Sort.Const.for_array_comprehension_element comp_body)), (* If it was dynamically grown, cut it down to size *) match array_sizing with | Fixed_size -> array.var diff --git a/lambda/transl_array_comprehension.mli b/lambda/transl_array_comprehension.mli index 2caeddc2f1d..690183c9fe6 100644 --- a/lambda/transl_array_comprehension.mli +++ b/lambda/transl_array_comprehension.mli @@ -22,7 +22,7 @@ open Debuginfo.Scoped_location so is parameterized by [Translcore.transl_exp], its [scopes] argument, and the [loc]ation. *) val comprehension : - transl_exp:(scopes:scopes -> Jkind.sort -> expression -> lambda) -> + transl_exp:(scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda) -> scopes:scopes -> loc:scoped_location -> array_kind:array_kind -> diff --git a/lambda/transl_list_comprehension.ml b/lambda/transl_list_comprehension.ml index 02d85b9d28c..8694c387c19 100644 --- a/lambda/transl_list_comprehension.ml +++ b/lambda/transl_list_comprehension.ml @@ -169,7 +169,7 @@ let iterator ~transl_exp ~scopes = function correct (i.e., left-to-right) order *) let transl_bound var bound = Let_binding.make (Immutable Strict) layout_int var - (transl_exp ~scopes Jkind.Sort.for_predef_value bound) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value bound) in let start = transl_bound "start" start in let stop = transl_bound "stop" stop in @@ -185,7 +185,7 @@ let iterator ~transl_exp ~scopes = function | Texp_comp_in { pattern; sequence } -> let iter_list = Let_binding.make (Immutable Strict) layout_any_value "iter_list" - (transl_exp ~scopes Jkind.Sort.for_predef_value sequence) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value sequence) in (* Create a fresh variable to use as the function argument *) let element = Ident.create_local "element" in @@ -194,10 +194,10 @@ let iterator ~transl_exp ~scopes = function element; element_kind = Typeopt.layout 
pattern.pat_env pattern.pat_loc - Jkind.Sort.for_list_element pattern.pat_type; + Jkind.Sort.Const.for_list_element pattern.pat_type; add_bindings = (* CR layouts: to change when we allow non-values in sequences *) - Matching.for_let ~scopes ~arg_sort:Jkind.Sort.for_list_element + Matching.for_let ~scopes ~arg_sort:Jkind.Sort.Const.for_list_element ~return_layout:layout_any_value pattern.pat_loc (Lvar element) pattern } @@ -287,7 +287,7 @@ let rec translate_clauses ~transl_exp ~scopes ~loc ~comprehension_body Let_binding.let_all arg_lets bindings | Texp_comp_when cond -> Lifthenelse - ( transl_exp ~scopes Jkind.Sort.for_predef_value cond, + ( transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, body ~accumulator, accumulator, layout_any_value (* [list]s have the standard representation *) )) @@ -298,7 +298,7 @@ let comprehension ~transl_exp ~scopes ~loc { comp_body; comp_clauses } = translate_clauses ~transl_exp ~scopes ~loc ~comprehension_body:(fun ~accumulator -> rev_list_snoc_local ~loc ~init:accumulator - ~last:(transl_exp ~scopes Jkind.Sort.for_list_element comp_body)) + ~last:(transl_exp ~scopes Jkind.Sort.Const.for_list_element comp_body)) ~accumulator:rev_list_nil comp_clauses in Lambda_utils.apply ~loc ~mode:alloc_heap diff --git a/lambda/transl_list_comprehension.mli b/lambda/transl_list_comprehension.mli index 65ec6208328..fcaeaaa2dca 100644 --- a/lambda/transl_list_comprehension.mli +++ b/lambda/transl_list_comprehension.mli @@ -15,7 +15,7 @@ open Debuginfo.Scoped_location so is parameterized by [Translcore.transl_exp], its [scopes] argument, and the [loc]ation. 
*) val comprehension : - transl_exp:(scopes:scopes -> Jkind.sort -> expression -> lambda) -> + transl_exp:(scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda) -> scopes:scopes -> loc:scoped_location -> comprehension -> diff --git a/lambda/translclass.ml b/lambda/translclass.ml index 49022547fb3..5c0df6fcf7a 100644 --- a/lambda/translclass.ml +++ b/lambda/translclass.ml @@ -105,7 +105,7 @@ let transl_meth_list lst = let set_inst_var ~scopes obj id expr = Lprim(Psetfield_computed (Typeopt.maybe_pointer expr, Assignment modify_heap), - [Lvar obj; Lvar id; transl_exp ~scopes Jkind.Sort.for_instance_var expr], + [Lvar obj; Lvar id; transl_exp ~scopes Jkind.Sort.Const.for_instance_var expr], Loc_unknown) let transl_val tbl create name = @@ -212,7 +212,7 @@ let rec build_object_init ~scopes cl_table obj params inh_init obj_init cl = (inh_init, let build params rem = let param = name_pattern "param" pat in - let arg_sort = Jkind.Sort.for_class_arg in + let arg_sort = Jkind.Sort.Const.for_class_arg in let arg_layout = Typeopt.layout pat.pat_env pat.pat_loc arg_sort pat.pat_type in @@ -366,7 +366,7 @@ let rec build_class_init ~scopes cla cstr super inh_init cl_init msubst top cl = | Tcf_method (name, _, Tcfk_concrete (_, exp)) -> let scopes = enter_method_definition ~scopes name.txt in let met_code = - msubst true (transl_scoped_exp ~scopes Jkind.Sort.for_method exp) + msubst true (transl_scoped_exp ~scopes Jkind.Sort.Const.for_method exp) in let met_code = if !Clflags.native_code && List.length met_code = 1 then @@ -383,7 +383,7 @@ let rec build_class_init ~scopes cla cstr super inh_init cl_init msubst top cl = Lsequence(mkappl (oo_prim "add_initializer", Lvar cla :: msubst false (transl_exp ~scopes - Jkind.Sort.for_initializer exp), + Jkind.Sort.Const.for_initializer exp), layout_unit), cl_init), methods, values) @@ -502,7 +502,7 @@ let rec transl_class_rebind ~scopes obj_init cl vf = transl_class_rebind ~scopes obj_init cl vf in let build params rem = let param = 
name_pattern "param" pat in - let arg_sort = Jkind.Sort.for_class_arg in + let arg_sort = Jkind.Sort.Const.for_class_arg in let arg_layout = Typeopt.layout pat.pat_env pat.pat_loc arg_sort pat.pat_type in diff --git a/lambda/translcore.ml b/lambda/translcore.ml index 384e054c89f..146fe666902 100644 --- a/lambda/translcore.ml +++ b/lambda/translcore.ml @@ -51,13 +51,13 @@ let use_dup_for_constant_mutable_arrays_bigger_than = 4 appropriately. *) let sort_must_not_be_void loc ty sort = - if Jkind.Sort.is_void_defaulting sort then raise (Error (loc, Void_sort ty)) + if Jkind.Sort.Const.(equal void sort) then raise (Error (loc, Void_sort ty)) let layout_exp sort e = layout e.exp_env e.exp_loc sort e.exp_type let layout_pat sort p = layout p.pat_env p.pat_loc sort p.pat_type let check_record_field_sort loc sort = - match Jkind.Sort.default_to_value_and_get sort with + match (sort : Jkind.Sort.Const.t) with | Base (Value | Float64 | Float32 | Bits8 | Bits16 | Bits32 | Bits64 | Vec128 | Word) -> () | Base Void -> raise (Error (loc, Illegal_void_record_field)) @@ -268,7 +268,7 @@ let assert_failed loc ~scopes exp = type fusable_function = { params : function_param list ; body : function_body - ; return_sort : Jkind.sort + ; return_sort : Jkind.Sort.Const.t ; return_mode : locality_mode ; region : bool } @@ -310,10 +310,11 @@ let fuse_method_arity (parent : fusable_function) : fusable_function = Mode.Alloc.disallow_right Mode.Alloc.legacy } } in + let return_sort = Jkind.Sort.default_for_transl_and_get method_.ret_sort in { params = self_param :: method_.params; body = method_.body; return_mode = transl_alloc_mode_l method_.ret_mode; - return_sort = method_.ret_sort; + return_sort; region = true; } | _ -> parent @@ -419,6 +420,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (event_before ~scopes body (transl_exp ~scopes sort body)) | Texp_function { params; body; ret_sort; ret_mode; alloc_mode; zero_alloc } -> + let ret_sort = Jkind.Sort.default_for_transl_and_get 
ret_sort in transl_function ~in_new_scope ~scopes e params body ~alloc_mode ~ret_mode ~ret_sort ~region:true ~zero_alloc | Texp_apply({ exp_desc = Texp_ident(path, _, {val_kind = Val_prim p}, @@ -433,8 +435,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | ((_, arg_repr) :: prim_repr), ((_, Arg (x, _)) :: oargs) -> let arg_exps, extra_args = cut_args prim_repr oargs in let arg_sort = - Jkind.Sort.of_const - (Translprim.sort_of_native_repr arg_repr ~poly_sort:psort) + Translprim.sort_of_native_repr arg_repr ~poly_sort:psort in (x, arg_sort) :: arg_exps, extra_args | _, ((_, Omitted _) :: _) -> assert false @@ -490,9 +491,10 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (transl_apply ~scopes ~tailcall ~inlined ~specialised ~assume_zero_alloc ~result_layout - ~position ~mode (transl_exp ~scopes Jkind.Sort.for_function funct) + ~position ~mode (transl_exp ~scopes Jkind.Sort.Const.for_function funct) oargs (of_location ~scopes e.exp_loc)) | Texp_match(arg, arg_sort, pat_expr_list, partial) -> + let arg_sort = Jkind.Sort.default_for_transl_and_get arg_sort in transl_match ~scopes ~arg_sort ~return_sort:sort e arg pat_expr_list partial | Texp_try(body, pat_expr_list) -> @@ -505,7 +507,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | Texp_tuple (el, alloc_mode) -> let ll, shape = transl_value_list_with_shape ~scopes - (List.map (fun (_, a) -> (a, Jkind.Sort.for_tuple_element)) el) + (List.map (fun (_, a) -> (a, Jkind.Sort.Const.for_tuple_element)) el) in begin try Lconst(Const_block(0, List.map extract_constant ll)) @@ -516,6 +518,10 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (of_location ~scopes e.exp_loc)) end | Texp_unboxed_tuple el -> + let el = + List.map (fun (l, e, s) -> + (l, e, Jkind.Sort.default_for_transl_and_get s)) el + in let shape = List.map (fun (_, e, s) -> layout_exp s e) el in let ll = List.map (fun (_, e, s) -> transl_exp ~scopes s e) el in Lprim(Pmake_unboxed_product shape, @@ -523,10 +529,7 @@ and transl_exp0 ~in_new_scope ~scopes 
sort e = of_location ~scopes e.exp_loc) | Texp_construct(_, cstr, args, alloc_mode) -> let args_with_sorts = - List.map2 (fun { ca_jkind } e -> - let sort = Jkind.sort_of_jkind ca_jkind in - e, sort) - cstr.cstr_args args + List.map2 (fun { ca_sort } e -> e, ca_sort) cstr.cstr_args args in let ll = List.map (fun (e, sort) -> transl_exp ~scopes sort e) args_with_sorts @@ -618,7 +621,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = begin match arg with None -> Lconst(const_int tag) | Some (arg, alloc_mode) -> - let lam = transl_exp ~scopes Jkind.Sort.for_poly_variant arg in + let lam = transl_exp ~scopes Jkind.Sort.Const.for_poly_variant arg in try Lconst(Const_block(0, [const_int tag; extract_constant lam])) @@ -637,13 +640,12 @@ and transl_exp0 ~in_new_scope ~scopes sort e = transl_record_unboxed_product ~scopes e.exp_loc e.exp_env fields representation extended_expression | Texp_field(arg, id, lbl, float, ubr) -> - let targ = transl_exp ~scopes Jkind.Sort.for_record arg in + let targ = transl_exp ~scopes Jkind.Sort.Const.for_record arg in let sem = if Types.is_mutable lbl.lbl_mut then Reads_vary else Reads_agree in let sem = add_barrier_to_read (transl_unique_barrier ubr) sem in - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - check_record_field_sort id.loc lbl_sort; + check_record_field_sort id.loc lbl.lbl_sort; begin match lbl.lbl_repres with Record_boxed _ | Record_inlined (_, Constructor_uniform_value, Variant_boxed _) -> @@ -701,10 +703,9 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | Texp_unboxed_field(arg, arg_sort, _id, lbl, _) -> begin match lbl.lbl_repres with | Record_unboxed_product -> - let lbl_layout l = - layout e.exp_env l.lbl_loc (Jkind.sort_of_jkind l.lbl_jkind) l.lbl_arg - in + let lbl_layout l = layout e.exp_env l.lbl_loc l.lbl_sort l.lbl_arg in let layouts = Array.to_list (Array.map lbl_layout lbl.lbl_all) in + let arg_sort = Jkind.Sort.default_for_transl_and_get arg_sort in let targ = transl_exp ~scopes arg_sort arg in if 
Array.length lbl.lbl_all == 1 then (* erase singleton unboxed records before lambda *) @@ -718,8 +719,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = representability on construction, [sort_of_jkind] will be unsafe here. Probably we should add a sort to `Texp_setfield` in the typed tree, then. *) - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - check_record_field_sort id.loc lbl_sort; + check_record_field_sort id.loc lbl.lbl_sort; let mode = Assignment (transl_modify_mode arg_mode) in @@ -756,11 +756,12 @@ and transl_exp0 ~in_new_scope ~scopes sort e = Psetmixedfield(lbl.lbl_pos, write, shape, mode) end in - Lprim(access, [transl_exp ~scopes Jkind.Sort.for_record arg; - transl_exp ~scopes lbl_sort newval], + Lprim(access, [transl_exp ~scopes Jkind.Sort.Const.for_record arg; + transl_exp ~scopes lbl.lbl_sort newval], of_location ~scopes e.exp_loc) | Texp_array (amut, element_sort, expr_list, alloc_mode) -> let mode = transl_alloc_mode alloc_mode in + let element_sort = Jkind.Sort.default_for_transl_and_get element_sort in let kind = array_kind e element_sort in let ll = transl_list ~scopes @@ -836,6 +837,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = type checker; both mutable and immutable arrays are created the same way *) let loc = of_location ~scopes e.exp_loc in + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in let array_kind = Typeopt.array_kind e elt_sort in begin match array_kind with | Pgenarray | Paddrarray | Pintarray | Pfloatarray @@ -848,22 +850,24 @@ and transl_exp0 ~in_new_scope ~scopes sort e = Transl_array_comprehension.comprehension ~transl_exp ~scopes ~loc ~array_kind comp | Texp_ifthenelse(cond, ifso, Some ifnot) -> - Lifthenelse(transl_exp ~scopes Jkind.Sort.for_predef_value cond, + Lifthenelse(transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, event_before ~scopes ifso (transl_exp ~scopes sort ifso), event_before ~scopes ifnot (transl_exp ~scopes sort ifnot), layout_exp sort e) | Texp_ifthenelse(cond, 
ifso, None) -> - Lifthenelse(transl_exp ~scopes Jkind.Sort.for_predef_value cond, + Lifthenelse(transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, event_before ~scopes ifso (transl_exp ~scopes sort ifso), lambda_unit, Lambda.layout_unit) | Texp_sequence(expr1, sort', expr2) -> + let sort' = Jkind.Sort.default_for_transl_and_get sort' in sort_must_not_be_void expr1.exp_loc expr1.exp_type sort'; Lsequence(transl_exp ~scopes sort' expr1, event_before ~scopes expr2 (transl_exp ~scopes sort expr2)) | Texp_while {wh_body; wh_body_sort; wh_cond} -> + let wh_body_sort = Jkind.Sort.default_for_transl_and_get wh_body_sort in sort_must_not_be_void wh_body.exp_loc wh_body.exp_type wh_body_sort; - let cond = transl_exp ~scopes Jkind.Sort.for_predef_value wh_cond in + let cond = transl_exp ~scopes Jkind.Sort.Const.for_predef_value wh_cond in let body = transl_exp ~scopes wh_body_sort wh_body in Lwhile { wh_cond = maybe_region_layout layout_int cond; @@ -871,13 +875,14 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (maybe_region_layout layout_unit body); } | Texp_for {for_id; for_from; for_to; for_dir; for_body; for_body_sort} -> + let for_body_sort = Jkind.Sort.default_for_transl_and_get for_body_sort in sort_must_not_be_void for_body.exp_loc for_body.exp_type for_body_sort; let body = transl_exp ~scopes for_body_sort for_body in Lfor { for_id; for_loc = of_location ~scopes e.exp_loc; - for_from = transl_exp ~scopes Jkind.Sort.for_predef_value for_from; - for_to = transl_exp ~scopes Jkind.Sort.for_predef_value for_to; + for_from = transl_exp ~scopes Jkind.Sort.Const.for_predef_value for_from; + for_to = transl_exp ~scopes Jkind.Sort.Const.for_predef_value for_to; for_dir; for_body = event_before ~scopes for_body (maybe_region_layout layout_unit body); @@ -890,10 +895,10 @@ and transl_exp0 ~in_new_scope ~scopes sort e = let layout = layout_exp sort e in match met with | Tmeth_val id -> - let obj = transl_exp ~scopes Jkind.Sort.for_object expr in + let obj = transl_exp 
~scopes Jkind.Sort.Const.for_object expr in Lsend (Self, Lvar id, obj, [], pos, mode, loc, layout) | Tmeth_name nm -> - let obj = transl_exp ~scopes Jkind.Sort.for_object expr in + let obj = transl_exp ~scopes Jkind.Sort.Const.for_object expr in let (tag, cache) = Translobj.meth obj nm in let kind = if cache = [] then Public else Cached in Lsend (kind, tag, obj, cache, pos, mode, loc, layout) @@ -987,7 +992,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = then lambda_unit else begin Lifthenelse - (transl_exp ~scopes Jkind.Sort.for_predef_value cond, + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, lambda_unit, assert_failed loc ~scopes e, Lambda.layout_unit) @@ -1000,14 +1005,14 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | `Constant_or_function -> (* A constant expr (of type <> float if [Config.flat_float_array] is true) gets compiled as itself. *) - transl_exp ~scopes Jkind.Sort.for_lazy_body e + transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e | `Float_that_cannot_be_shortcut -> (* We don't need to wrap with Popaque: this forward block will never be shortcutted since it points to a float and Config.flat_float_array is true. 
*) Lprim(Pmakeblock(Obj.forward_tag, Immutable, None, alloc_heap), - [transl_exp ~scopes Jkind.Sort.for_lazy_body e], + [transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e], of_location ~scopes e.exp_loc) | `Identifier `Forward_value -> (* CR-someday mshinwell: Consider adding a new primitive @@ -1019,11 +1024,11 @@ and transl_exp0 ~in_new_scope ~scopes sort e = Lprim (Popaque Lambda.layout_lazy, [Lprim(Pmakeblock(Obj.forward_tag, Immutable, None, alloc_heap), - [transl_exp ~scopes Jkind.Sort.for_lazy_body e], + [transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e], of_location ~scopes e.exp_loc)], of_location ~scopes e.exp_loc) | `Identifier `Other -> - transl_exp ~scopes Jkind.Sort.for_lazy_body e + transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e | `Other -> (* other cases compile to a lazy block holding a function. The typechecker enforces that e has jkind value. *) @@ -1045,7 +1050,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = ~region:true ~body:(maybe_region_layout Lambda.layout_lazy_contents - (transl_exp ~scopes Jkind.Sort.for_lazy_body e)) + (transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e)) in Lprim(Pmakeblock(Config.lazy_tag, Mutable, None, alloc_heap), [fn], of_location ~scopes e.exp_loc) @@ -1061,6 +1066,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = cl_attributes = []; } | Texp_letop{let_; ands; param; param_sort; body; body_sort; partial} -> + let body_sort = Jkind.Sort.default_for_transl_and_get body_sort in event_after ~scopes e (transl_letop ~scopes e.exp_loc e.exp_env let_ ands param param_sort body body_sort partial) @@ -1093,7 +1099,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = end | Texp_probe {name; handler=exp; enabled_at_init} -> if !Clflags.native_code && !Clflags.probes then begin - let lam = transl_exp ~scopes Jkind.Sort.for_probe_body exp in + let lam = transl_exp ~scopes Jkind.Sort.Const.for_probe_body exp in let map = Ident.Set.fold (fun v acc -> Ident.Map.add v (Ident.rename v) acc) (free_variables lam) @@ 
-1262,7 +1268,7 @@ and transl_guard ~scopes guard rhs_sort rhs = | None -> expr | Some cond -> event_before ~scopes cond - (Lifthenelse(transl_exp ~scopes Jkind.Sort.for_predef_value cond, + (Lifthenelse(transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, expr, staticfail, layout)) and transl_case ~scopes rhs_sort {c_lhs; c_guard; c_rhs} = @@ -1397,6 +1403,8 @@ and transl_apply ~scopes let mode = transl_alloc_mode_r mode_closure in let arg_mode = transl_alloc_mode_l mode_arg in let ret_mode = transl_alloc_mode_l mode_ret in + let sort_arg = Jkind.Sort.default_for_transl_and_get sort_arg in + let sort_ret = Jkind.Sort.default_for_transl_and_get sort_ret in let result_layout = layout_of_sort (to_location loc) sort_ret in let body = build_apply handle [Lvar id_arg] loc Rc_normal ret_mode @@ -1438,6 +1446,7 @@ and transl_apply ~scopes match arg with | Omitted _ as arg -> arg | Arg (exp, sort_arg) -> + let sort_arg = Jkind.Sort.default_for_transl_and_get sort_arg in Arg (transl_exp ~scopes sort_arg exp, layout_exp sort_arg exp)) sargs in @@ -1480,9 +1489,11 @@ and transl_tupled_function Tfunction_cases { fc_cases = { c_lhs; _ } :: _ as cases; fc_partial; fc_arg_mode; fc_arg_sort } -> + let fc_arg_sort = Jkind.Sort.default_for_transl_and_get fc_arg_sort in Some (cases, fc_partial, c_lhs, fc_arg_mode, fc_arg_sort) | [{ fp_kind = Tparam_pat pat; fp_partial; fp_mode; fp_sort }], Tfunction_body body -> + let fp_sort = Jkind.Sort.default_for_transl_and_get fp_sort in let case = { c_lhs = pat; c_guard = None; c_rhs = body } in Some ([ case ], fp_partial, pat, fp_mode, fp_sort) | _ -> None @@ -1560,6 +1571,7 @@ and transl_curried_function ~scopes loc repr params body | Tfunction_cases { fc_cases; fc_partial; fc_param; fc_loc; fc_arg_sort; fc_arg_mode } -> + let fc_arg_sort = Jkind.Sort.default_for_transl_and_get fc_arg_sort in let arg_layout = match fc_cases with | { c_lhs } :: _ -> layout_pat fc_arg_sort c_lhs @@ -1601,6 +1613,7 @@ and transl_curried_function ~scopes 
loc repr params body | Tparam_optional_default (pat, expr, _) -> expr.exp_env, Predef.type_option expr.exp_type, Translattribute.transl_param_attributes pat in + let fp_sort = Jkind.Sort.default_for_transl_and_get fp_sort in let arg_layout = layout arg_env fp_loc fp_sort arg_type in let arg_mode = transl_alloc_mode_l fp_mode in let param = @@ -1619,6 +1632,7 @@ and transl_curried_function ~scopes loc repr params body ~arg_sort:fp_sort ~arg_layout ~return_layout | Tparam_optional_default (pat, default_arg, default_arg_sort) -> + let default_arg_sort = Jkind.Sort.default_for_transl_and_get default_arg_sort in let default_arg = event_before ~scopes default_arg (transl_exp ~scopes default_arg_sort default_arg) @@ -1810,6 +1824,7 @@ and transl_let ~scopes ~return_layout ?(add_regions=false) ?(in_structure=false) fun body -> body | {vb_pat=pat; vb_expr=expr; vb_sort=sort; vb_rec_kind=_; vb_attributes; vb_loc} :: rem -> + let sort = Jkind.Sort.default_for_transl_and_get sort in let lam = transl_bound_exp ~scopes ~in_structure pat sort expr vb_loc vb_attributes in @@ -1832,6 +1847,7 @@ and transl_let ~scopes ~return_layout ?(add_regions=false) ?(in_structure=false) let transl_case {vb_expr=expr; vb_sort; vb_attributes; vb_rec_kind = rkind; vb_loc; vb_pat} id = + let vb_sort = Jkind.Sort.default_for_transl_and_get vb_sort in let def = transl_bound_exp ~scopes ~in_structure vb_pat vb_sort expr vb_loc vb_attributes in @@ -1844,7 +1860,7 @@ and transl_let ~scopes ~return_layout ?(add_regions=false) ?(in_structure=false) and transl_setinstvar ~scopes loc self var expr = Lprim(Psetfield_computed (maybe_pointer expr, Assignment modify_heap), - [self; var; transl_exp ~scopes Jkind.Sort.for_instance_var expr], loc) + [self; var; transl_exp ~scopes Jkind.Sort.Const.for_instance_var expr], loc) (* CR layouts v5: Invariant - this is only called on values. Relax that. 
*) and transl_record ~scopes loc env mode fields repres opt_init_expr = @@ -1862,8 +1878,7 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = let copy_id = Ident.create_local "newrecord" in let update_field cont (lbl, definition) = (* CR layouts v5: allow more unboxed types here. *) - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - check_record_field_sort lbl.lbl_loc lbl_sort; + check_record_field_sort lbl.lbl_loc lbl.lbl_sort; match definition with | Kept _ -> cont | Overridden (_lid, expr) -> @@ -1910,14 +1925,14 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = end in Lsequence(Lprim(upd, [Lvar copy_id; - transl_exp ~scopes lbl_sort expr], + transl_exp ~scopes lbl.lbl_sort expr], of_location ~scopes loc), cont) in assert (is_heap_mode (Option.get mode)); (* Pduprecord must be Alloc_heap and not unboxed *) Llet(Strict, Lambda.layout_block, copy_id, Lprim(Pduprecord (repres, size), - [transl_exp ~scopes Jkind.Sort.for_record init_expr], + [transl_exp ~scopes Jkind.Sort.Const.for_record init_expr], of_location ~scopes loc), Array.fold_left update_field (Lvar copy_id) fields) | Some _ | None -> @@ -1928,14 +1943,9 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = let lv = Array.mapi (fun i (lbl, definition) -> - (* CR layouts v2.5: When we allow `any` in record fields and check - representability on construction, [sort_of_layout] will be unsafe - here. Probably we should add sorts to record construction in the - typed tree, then. 
*) - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in match definition with | Kept (typ, mut, _) -> - let field_layout = layout env lbl.lbl_loc lbl_sort typ in + let field_layout = layout env lbl.lbl_loc lbl.lbl_sort typ in let sem = if Types.is_mutable mut then Reads_vary else Reads_agree in @@ -1991,8 +2001,8 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = of_location ~scopes loc), field_layout | Overridden (_lid, expr) -> - let field_layout = layout_exp lbl_sort expr in - transl_exp ~scopes lbl_sort expr, field_layout) + let field_layout = layout_exp lbl.lbl_sort expr in + transl_exp ~scopes lbl.lbl_sort expr, field_layout) fields in let ll, shape = List.split (Array.to_list lv) in @@ -2077,7 +2087,7 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = begin match opt_init_expr with None -> lam | Some (init_expr, _) -> Llet(Strict, Lambda.layout_block, init_id, - transl_exp ~scopes Jkind.Sort.for_record init_expr, lam) + transl_exp ~scopes Jkind.Sort.Const.for_record init_expr, lam) end and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = @@ -2087,10 +2097,9 @@ and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = let shape = Array.map (fun (lbl, definition) -> - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in match definition with - | Kept (typ, _mut, _) -> layout env lbl.lbl_loc lbl_sort typ - | Overridden (_lid, expr) -> layout_exp lbl_sort expr) + | Kept (typ, _mut, _) -> layout env lbl.lbl_loc lbl.lbl_sort typ + | Overridden (_lid, expr) -> layout_exp lbl.lbl_sort expr) fields |> Array.to_list in @@ -2102,8 +2111,7 @@ and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = let access = Punboxed_product_field (i, shape) in Lprim (access, [Lvar init_id], of_location ~scopes loc) | Overridden (_lid, expr) -> - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - transl_exp ~scopes lbl_sort expr) + transl_exp ~scopes lbl.lbl_sort expr) 
fields |> Array.to_list in @@ -2114,6 +2122,9 @@ and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = match opt_init_expr with | None -> lam | Some (init_expr, init_expr_sort) -> + let init_expr_sort = + Jkind.Sort.default_for_transl_and_get init_expr_sort + in let layout = layout_exp init_expr_sort init_expr in let exp = transl_exp ~scopes init_expr_sort init_expr in Llet(Strict, layout, init_id, exp, lam) @@ -2210,13 +2221,13 @@ and transl_match ~scopes ~arg_sort ~return_sort e arg pat_expr_list partial = assert (static_handlers = []); let mode = transl_alloc_mode alloc_mode in let argl = - List.map (fun (_, a) -> (a, Jkind.Sort.for_tuple_element)) argl + List.map (fun (_, a) -> (a, Jkind.Sort.Const.for_tuple_element)) argl in Matching.for_multiple_match ~scopes ~return_layout e.exp_loc (transl_list_with_layout ~scopes argl) mode val_cases partial | {exp_desc = Texp_tuple (argl, alloc_mode)}, _ :: _ -> let argl = - List.map (fun (_, a) -> (a, Jkind.Sort.for_tuple_element)) argl + List.map (fun (_, a) -> (a, Jkind.Sort.Const.for_tuple_element)) argl in let val_ids, lvars = List.map @@ -2260,10 +2271,16 @@ and transl_letop ~scopes loc env let_ ands param param_sort case case_sort transl_ident (of_location ~scopes and_.bop_op_name.loc) env and_.bop_op_type and_.bop_op_path and_.bop_op_val Id_value in - let exp = transl_exp ~scopes and_.bop_exp_sort and_.bop_exp in - let right_layout = layout_exp and_.bop_exp_sort and_.bop_exp in + let and_bop_exp_sort = + Jkind.Sort.default_for_transl_and_get and_.bop_exp_sort + in + let and_bop_op_return_sort = + Jkind.Sort.default_for_transl_and_get and_.bop_op_return_sort + in + let exp = transl_exp ~scopes and_bop_exp_sort and_.bop_exp in + let right_layout = layout_exp and_bop_exp_sort and_.bop_exp in let result_layout = - function2_return_layout env and_.bop_loc and_.bop_op_return_sort + function2_return_layout env and_.bop_loc and_bop_op_return_sort and_.bop_op_type in let lam = @@ -2287,9 +2304,15 
@@ and transl_letop ~scopes loc env let_ ands param param_sort case case_sort transl_ident (of_location ~scopes let_.bop_op_name.loc) env let_.bop_op_type let_.bop_op_path let_.bop_op_val Id_value in + let let_bop_exp_sort = + Jkind.Sort.default_for_transl_and_get let_.bop_exp_sort + in + let let_bop_op_return_sort = + Jkind.Sort.default_for_transl_and_get let_.bop_op_return_sort + in let exp = - loop (layout_exp let_.bop_exp_sort let_.bop_exp) - (transl_exp ~scopes let_.bop_exp_sort let_.bop_exp) ands + loop (layout_exp let_bop_exp_sort let_.bop_exp) + (transl_exp ~scopes let_bop_exp_sort let_.bop_exp) ands in let func = let return_mode = alloc_heap (* XXX fixme: use result of is_function_type *) in @@ -2320,7 +2343,7 @@ and transl_letop ~scopes loc env let_ ands param param_sort case case_sort ap_func = op; ap_args=[exp; func]; ap_result_layout= - function2_return_layout env let_.bop_loc let_.bop_op_return_sort + function2_return_layout env let_.bop_loc let_bop_op_return_sort let_.bop_op_type; ap_region_close=Rc_normal; ap_mode=alloc_heap; diff --git a/lambda/translcore.mli b/lambda/translcore.mli index f50cf21e65f..bf1caa95227 100644 --- a/lambda/translcore.mli +++ b/lambda/translcore.mli @@ -24,7 +24,7 @@ open Debuginfo.Scoped_location val pure_module : module_expr -> let_kind (* Used for translating Alloc_heap values in classes and modules. 
*) -val transl_exp: scopes:scopes -> Jkind.sort -> expression -> lambda +val transl_exp: scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda val transl_apply: scopes:scopes -> ?tailcall:tailcall_attribute -> ?inlined:inlined_attribute @@ -42,7 +42,7 @@ val transl_extension_constructor: scopes:scopes -> Env.t -> Longident.t option -> extension_constructor -> lambda -val transl_scoped_exp : scopes:scopes -> Jkind.sort -> expression -> lambda +val transl_scoped_exp : scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda type error = Free_super_var diff --git a/lambda/translmod.ml b/lambda/translmod.ml index 09a80649091..d9869c2532a 100644 --- a/lambda/translmod.ml +++ b/lambda/translmod.ml @@ -39,7 +39,7 @@ type unsafe_info = type error = Circular_dependency of (Ident.t * unsafe_info) list | Conflicting_inline_attributes -| Non_value_jkind of type_expr * Jkind.sort +| Non_value_jkind of type_expr * Jkind.Sort.Const.t | Instantiating_packed of Compilation_unit.t exception Error of Location.t * error @@ -56,7 +56,7 @@ exception Error of Location.t * error When this sanity check is removed, consider whether it must be replaced with some defaulting. 
*) let sort_must_not_be_void loc ty sort = - if Jkind.Sort.is_void_defaulting sort then + if Jkind.Sort.Const.(equal void sort) then raise (Error (loc, Non_value_jkind (ty, sort))) let cons_opt x_opt xs = @@ -607,7 +607,7 @@ and transl_module ~scopes cc rootpath mexp = transl_module ~scopes (compose_coercions cc ccarg) rootpath arg | Tmod_unpack(arg, _) -> apply_coercion loc Strict cc - (Translcore.transl_exp ~scopes Jkind.Sort.for_module arg) + (Translcore.transl_exp ~scopes Jkind.Sort.Const.for_module arg) and transl_apply ~scopes ~loc ~cc mod_env funct translated_arg = let inlined_attribute = @@ -694,6 +694,7 @@ and transl_structure ~scopes loc fields cc rootpath final_env = function let body, size = transl_structure ~scopes loc fields cc rootpath final_env rem in + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; Lsequence(transl_exp ~scopes sort expr, body), size | Tstr_value(rec_flag, pat_expr_list) -> @@ -1225,6 +1226,7 @@ let transl_store_structure ~scopes glob map prims aliases str = | item :: rem -> match item.str_desc with | Tstr_eval (expr, sort, _attrs) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; Lsequence(Lambda.subst no_env_update subst (transl_exp ~scopes sort expr), @@ -1652,6 +1654,7 @@ let transl_store_structure_gen match str with | [ { str_desc = Tstr_eval (expr, sort, _attrs) } ] when topl -> assert (size = 0); + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; Lambda.subst (fun _ _ env -> env) !transl_store_subst (transl_exp ~scopes sort expr) @@ -1779,11 +1782,13 @@ let transl_toplevel_item ~scopes item = Otherwise, the normal compilation would result in a Lsequence returning unit. 
*) Tstr_eval (expr, sort, _) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; transl_exp ~scopes sort expr | Tstr_value(Nonrecursive, [{vb_pat = {pat_desc=Tpat_any}; vb_expr = expr; vb_sort = sort}]) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in transl_exp ~scopes sort expr | Tstr_value(rec_flag, pat_expr_list) -> let idents = let_bound_idents pat_expr_list in @@ -2142,7 +2147,7 @@ let report_error loc = function Location.errorf "Non-value sort %a detected in [translmod] in type %a:@ \ Please report this error to the Jane Street compilers team." - Jkind.Sort.format sort + Jkind.Sort.Const.format sort Printtyp.type_expr ty | Instantiating_packed comp_unit -> Location.errorf ~loc diff --git a/lambda/translmod.mli b/lambda/translmod.mli index d519998fd78..66c24d9d6a7 100644 --- a/lambda/translmod.mli +++ b/lambda/translmod.mli @@ -81,7 +81,7 @@ type unsafe_info = type error = Circular_dependency of (Ident.t * unsafe_info) list | Conflicting_inline_attributes -| Non_value_jkind of Types.type_expr * Jkind.sort +| Non_value_jkind of Types.type_expr * Jkind.Sort.Const.t | Instantiating_packed of Compilation_unit.t exception Error of Location.t * error diff --git a/lambda/translprim.ml b/lambda/translprim.ml index 97611290c8c..054701e0d85 100644 --- a/lambda/translprim.ml +++ b/lambda/translprim.ml @@ -1274,7 +1274,7 @@ let specialize_primitive env loc ty ~has_constant_constructor prim = let shape = List.map (fun typ -> Lambda.must_be_value (Typeopt.layout env (to_location loc) - Jkind.Sort.for_block_element typ)) + Jkind.Sort.Const.for_block_element typ)) fields in let useful = List.exists (fun knd -> knd <> Lambda.generic_value) shape in @@ -1634,10 +1634,7 @@ let transl_primitive loc p env ty ~poly_mode ~poly_sort path = let rec make_params ty repr_args repr_res = match repr_args, repr_res with | [], (_, res_repr) -> - let res_sort = - Jkind.Sort.of_const - (sort_of_native_repr res_repr 
~poly_sort) - in + let res_sort = sort_of_native_repr res_repr ~poly_sort in [], Typeopt.layout env error_loc res_sort ty | (((_, arg_repr) as arg) :: repr_args), _ -> match Typeopt.is_function_type env ty with @@ -1645,10 +1642,7 @@ let transl_primitive loc p env ty ~poly_mode ~poly_sort path = Misc.fatal_errorf "Primitive %s type does not correspond to arity" (Primitive.byte_name p) | Some (arg_ty, ret_ty) -> - let arg_sort = - Jkind.Sort.of_const - (sort_of_native_repr arg_repr ~poly_sort) - in + let arg_sort = sort_of_native_repr arg_repr ~poly_sort in let arg_layout = Typeopt.layout env error_loc arg_sort arg_ty in diff --git a/middle_end/flambda2/from_lambda/closure_conversion.ml b/middle_end/flambda2/from_lambda/closure_conversion.ml index 46c8fc0a2c7..f2fbb88756b 100644 --- a/middle_end/flambda2/from_lambda/closure_conversion.ml +++ b/middle_end/flambda2/from_lambda/closure_conversion.ml @@ -527,7 +527,7 @@ let unarize_extern_repr alloc_mode (extern_repr : Lambda.extern_repr) = match extern_repr with | Same_as_ocaml_repr (Base _ as sort) -> let kind = - Typeopt.layout_of_const_sort sort + Typeopt.layout_of_non_void_sort sort |> K.With_subkind.from_lambda_values_and_unboxed_numbers_only |> K.With_subkind.kind in diff --git a/ocamldoc/odoc_sig.ml b/ocamldoc/odoc_sig.ml index b24c1ce2e9d..cc75da1715d 100644 --- a/ocamldoc/odoc_sig.ml +++ b/ocamldoc/odoc_sig.ml @@ -496,7 +496,7 @@ module Analyser = { Typedtree.ld_id; ld_mutable; ld_type; ld_loc; ld_attributes } = get_field env comments @@ {Types.ld_id; ld_mutable; ld_modalities = Mode.Modality.Value.Const.id; - ld_jkind=Jkind.Builtin.any ~why:Dummy_jkind (* ignored *); + ld_sort=Jkind.Sort.Const.void (* ignored *); ld_type=ld_type.Typedtree.ctyp_type; ld_loc; ld_attributes; ld_uid=Types.Uid.internal_not_actually_unique} in let open Typedtree in diff --git a/otherlibs/stdlib_alpha/capsule.ml b/otherlibs/stdlib_alpha/capsule.ml index f7778126658..c1238df02d5 100644 --- a/otherlibs/stdlib_alpha/capsule.ml +++ 
b/otherlibs/stdlib_alpha/capsule.ml @@ -121,6 +121,9 @@ end it never returns is also [portable] *) external reraise : exn -> 'a @ portable @@ portable = "%reraise" +external raise_with_backtrace : + exn -> Printexc.raw_backtrace -> 'a @ portable @@ portable = "%raise_with_backtrace" + module Data = struct type ('a, 'k) t : value mod portable uncontended @@ -349,6 +352,21 @@ module Rwlock = struct end +module Condition = struct + + type 'k t : value mod portable uncontended + + external create : unit -> 'k t @@ portable = "caml_ml_condition_new" + external wait : 'k t -> M.t -> unit @@ portable = "caml_ml_condition_wait" + external signal : 'k t -> unit @@ portable = "caml_ml_condition_signal" + external broadcast : 'k t -> unit @@ portable = "caml_ml_condition_broadcast" + + let wait t (mut : 'k Mutex.t) _password = + (* [mut] is locked, so we know it is not poisoned. *) + wait t mut.mutex + +end + let create_with_mutex () = let (P name) = Name.make () in Mutex.P { name; mutex = M.create (); poisoned = false } @@ -356,3 +374,17 @@ let create_with_mutex () = let create_with_rwlock () = let (P name) = Name.make () in Rwlock.P { name; rwlock = Rw.create (); poisoned = false } + +exception Protected : 'k Mutex.t * (exn, 'k) Data.t -> exn + +(* CR-soon mslater: replace with portable stdlib *) +let get_raw_backtrace : unit -> Printexc.raw_backtrace @@ portable = + O.magic O.magic Printexc.get_raw_backtrace + +let protect f = + try f () with + | exn -> + let (P mut) = create_with_mutex () in + raise_with_backtrace (Protected (mut, Data.unsafe_mk exn)) (get_raw_backtrace ()) + ;; + diff --git a/otherlibs/stdlib_alpha/capsule.mli b/otherlibs/stdlib_alpha/capsule.mli index 51bccbc855e..4dfc63af086 100644 --- a/otherlibs/stdlib_alpha/capsule.mli +++ b/otherlibs/stdlib_alpha/capsule.mli @@ -147,9 +147,6 @@ val access_shared : with a shared {!Access.t} for ['k]. The result is within ['k] so it must be [portable] and it is marked [contended]. 
*) -(** Mutual exclusion primtives for controlling uncontended access to a capsule. - - Requires OCaml 5 runtime. *) module Mutex : sig type 'k t : value mod portable uncontended @@ -236,6 +233,40 @@ module Rwlock : sig lock as poisoned. *) end +module Condition : sig + + type 'k t : value mod portable uncontended + (** ['k t] is the type of a condition variable associated with the capsule ['k]. + This condition may only be used with the matching ['k Mutex.t]. *) + + val create : unit -> 'k t @@ portable + (** [create ()] creates and returns a new condition variable. + This condition variable is associated with the matching ['k Mutex.t] + and with a certain property {i P} that is protected by the mutex. *) + + val wait : 'k t -> 'k Mutex.t -> 'k Password.t @ local -> unit @@ portable + (** [wait c m] atomically unlocks the mutex [m] and suspends the + current thread on the condition variable [c]. This thread can + later be woken up after the condition variable [c] has been signaled + via {!signal} or {!broadcast}; however, it can also be woken up for + no reason. The mutex [m] is locked again before [wait] returns. One + cannot assume that the property {i P} associated with the condition + variable [c] holds when [wait] returns; one must explicitly test + whether {i P} holds after calling [wait]. *) + + val signal : 'k t -> unit @@ portable + (** [signal c] wakes up one of the threads waiting on the condition + variable [c], if there is one. If there is none, this call has no effect. + It is recommended to call [signal c] inside a critical section, that is, + while the mutex [m] associated with [c] is locked. *) + + val broadcast : 'k t -> unit @@ portable + (** [broadcast c] wakes up all threads waiting on the condition + variable [c]. If there are none, this call has no effect. + It is recommended to call [broadcast c] inside a critical section, + that is, while the mutex [m] associated with [c] is locked. 
*) +end + val create_with_mutex : unit -> Mutex.packed @@ portable (** [create_with_mutex ()] creates a new capsule with an associated mutex. *) @@ -373,7 +404,6 @@ module Data : sig so it must be [portable] and it is marked [contended]. Since [nonportable] functions may enclose [uncontended] (and thus write) access to data, ['a] must cross [portability] *) - end exception Encapsulated : 'k Name.t * (exn, 'k) Data.t -> exn @@ -381,3 +411,16 @@ exception Encapsulated : 'k Name.t * (exn, 'k) Data.t -> exn exception, it is wrapped in [Encapsulated] to avoid leaking access to the data. The [Name.t] can be used to associate the [Data.t] with a particular [Password.t] or [Mutex.t]. *) + +exception Protected : 'k Mutex.t * (exn, 'k) Data.t -> exn +(** If a function passed to [protect] raises an exception, it is wrapped + in [Protected] to provide access to the capsule in which the function ran. *) +(* CR-soon mslater: this should return a key, not a mutex. *) + +val protect + : (unit -> 'a @ portable contended) @ local portable + -> 'a @ portable contended + @@ portable +(** [protect f] runs [f] in a fresh capsule. If [f] returns normally, [protect] + merges this capsule into the caller's capsule. If [f] raises, [protect] + raises [Protected], giving the caller access to the encapsulated exception. *) diff --git a/otherlibs/stdlib_beta/.ocamlformat-enable b/otherlibs/stdlib_beta/.ocamlformat-enable index 6a5f81486bb..038c19ee974 100644 --- a/otherlibs/stdlib_beta/.ocamlformat-enable +++ b/otherlibs/stdlib_beta/.ocamlformat-enable @@ -1,6 +1,7 @@ -stdlib_beta.mli -stdlib_beta.ml int8.ml int8.mli int16.ml int16.mli +int_wrapper.ml +stdlib_beta.ml +stdlib_beta.mli diff --git a/otherlibs/stdlib_beta/int16.ml b/otherlibs/stdlib_beta/int16.ml index 81dac52f206..e88be132034 100644 --- a/otherlibs/stdlib_beta/int16.ml +++ b/otherlibs/stdlib_beta/int16.ml @@ -12,81 +12,19 @@ (* *) (**************************************************************************) -open! 
Stdlib - -type t = int16 - [@@@ocaml.flambda_o3] -external to_int : t -> int = "%identity" - -external unsafe_of_int : int -> t = "%identity" - -let int_size = 16 - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = -1 lsl (int_size - 1) - -let mask = (1 lsl int_size) - 1 - -let[@inline] of_int i = unsafe_of_int (((i - min_int) land mask) + min_int) - -let zero = of_int 0 - -let one = of_int 1 - -let minus_one = of_int (-1) - -let neg x = of_int (Int.neg (to_int x)) - -let add x y = of_int (Int.add (to_int x) (to_int y)) - -let sub x y = of_int (Int.sub (to_int x) (to_int y)) - -let mul x y = of_int (Int.mul (to_int x) (to_int y)) - -let div x y = of_int (Int.div (to_int x) (to_int y)) - -let rem x y = of_int (Int.rem (to_int x) (to_int y)) - -let succ x = of_int (Int.succ (to_int x)) - -let pred x = of_int (Int.pred (to_int x)) - -let abs x = of_int (Int.abs (to_int x)) - -let logand x y = unsafe_of_int (Int.logand (to_int x) (to_int y)) - -let logor x y = unsafe_of_int (Int.logor (to_int x) (to_int y)) - -let logxor x y = unsafe_of_int (Int.logxor (to_int x) (to_int y)) - -let lognot x = unsafe_of_int (Int.lognot (to_int x)) - -let shift_left x y = of_int (Int.shift_left (to_int x) y) - -let shift_right x y = of_int (Int.shift_right (to_int x) y) - -let shift_right_logical x y = - of_int (Int.shift_right_logical (to_int x land mask) y) - -let equal x y = Int.equal (to_int x) (to_int y) - -let compare x y = Int.compare (to_int x) (to_int y) - -let min x y = if to_int x > to_int y then y else x - -let max x y = if to_int x < to_int y then y else x - -let of_float f = - let i = Int.of_float f in - if min_int <= i && i <= max_int then unsafe_of_int i else zero +type t = int16 -let to_float t = Int.to_float (to_int t) +include + Int_wrapper.Make + (Int_wrapper) + (struct + type nonrec t = t -let to_string t = Int.to_string (to_int t) + let int_size = 16 -let max_int = of_int max_int + external inject : t -> int = "%identity" -let min_int = of_int min_int + 
external unchecked_project : int -> t = "%identity" + end) diff --git a/otherlibs/stdlib_beta/int16.mli b/otherlibs/stdlib_beta/int16.mli index 14864cafb43..bca8493bb13 100644 --- a/otherlibs/stdlib_beta/int16.mli +++ b/otherlibs/stdlib_beta/int16.mli @@ -2,11 +2,9 @@ (* *) (* OCaml *) (* *) -(* The OCaml programmers *) (* Jacob Van Buren, Jane Street, New York *) (* *) -(* Copyright 2018 Institut National de Recherche en Informatique et *) -(* en Automatique. *) +(* Copyright 2024 Jane Street Group LLC *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) @@ -16,121 +14,11 @@ (** Signed 16-bit integer values. - These integers are {16} bits wide and use two's complement - representation. All operations are taken modulo - 2{^16}. They do not fail on overflow. *) - -(** {1:ints 16-bit Integers} *) + These integers are {16} bits wide and use two's complement representation. + All operations are taken modulo 2{^16}. They do not fail on overflow. *) (** The type for 16-bit integer values. *) -type t = int16 - -(** [int_size] is the number of bits in an integer (i.e., 16). *) -val int_size : int - -(** [zero] is the integer [0]. *) -val zero : int16 - -(** [one] is the integer [1]. *) -val one : int16 - -(** [minus_one] is the integer [-1]. *) -val minus_one : int16 - -(** [neg x] is [~-x]. *) -val neg : int16 -> int16 - -(** [add x y] is the addition [x + y]. *) -val add : int16 -> int16 -> int16 - -(** [sub x y] is the subtraction [x - y]. *) -val sub : int16 -> int16 -> int16 - -(** [mul x y] is the multiplication [x * y]. *) -val mul : int16 -> int16 -> int16 - -(** [div x y] is the division [x / y]. See {!Stdlib.( / )} for details. *) -val div : int16 -> int16 -> int16 - -(** [rem x y] is the remainder [x mod y]. See {!Stdlib.( mod )} for details. *) -val rem : int16 -> int16 -> int16 - -(** [succ x] is [add x 1]. *) -val succ : int16 -> int16 - -(** [pred x] is [sub x 1]. 
*) -val pred : int16 -> int16 - -(** [abs x] is the absolute value of [x]. That is [x] if [x] is positive - and [neg x] if [x] is negative. {b Warning.} This may be negative if - the argument is {!min_int}. *) -val abs : int16 -> int16 - -(** [max_int] is the greatest representable integer, - [2{^[16 - 1]} - 1]. *) -val max_int : int16 - -(** [min_int] is the smallest representable integer, - [-2{^[16 - 1]}]. *) -val min_int : int16 - -(** [logand x y] is the bitwise logical and of [x] and [y]. *) -val logand : int16 -> int16 -> int16 - -(** [logor x y] is the bitwise logical or of [x] and [y]. *) -val logor : int16 -> int16 -> int16 - -(** [logxor x y] is the bitwise logical exclusive or of [x] and [y]. *) -val logxor : int16 -> int16 -> int16 - -(** [lognot x] is the bitwise logical negation of [x]. *) -val lognot : int16 -> int16 - -(** [shift_left x n] shifts [x] to the left by [n] bits. The result - is unspecified if [n < 0] or [n > ]{!16}. *) -val shift_left : int16 -> int -> int16 - -(** [shift_right x n] shifts [x] to the right by [n] bits. This is an - arithmetic shift: the sign bit of [x] is replicated and inserted - in the vacated bits. The result is unspecified if [n < 0] or - [n > ]{!16}. *) -val shift_right : int16 -> int -> int16 - -(** [shift_right x n] shifts [x] to the right by [n] bits. This is a - logical shift: zeroes are inserted in the vacated bits regardless - of the sign of [x]. The result is unspecified if [n < 0] or - [n > ]{!16}. *) -val shift_right_logical : int16 -> int -> int16 - -(** {1:preds Predicates and comparisons} *) - -(** [equal x y] is [true] if and only if [x = y]. *) -val equal : int16 -> int16 -> bool - -(** [compare x y] is {!Stdlib.compare}[ x y] but more efficient. *) -val compare : int16 -> int16 -> int - -(** Return the smaller of the two arguments. *) -val min : int16 -> int16 -> int16 - -(** Return the greater of the two arguments. 
*) -val max : int16 -> int16 -> int16 - -(** {1:convert Converting} *) - -(** [to_int x] is [x] as an {!int}. *) -val to_int : int16 -> int - -(** [of_int x] represents [x] as an 16-bit integer. *) -val of_int : int -> int16 - -(** [to_float x] is [x] as a floating point number. *) -val to_float : int16 -> float - -(** [of_float x] truncates [x] to an integer. The result is - unspecified if the argument is [nan] or falls outside the range of - representable integers. *) -val of_float : float -> int16 +type t = int16 [@@immediate] -(** [to_string x] is the written representation of [x] in decimal. *) -val to_string : int16 -> string +(** @inline *) +include Int_wrapper.S with type t := int16 diff --git a/otherlibs/stdlib_beta/int8.ml b/otherlibs/stdlib_beta/int8.ml index 6c3c7b4f2be..e0b1d5c6157 100644 --- a/otherlibs/stdlib_beta/int8.ml +++ b/otherlibs/stdlib_beta/int8.ml @@ -12,81 +12,19 @@ (* *) (**************************************************************************) -open! Stdlib - -type t = int8 - [@@@ocaml.flambda_o3] -external to_int : t -> int = "%identity" - -external unsafe_of_int : int -> t = "%identity" - -let int_size = 8 - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = -1 lsl (int_size - 1) - -let mask = (1 lsl int_size) - 1 - -let[@inline] of_int i = unsafe_of_int (((i - min_int) land mask) + min_int) - -let zero = of_int 0 - -let one = of_int 1 - -let minus_one = of_int (-1) - -let neg x = of_int (Int.neg (to_int x)) - -let add x y = of_int (Int.add (to_int x) (to_int y)) - -let sub x y = of_int (Int.sub (to_int x) (to_int y)) - -let mul x y = of_int (Int.mul (to_int x) (to_int y)) - -let div x y = of_int (Int.div (to_int x) (to_int y)) - -let rem x y = of_int (Int.rem (to_int x) (to_int y)) - -let succ x = of_int (Int.succ (to_int x)) - -let pred x = of_int (Int.pred (to_int x)) - -let abs x = of_int (Int.abs (to_int x)) - -let logand x y = unsafe_of_int (Int.logand (to_int x) (to_int y)) - -let logor x y = unsafe_of_int (Int.logor 
(to_int x) (to_int y)) - -let logxor x y = unsafe_of_int (Int.logxor (to_int x) (to_int y)) - -let lognot x = unsafe_of_int (Int.lognot (to_int x)) - -let shift_left x y = of_int (Int.shift_left (to_int x) y) - -let shift_right x y = of_int (Int.shift_right (to_int x) y) - -let shift_right_logical x y = - of_int (Int.shift_right_logical (to_int x land mask) y) - -let equal x y = Int.equal (to_int x) (to_int y) - -let compare x y = Int.compare (to_int x) (to_int y) - -let min x y = if to_int x > to_int y then y else x - -let max x y = if to_int x < to_int y then y else x - -let of_float f = - let i = Int.of_float f in - if min_int <= i && i <= max_int then unsafe_of_int i else zero +type t = int8 -let to_float t = Int.to_float (to_int t) +include + Int_wrapper.Make + (Int_wrapper) + (struct + type nonrec t = t -let to_string t = Int.to_string (to_int t) + let int_size = 8 -let max_int = of_int max_int + external inject : t -> int = "%identity" -let min_int = of_int min_int + external unchecked_project : int -> t = "%identity" + end) diff --git a/otherlibs/stdlib_beta/int8.mli b/otherlibs/stdlib_beta/int8.mli index 10041a11cdb..b461517acfd 100644 --- a/otherlibs/stdlib_beta/int8.mli +++ b/otherlibs/stdlib_beta/int8.mli @@ -2,11 +2,9 @@ (* *) (* OCaml *) (* *) -(* The OCaml programmers *) (* Jacob Van Buren, Jane Street, New York *) (* *) -(* Copyright 2018 Institut National de Recherche en Informatique et *) -(* en Automatique. *) +(* Copyright 2024 Jane Street Group LLC *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) @@ -16,121 +14,11 @@ (** Signed 8-bit integer values. - These integers are {8} bits wide and use two's complement - representation. All operations are taken modulo - 2{^8}. They do not fail on overflow. *) - -(** {1:ints 8-bit Integers} *) + These integers are {8} bits wide and use two's complement representation. + All operations are taken modulo 2{^8}. 
They do not fail on overflow. *) (** The type for 8-bit integer values. *) -type t = int8 - -(** [int_size] is the number of bits in an integer (i.e., 8). *) -val int_size : int - -(** [zero] is the integer [0]. *) -val zero : int8 - -(** [one] is the integer [1]. *) -val one : int8 - -(** [minus_one] is the integer [-1]. *) -val minus_one : int8 - -(** [neg x] is [~-x]. *) -val neg : int8 -> int8 - -(** [add x y] is the addition [x + y]. *) -val add : int8 -> int8 -> int8 - -(** [sub x y] is the subtraction [x - y]. *) -val sub : int8 -> int8 -> int8 - -(** [mul x y] is the multiplication [x * y]. *) -val mul : int8 -> int8 -> int8 - -(** [div x y] is the division [x / y]. See {!Stdlib.( / )} for details. *) -val div : int8 -> int8 -> int8 - -(** [rem x y] is the remainder [x mod y]. See {!Stdlib.( mod )} for details. *) -val rem : int8 -> int8 -> int8 - -(** [succ x] is [add x 1]. *) -val succ : int8 -> int8 - -(** [pred x] is [sub x 1]. *) -val pred : int8 -> int8 - -(** [abs x] is the absolute value of [x]. That is [x] if [x] is positive - and [neg x] if [x] is negative. {b Warning.} This may be negative if - the argument is {!min_int}. *) -val abs : int8 -> int8 - -(** [max_int] is the greatest representable integer, - [2{^[8 - 1]} - 1]. *) -val max_int : int8 - -(** [min_int] is the smallest representable integer, - [-2{^[8 - 1]}]. *) -val min_int : int8 - -(** [logand x y] is the bitwise logical and of [x] and [y]. *) -val logand : int8 -> int8 -> int8 - -(** [logor x y] is the bitwise logical or of [x] and [y]. *) -val logor : int8 -> int8 -> int8 - -(** [logxor x y] is the bitwise logical exclusive or of [x] and [y]. *) -val logxor : int8 -> int8 -> int8 - -(** [lognot x] is the bitwise logical negation of [x]. *) -val lognot : int8 -> int8 - -(** [shift_left x n] shifts [x] to the left by [n] bits. The result - is unspecified if [n < 0] or [n > ]{!8}. *) -val shift_left : int8 -> int -> int8 - -(** [shift_right x n] shifts [x] to the right by [n] bits. 
This is an - arithmetic shift: the sign bit of [x] is replicated and inserted - in the vacated bits. The result is unspecified if [n < 0] or - [n > ]{!8}. *) -val shift_right : int8 -> int -> int8 - -(** [shift_right x n] shifts [x] to the right by [n] bits. This is a - logical shift: zeroes are inserted in the vacated bits regardless - of the sign of [x]. The result is unspecified if [n < 0] or - [n > ]{!8}. *) -val shift_right_logical : int8 -> int -> int8 - -(** {1:preds Predicates and comparisons} *) - -(** [equal x y] is [true] if and only if [x = y]. *) -val equal : int8 -> int8 -> bool - -(** [compare x y] is {!Stdlib.compare}[ x y] but more efficient. *) -val compare : int8 -> int8 -> int - -(** Return the smaller of the two arguments. *) -val min : int8 -> int8 -> int8 - -(** Return the greater of the two arguments. *) -val max : int8 -> int8 -> int8 - -(** {1:convert Converting} *) - -(** [to_int x] is [x] as an {!int}. *) -val to_int : int8 -> int - -(** [of_int x] represents [x] as an 8-bit integer. *) -val of_int : int -> int8 - -(** [to_float x] is [x] as a floating point number. *) -val to_float : int8 -> float - -(** [of_float x] truncates [x] to an integer. The result is - unspecified if the argument is [nan] or falls outside the range of - representable integers. *) -val of_float : float -> int8 +type t = int8 [@@immediate] -(** [to_string x] is the written representation of [x] in decimal. 
*) -val to_string : int8 -> string +(** @inline *) +include Int_wrapper.S with type t := int8 diff --git a/otherlibs/stdlib_beta/int_wrapper.ml b/otherlibs/stdlib_beta/int_wrapper.ml new file mode 100644 index 00000000000..baf31d5d07f --- /dev/null +++ b/otherlibs/stdlib_beta/int_wrapper.ml @@ -0,0 +1,321 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* The OCaml programmers *) +(* Jacob Van Buren, Jane Street, New York *) +(* *) +(* Copyright 2018 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* Copyright 2024 Jane Street Group LLC *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +[@@@ocaml.flambda_o3] + +include Stdlib.Int + +let int_size = Sys.int_size + +let[@inline available] of_int t = t + +let[@inline available] to_int t = t + +let[@inline available] unsigned_to_int t = t + +let[@inline available] unsigned_compare n m = + compare (sub n min_int) (sub m min_int) + +let[@inline] unsigned_lt n m = sub n min_int < sub m min_int + +(* Unsigned division from signed division of the same bitness. See Warren Jr., + Henry S. (2013). Hacker's Delight (2 ed.), Sec 9-3. *) +let[@inline available] unsigned_div n d = + if d < zero + then if unsigned_lt n d then zero else one + else + let q = shift_left (div (shift_right_logical n 1) d) 1 in + let r = sub n (mul q d) in + if unsigned_lt r d then q else succ q + +let[@inline available] unsigned_rem n d = + sub n (mul ((unsigned_div [@inlined]) n d) d) + +let seeded_hash seed x = Stdlib.Hashtbl.seeded_hash seed (x : int) + +let hash x = Stdlib.Hashtbl.hash (x : int) + +module type S = sig + (** Signed {n}-bit tagged integer values. 
+ + These integers are {n} bits wide and use two's complement representation. + All operations are taken modulo 2{^n}. They do not fail on overflow. *) + + (** {1:ints n-bit Integers} *) + + (** The type for n-bit integer values. *) + type t + + (** The number of bits in an integer of type {!t}. *) + val int_size : int + + val zero : t + + val one : t + + val minus_one : t + + val neg : t -> t + + val add : t -> t -> t + + val sub : t -> t -> t + + val mul : t -> t -> t + + (** Integer division. This division rounds the real quotient of + its arguments towards zero, as specified for {!Stdlib.(/)}. + @raise Division_by_zero if the second argument is zero. *) + val div : t -> t -> t + + (** Same as {!div}, except that arguments and result are interpreted as {e + unsigned} integers. *) + val unsigned_div : t -> t -> t + + (** Integer remainder. If [y] is not zero, [rem x y = sub x (mul (div x y) + y)]. If [y] is zero, [rem x y] raises [Division_by_zero]. *) + val rem : t -> t -> t + + (** Same as {!rem}, except that arguments and result are interpreted as {e + unsigned} integers. *) + val unsigned_rem : t -> t -> t + + (** [succ x] is [add x 1]. *) + val succ : t -> t + + (** [pred x] is [sub x 1]. *) + val pred : t -> t + + (** [abs x] is the absolute value of [x]. That is [x] if [x] is positive and + [neg x] if [x] is negative. {b Warning.} This may be negative if the + argument is {!min_int}. *) + val abs : t -> t + + (** [max_int] is the greatest representable integer, + [2{^[int_size - 1]} - 1]. *) + val max_int : t + + (** [min_int] is the smallest representable integer, + [-2{^[int_size - 1]}]. *) + val min_int : t + + (** Bitwise logical and. *) + val logand : t -> t -> t + + (** Bitwise logical or. *) + val logor : t -> t -> t + + (** Bitwise logical exclusive or. *) + val logxor : t -> t -> t + + (** Bitwise logical negation. *) + val lognot : t -> t + + (** [shift_left x n] shifts [x] to the left by [n] bits. 
The result + is unspecified if [n < 0] or [n >= ]{!int_size}. *) + val shift_left : t -> int -> t + + (** [shift_right x n] shifts [x] to the right by [n] bits. This is an + arithmetic shift: the sign bit of [x] is replicated and inserted + in the vacated bits. The result is unspecified if [n < 0] or + [n >=]{!int_size}. *) + val shift_right : t -> int -> t + + (** [shift_right x n] shifts [x] to the right by [n] bits. This is a + logical shift: zeroes are inserted in the vacated bits regardless + of the sign of [x]. The result is unspecified if [n < 0] or + [n >=]{!int_size}. *) + val shift_right_logical : t -> int -> t + + (** {1:preds Predicates and comparisons} *) + + (** [equal x y] is [true] if and only if [x = y]. *) + val equal : t -> t -> bool + + (** [compare x y] is {!Stdlib.compare}[ x y] but more efficient. *) + val compare : t -> t -> int + + (** Same as {!compare}, except that arguments are interpreted as {e unsigned} integers. *) + val unsigned_compare : t -> t -> int + + (** Return the lesser of the two arguments. *) + val min : t -> t -> t + + (** Return the greater of the two arguments. *) + val max : t -> t -> t + + (** {1:convert Converting} *) + + (** [to_int x] is [x] as an {!int}. If [int_size > Sys.int_size], the topmost + bits will be lost in the conversion *) + val to_int : t -> int + + (** [of_int x] truncates the representation of [x] to fit in {!t}. *) + val of_int : int -> t + + (** Same as {!to_int}, but interprets the argument as an {e unsigned} integer. *) + val unsigned_to_int : t -> int + + (** [to_float x] is [x] as a floating point number. *) + val to_float : t -> float + + (** [of_float x] truncates [x] to an integer. The result is + unspecified if the argument is [nan] or falls outside the range of + representable integers. *) + val of_float : float -> t + + (** [to_string x] is the written representation of [x] in decimal. 
*) + val to_string : t -> string + + (** A seeded hash function for ints, with the same output value as + {!Hashtbl.seeded_hash}. This function allows this module to be passed as + argument to the functor {!Hashtbl.MakeSeeded}. *) + val seeded_hash : int -> t -> int + + (** An unseeded hash function for ints, with the same output value as + {!Hashtbl.hash}. This function allows this module to be passed as argument + to the functor {!Hashtbl.Make}. *) + val hash : t -> int +end + +module Make + (Container : S) (Spec : sig + type t + + val int_size : int + + val inject : t -> Container.t + + val unchecked_project : Container.t -> t + end) : S with type t := Spec.t = struct + include Spec + + let () = assert (0 < int_size && int_size <= Container.int_size) + + let unused_bits = Container.int_size - int_size + + let[@inline] sign_extend i = + unchecked_project + (Container.shift_right (Container.shift_left i unused_bits) unused_bits) + + let[@inline] project i = + let t = sign_extend i in + if Container.equal i (inject t) then Some t else None + + let[@inline] zero_extend t = + Container.shift_right_logical + (Container.shift_left (inject t) unused_bits) + unused_bits + + let zero = sign_extend Container.zero + + let one = sign_extend Container.one + + let minus_one = sign_extend Container.minus_one + + let[@inline available] neg x = sign_extend (Container.neg (inject x)) + + let[@inline available] add x y = + sign_extend (Container.add (inject x) (inject y)) + + let[@inline available] sub x y = + sign_extend (Container.sub (inject x) (inject y)) + + let[@inline available] mul x y = + sign_extend (Container.mul (inject x) (inject y)) + + let[@inline available] div x y = + sign_extend (Container.div (inject x) (inject y)) + + let[@inline available] rem x y = + sign_extend (Container.rem (inject x) (inject y)) + + let[@inline available] succ x = sign_extend (Container.succ (inject x)) + + let[@inline available] pred x = sign_extend (Container.pred (inject x)) + + 
let[@inline available] abs x = sign_extend (Container.abs (inject x)) + + let[@inline available] equal x y = Container.equal (inject x) (inject y) + + let[@inline available] compare x y = Container.compare (inject x) (inject y) + + (* since the values are stored sign-extended, we can skip the sign-extension + for bitwise operations as the sign bits will all be treated the same *) + + let[@inline available] logand x y = + unchecked_project (Container.logand (inject x) (inject y)) + + let[@inline available] logor x y = + unchecked_project (Container.logor (inject x) (inject y)) + + let[@inline available] logxor x y = + unchecked_project (Container.logxor (inject x) (inject y)) + + let[@inline available] lognot x = + unchecked_project (Container.lognot (inject x)) + + let[@inline available] shift_left x y = + sign_extend (Container.shift_left (inject x) y) + + let[@inline available] shift_right x y = + sign_extend (Container.shift_right (inject x) y) + + let[@inline available] shift_right_logical x y = + sign_extend (Container.shift_right_logical (zero_extend x) y) + + let max_int = shift_right_logical minus_one 1 + + let min_int = succ max_int + + let[@inline available] unsigned_compare n m = + Container.unsigned_compare (zero_extend n) (zero_extend m) + + (* Unsigned division from signed division of the same bitness. See Warren Jr., + Henry S. (2013). Hacker's Delight (2 ed.), Sec 9-3. 
*) + let[@inline available] unsigned_div n d = + sign_extend (Container.unsigned_div (zero_extend n) (zero_extend d)) + + let[@inline available] unsigned_rem n d = + sign_extend (Container.unsigned_rem (zero_extend n) (zero_extend d)) + + let[@inline available] min x y = + unchecked_project (Container.min (inject x) (inject y)) + + let[@inline available] max x y = + unchecked_project (Container.max (inject x) (inject y)) + + let[@inline available] of_float f = + Option.value (project (Container.of_float f)) ~default:zero + + let[@inline available] to_float t = Container.to_float (inject t) + + let[@inline available] to_string t = Container.to_string (inject t) + + let[@inline available] seeded_hash seed t = + Container.seeded_hash seed (inject t) + + let[@inline available] hash t = Container.hash (inject t) + + let[@inline available] to_int t = Container.to_int (inject t) + + let[@inline available] of_int i = sign_extend (Container.of_int i) + + let[@inline available] unsigned_to_int t = + Container.unsigned_to_int (zero_extend t) +end +[@@inline available] diff --git a/otherlibs/stdlib_beta/stdlib_beta.ml b/otherlibs/stdlib_beta/stdlib_beta.ml index d3ead8bb464..2ef32596265 100644 --- a/otherlibs/stdlib_beta/stdlib_beta.ml +++ b/otherlibs/stdlib_beta/stdlib_beta.ml @@ -2,3 +2,4 @@ module Int8 = Int8 module Int8_u = Int8_u module Int16 = Int16 module Int16_u = Int16_u +module Int_wrapper = Int_wrapper diff --git a/otherlibs/stdlib_beta/stdlib_beta.mli b/otherlibs/stdlib_beta/stdlib_beta.mli index d3ead8bb464..2ef32596265 100644 --- a/otherlibs/stdlib_beta/stdlib_beta.mli +++ b/otherlibs/stdlib_beta/stdlib_beta.mli @@ -2,3 +2,4 @@ module Int8 = Int8 module Int8_u = Int8_u module Int16 = Int16 module Int16_u = Int16_u +module Int_wrapper = Int_wrapper diff --git a/runtime/caml/osdeps.h b/runtime/caml/osdeps.h index 4f8007712ca..9728770bcbb 100644 --- a/runtime/caml/osdeps.h +++ b/runtime/caml/osdeps.h @@ -106,9 +106,9 @@ extern int caml_format_timestamp(char* buf, 
size_t sz, int formatted); /* Memory management platform-specific operations */ -void *caml_plat_mem_map(uintnat, int); -void *caml_plat_mem_commit(void *, uintnat); -void caml_plat_mem_decommit(void *, uintnat); +void *caml_plat_mem_map(uintnat, int, const char*); +void *caml_plat_mem_commit(void *, uintnat, const char*); +void caml_plat_mem_decommit(void *, uintnat, const char*); void caml_plat_mem_unmap(void *, uintnat); #ifdef _WIN32 diff --git a/runtime/caml/platform.h b/runtime/caml/platform.h index ab44885b296..4db2d19e901 100644 --- a/runtime/caml/platform.h +++ b/runtime/caml/platform.h @@ -434,9 +434,9 @@ uintnat caml_mem_round_up_mapping_size(uintnat size); caml_plat_pagesize. The size given to caml_mem_unmap and caml_mem_decommit must match the size given to caml_mem_map/caml_mem_commit for mem. */ -void* caml_mem_map(uintnat size, int reserve_only); -void* caml_mem_commit(void* mem, uintnat size); -void caml_mem_decommit(void* mem, uintnat size); +void* caml_mem_map(uintnat size, int reserve_only, const char* name); +void* caml_mem_commit(void* mem, uintnat size, const char* name); +void caml_mem_decommit(void* mem, uintnat size, const char* name); void caml_mem_unmap(void* mem, uintnat size); diff --git a/runtime/domain.c b/runtime/domain.c index 1a57defb07c..f5a667bce2b 100644 --- a/runtime/domain.c +++ b/runtime/domain.c @@ -464,7 +464,8 @@ static void free_minor_heap(void) { no race whereby other code could attempt to reuse the memory. 
*/ caml_mem_decommit( (void*)domain_self->minor_heap_area_start, - Bsize_wsize(domain_state->minor_heap_wsz)); + Bsize_wsize(domain_state->minor_heap_wsz), + "minor reservation"); domain_state->young_start = NULL; domain_state->young_end = NULL; @@ -487,8 +488,10 @@ static int allocate_minor_heap(asize_t wsize) { caml_gc_log ("trying to allocate minor heap: %" ARCH_SIZET_PRINTF_FORMAT "uk words", wsize / 1024); + char name[32]; + snprintf(name, sizeof name, "minor heap %d", domain_self->id); if (!caml_mem_commit( - (void*)domain_self->minor_heap_area_start, Bsize_wsize(wsize))) { + (void*)domain_self->minor_heap_area_start, Bsize_wsize(wsize), name)) { return -1; } @@ -827,7 +830,7 @@ static void reserve_minor_heaps_from_stw_single(void) { minor_heap_reservation_bsize = minor_heap_max_bsz * Max_domains; /* reserve memory space for minor heaps */ - heaps_base = caml_mem_map(minor_heap_reservation_bsize, 1 /* reserve_only */); + heaps_base = caml_mem_map(minor_heap_reservation_bsize, 1 /* reserve_only */, "minor reservation"); if (heaps_base == NULL) caml_fatal_error("Not enough heap memory to reserve minor heaps"); diff --git a/runtime/fiber.c b/runtime/fiber.c index 72e8cd9b07d..9ff5091a6e3 100644 --- a/runtime/fiber.c +++ b/runtime/fiber.c @@ -20,6 +20,7 @@ #include "caml/config.h" #include +#include #ifdef HAS_UNISTD #include #endif @@ -44,6 +45,11 @@ #if defined(USE_MMAP_MAP_STACK) || !defined(STACK_CHECKS_ENABLED) #include #endif +#ifdef __linux__ +/* for gettid */ +#include +#include +#endif #ifdef DEBUG #define fiber_debug_log(...) 
caml_gc_log(__VA_ARGS__) @@ -183,7 +189,15 @@ Caml_inline struct stack_info* alloc_for_stack (mlsize_t wosize) // 2Mb (= extra_size) // -------------------- <- [stack], returned from [mmap], page-aligned char* stack; - stack = caml_mem_map(len + stack_extra_size_for_mmap, 0); +#ifdef __linux__ + /* On Linux, record the current TID in the mapping name */ + char mapping_name[64]; + snprintf(mapping_name, sizeof mapping_name, + "stack (tid %ld)", (long)syscall(SYS_gettid)); +#else + const char* mapping_name = "stack"; +#endif + stack = caml_mem_map(len + stack_extra_size_for_mmap, 0, mapping_name); if (stack == MAP_FAILED) { return NULL; } diff --git a/runtime/gc_ctrl.c b/runtime/gc_ctrl.c index 3dfd0d274d9..87f4d7cb8ac 100644 --- a/runtime/gc_ctrl.c +++ b/runtime/gc_ctrl.c @@ -132,9 +132,13 @@ CAMLprim value caml_gc_get(value v) res = caml_alloc_tuple (11); Store_field (res, 0, Val_long (Caml_state->minor_heap_wsz)); /* s */ + Store_field (res, 1, Val_long (0)); Store_field (res, 2, Val_long (caml_percent_free)); /* o */ Store_field (res, 3, Val_long (atomic_load_relaxed(&caml_verb_gc))); /* v */ + Store_field (res, 4, Val_long (0)); Store_field (res, 5, Val_long (caml_max_stack_wsize)); /* l */ + Store_field (res, 6, Val_long (0)); + Store_field (res, 7, Val_long (0)); Store_field (res, 8, Val_long (caml_custom_major_ratio)); /* M */ Store_field (res, 9, Val_long (caml_custom_minor_ratio)); /* m */ Store_field (res, 10, Val_long (caml_custom_minor_max_bsz)); /* n */ diff --git a/runtime/platform.c b/runtime/platform.c index 1e541d1354f..4dbacae1345 100644 --- a/runtime/platform.c +++ b/runtime/platform.c @@ -400,9 +400,9 @@ uintnat caml_mem_round_up_mapping_size(uintnat size) #define Is_page_aligned(size) ((size & (caml_plat_pagesize - 1)) == 0) -void* caml_mem_map(uintnat size, int reserve_only) +void* caml_mem_map(uintnat size, int reserve_only, const char* name) { - void* mem = caml_plat_mem_map(size, reserve_only); + void* mem = caml_plat_mem_map(size, 
reserve_only, name); if (mem == 0) { caml_gc_message(0x1000, "mmap %" ARCH_INTNAT_PRINTF_FORMAT "d bytes failed", @@ -411,32 +411,32 @@ void* caml_mem_map(uintnat size, int reserve_only) } caml_gc_message(0x1000, "mmap %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); + " bytes at %p for %s\n", size, mem, name); return mem; } -void* caml_mem_commit(void* mem, uintnat size) +void* caml_mem_commit(void* mem, uintnat size, const char* name) { CAMLassert(Is_page_aligned(size)); caml_gc_message(0x1000, "commit %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); - return caml_plat_mem_commit(mem, size); + " bytes at %p for %s\n", size, mem, name); + return caml_plat_mem_commit(mem, size, name); } -void caml_mem_decommit(void* mem, uintnat size) +void caml_mem_decommit(void* mem, uintnat size, const char* name) { if (size) { caml_gc_message(0x1000, "decommit %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); - caml_plat_mem_decommit(mem, size); + " bytes at %p for %s\n", size, mem, name); + caml_plat_mem_decommit(mem, size, name); } } void caml_mem_unmap(void* mem, uintnat size) { caml_gc_message(0x1000, "munmap %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); + " bytes at %p\n", size, mem); caml_plat_mem_unmap(mem, size); } diff --git a/runtime/shared_heap.c b/runtime/shared_heap.c index db0199723a5..85c25e5d0a6 100644 --- a/runtime/shared_heap.c +++ b/runtime/shared_heap.c @@ -75,6 +75,7 @@ static struct { /* Mapped but not yet active pools */ uintnat fresh_pools; char* next_fresh_pool; + uintnat next_chunk_index; /* Count of all pools in use across all domains and the global lists below. 
@@ -93,6 +94,7 @@ static struct { 0, NULL, 0, + 0, { 0, }, { NULL, }, { NULL, }, @@ -210,8 +212,11 @@ static pool* pool_acquire(struct caml_heap_state* local) { uintnat mapping_size = caml_mem_round_up_mapping_size(Bsize_wsize(POOL_WSIZE) * new_pools); new_pools = mapping_size / Bsize_wsize(POOL_WSIZE); - - void* mem = caml_mem_map(mapping_size, 0); + uintnat chunk_ix = pool_freelist.next_chunk_index++; + char mapping_name[64]; + snprintf(mapping_name, sizeof mapping_name, + "major heap (chunk %lu)", (unsigned long)chunk_ix); + void* mem = caml_mem_map(mapping_size, 0, mapping_name); if (mem) { pool_freelist.fresh_pools = new_pools; pool_freelist.next_fresh_pool = mem; diff --git a/runtime/unix.c b/runtime/unix.c index 546eb25efc8..a940f223e07 100644 --- a/runtime/unix.c +++ b/runtime/unix.c @@ -61,6 +61,9 @@ #ifdef HAS_SYS_MMAN_H #include #endif +#ifdef __linux__ +#include +#endif #include "caml/fail.h" #include "caml/memory.h" #include "caml/misc.h" @@ -508,7 +511,31 @@ void caml_init_os_params(void) #ifndef __CYGWIN__ -void *caml_plat_mem_map(uintnat size, int reserve_only) +static void* mmap_named(void* addr, size_t length, int prot, int flags, + int fd, off_t offset, const char* name) +{ + void* p = mmap(addr, length, prot, flags, fd, offset); +#ifdef __linux__ + if (p != MAP_FAILED) { + /* On Linux, use PR_SET_VMA_ANON_NAME to name the allocation */ + char buf[80]; + snprintf(buf, sizeof buf, "OCaml: %s", name); + /* The constants PR_SET_VMA and PR_SET_VMA_ANON_NAME are stable + (part of the Linux kernel ABI), but may not be provided by the + libc headers (e.g. a newer kernel with an older userspace). + So, it's more portable to hardcode these numbers */ + enum { PR_SET_VMA_ = 0x53564d41, PR_SET_VMA_ANON_NAME_ = 0 }; + prctl(PR_SET_VMA_, PR_SET_VMA_ANON_NAME_, + (unsigned long)p, length, (unsigned long)buf); + /* No error checking or reporting here: it's a best-effort tool + for debugging, and may fail if e.g. 
this prctl is not supported + on this kernel version. */ + } +#endif + return p; +} + +void *caml_plat_mem_map(uintnat size, int reserve_only, const char* name) { void* mem; int prot = reserve_only ? PROT_NONE : (PROT_READ | PROT_WRITE); @@ -518,7 +545,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) if (size < alignment || alignment < caml_plat_pagesize) { /* Short mapping or unknown/bad hugepagesize. Either way, not worth bothering with alignment. */ - mem = mmap(0, size, prot, flags, -1, 0); + mem = mmap_named(0, size, prot, flags, -1, 0, name); if (mem == MAP_FAILED) mem = NULL; return mem; } @@ -526,7 +553,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) /* Sensible kernels (on Linux, that means >= 6.7) will always provide aligned mappings. To avoid penalising such kernels, try mapping the exact desired size first and see if it happens to be aligned. */ - mem = mmap(0, size, prot, flags, -1, 0); + mem = mmap_named(0, size, prot, flags, -1, 0, name); if (mem == MAP_FAILED) return NULL; if ((((uintnat)mem) & (alignment - 1)) == 0) return mem; @@ -536,7 +563,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) munmap(mem, size); /* Allocate a longer region than needed and trim it afterwards */ - mem = mmap(0, size + alignment, prot, flags, -1, 0); + mem = mmap_named(0, size + alignment, prot, flags, -1, 0, name); if (mem == MAP_FAILED) return NULL; uintnat aligned = ((uintnat)mem + alignment) & ~(alignment - 1); @@ -546,10 +573,10 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) return (void*)aligned; } -static void* map_fixed(void* mem, uintnat size, int prot) +static void* map_fixed(void* mem, uintnat size, int prot, const char* name) { - if (mmap(mem, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - -1, 0) == MAP_FAILED) { + if (mmap_named(mem, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + -1, 0, name) == MAP_FAILED) { return 0; } else { return mem; @@ -564,7 +591,7 @@ static void* map_fixed(void* mem, 
uintnat size, int prot) done using mprotect, since Cygwin's mmap doesn't implement the required functions for committing using mmap. */ -void *caml_plat_mem_map(uintnat size, int reserve_only) +void *caml_plat_mem_map(uintnat size, int reserve_only, const char* name) { void* mem; @@ -576,7 +603,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) return mem; } -static void* map_fixed(void* mem, uintnat size, int prot) +static void* map_fixed(void* mem, uintnat size, int prot, const char* name) { if (mprotect(mem, size, prot) != 0) { return 0; @@ -587,9 +614,9 @@ static void* map_fixed(void* mem, uintnat size, int prot) #endif /* !__CYGWIN__ */ -void* caml_plat_mem_commit(void* mem, uintnat size) +void* caml_plat_mem_commit(void* mem, uintnat size, const char* name) { - void* p = map_fixed(mem, size, PROT_READ | PROT_WRITE); + void* p = map_fixed(mem, size, PROT_READ | PROT_WRITE, name); /* FIXME: On Linux, it might be useful to populate page tables with MAP_POPULATE to reduce the time spent blocking on page faults at @@ -598,9 +625,9 @@ void* caml_plat_mem_commit(void* mem, uintnat size) return p; } -void caml_plat_mem_decommit(void* mem, uintnat size) +void caml_plat_mem_decommit(void* mem, uintnat size, const char* name) { - map_fixed(mem, size, PROT_NONE); + map_fixed(mem, size, PROT_NONE, name); } void caml_plat_mem_unmap(void* mem, uintnat size) diff --git a/stdlib/gc.mli b/stdlib/gc.mli index 9037008e99e..5cb8a66e738 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -45,7 +45,7 @@ type stat = heap_chunks : int; (** Number of contiguous pieces of memory that make up the major heap. - This metrics is currently not available in OCaml 5: the field value is + This metric is currently not available in OCaml 5: the field value is always [0]. *) live_words : int; @@ -74,12 +74,12 @@ type stat = free_blocks : int; (** Number of blocks in the free list. 
- This metrics is currently not available in OCaml 5: the field value is + This metric is currently not available in OCaml 5: the field value is always [0]. *) largest_free : int; (** Size (in words) of the largest block in the free list. - This metrics is currently not available in OCaml 5: the field value + This metric is currently not available in OCaml 5: the field value is always [0]. *) fragments : int; @@ -95,7 +95,7 @@ type stat = stack_size: int; (** Current size of the stack, in words. - This metrics is currently not available in OCaml 5: the field value is + This metric is currently not available in OCaml 5: the field value is always [0]. @since 3.12 *) @@ -127,7 +127,9 @@ type control = number is less than or equal to 1000, it is a percentage of the current heap size (i.e. setting it to 100 will double the heap size at each increase). If it is more than 1000, it is a fixed - number of words that will be added to the heap. Default: 15. *) + number of words that will be added to the heap. Default: 15. + This metric is currently not available in OCaml 5: the field value is + always [0]. *) space_overhead : int; (** The major GC speed is computed from this parameter. @@ -164,7 +166,9 @@ type control = If [max_overhead >= 1000000], compaction is never triggered. On runtime4, if compaction is permanently disabled, it is strongly suggested to set [allocation_policy] to 2. - Default: 500. *) + Default: 500. + This metric is currently not available in OCaml 5: the field value is + always [0]. *) stack_limit : int; (** The maximum size of the fiber stacks (in words). @@ -217,6 +221,9 @@ type control = Default: 2. + This metric is currently not available in OCaml 5: the field value is + always [0]. + ---------------------------------------------------------------- @since 3.11 *) @@ -226,6 +233,8 @@ type control = out variations in its workload. This is an integer between 1 and 50. Default: 1. 
+ This metric is currently not available in OCaml 5: the field value is + always [0]. @since 4.03 *) custom_major_ratio : int; diff --git a/testsuite/tests/capsule-api/condition.ml b/testsuite/tests/capsule-api/condition.ml new file mode 100644 index 00000000000..1f7e33cafed --- /dev/null +++ b/testsuite/tests/capsule-api/condition.ml @@ -0,0 +1,53 @@ +(* TEST + include stdlib_alpha; + flags = "-extension-universe alpha"; + runtime5; + { bytecode; } + { native; } +*) + +[@@@ocaml.alert "-unsafe_parallelism"] + +module Capsule = Stdlib_alpha.Capsule + +external ref : 'a -> 'a ref @@ portable = "%makemutable" +external ( ! ) : 'a ref -> 'a @@ portable = "%field0" +external ( := ) : 'a ref -> 'a -> unit @@ portable = "%setfield0" + +let () = (* Signal *) + let (P mut) = Capsule.create_with_mutex () in + let cond = Capsule.Condition.create () in + let go = Capsule.Data.create (fun () -> ref true) in + let wait = Atomic.make true in + let domain = Domain.spawn (fun () -> + Capsule.Mutex.with_lock mut (fun password -> + Atomic.set wait false; + while Capsule.Data.extract password (fun go : bool -> !go) go do + Capsule.Condition.wait cond mut password + done)) + in + while Atomic.get wait do () done; + Capsule.Mutex.with_lock mut (fun password -> + Capsule.Data.iter password (fun go -> go := false) go; + Capsule.Condition.signal cond); + Domain.join domain +;; + +let () = (* Broadcast *) + let (P mut) = Capsule.create_with_mutex () in + let cond = Capsule.Condition.create () in + let go = Capsule.Data.create (fun () -> ref true) in + let ready = Atomic.make 0 in + let domains = List.init 4 (fun _ -> Domain.spawn (fun () -> + Capsule.Mutex.with_lock mut (fun password -> + Atomic.incr ready; + while Capsule.Data.extract password (fun go : bool -> !go) go do + Capsule.Condition.wait cond mut password + done))) + in + while Atomic.get ready < 4 do () done; + Capsule.Mutex.with_lock mut (fun password -> + Capsule.Data.iter password (fun go -> go := false) go; + 
Capsule.Condition.broadcast cond); + List.iter Domain.join domains +;; diff --git a/testsuite/tests/capsule-api/data.ml b/testsuite/tests/capsule-api/data.ml index 15368fd1921..24a9e5f58ce 100644 --- a/testsuite/tests/capsule-api/data.ml +++ b/testsuite/tests/capsule-api/data.ml @@ -176,3 +176,31 @@ let ptr' : (int, lost_capsule) Capsule.Data.t = let () = assert (Capsule.Data.project ptr' = 111) ;; + + +(* [protect]. *) +exception Exn of string + +let () = + match Capsule.protect (fun () -> "ok") with + | s -> assert (s = "ok") + | exception _ -> assert false +;; + +let () = + match Capsule.protect (fun () -> Exn "ok") with + | Exn s -> assert (s = "ok") + | _ -> assert false +;; + +let () = + match Capsule.protect (fun () -> reraise (Exn "fail")) with + | exception (Capsule.Protected (mut, exn)) -> + let s = Capsule.Mutex.with_lock mut (fun password -> + Capsule.Data.extract password (fun exn -> + match exn with + | Exn s -> s + | _ -> assert false) exn) in + assert (s = "fail") + | _ -> assert false +;; diff --git a/testsuite/tests/lib-smallint/.ocamlformat-enable b/testsuite/tests/lib-smallint/.ocamlformat-enable index a0b681cc67c..6ae035c5dad 100644 --- a/testsuite/tests/lib-smallint/.ocamlformat-enable +++ b/testsuite/tests/lib-smallint/.ocamlformat-enable @@ -1,3 +1,2 @@ -test_int8.ml -test_int16.ml +test_smallint.ml diff --git a/testsuite/tests/lib-smallint/test_int16.ml b/testsuite/tests/lib-smallint/test_int16.ml index c2eb1503a03..767bde7d724 100644 --- a/testsuite/tests/lib-smallint/test_int16.ml +++ b/testsuite/tests/lib-smallint/test_int16.ml @@ -1,143 +1,11 @@ -(* TEST include stdlib_beta; flags = "-extension small_numbers_beta"; *) - -module Int16 = Stdlib_beta.Int16 - -let int_size = Int16.int_size - -let () = assert (0 < int_size && int_size < Sys.int_size) - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = lnot max_int - -let mask = (1 lsl int_size) - 1 - -let to_int x : int = - let i : int = Int16.to_int x in - assert (Obj.repr i == 
Obj.repr x); - assert (min_int <= i && i <= max_int); - i - -let of_int (i : int) = - let x = Int16.of_int i in - assert (to_int x land mask == i land mask); - x - -let rng = Random.State.make [| int_size |] - -(** sparse test cases, concentrated around 0 and the endpoints *) -let test_cases = - let is_even = 1 - (int_size land 1) in - List.init (int_size - is_even) (fun size -> - let bit = 1 lsl size in - let rand () = - Random.State.int_in_range rng ~min:(bit lsr 1) ~max:(bit - 1) - in - [rand (); lnot (rand ()); max_int - rand (); lnot (max_int - rand ())]) - |> List.concat |> List.sort Int.compare - -let test1 f = ListLabels.iter test_cases ~f - -let test2 f = test1 (fun x -> test1 (fun y -> f x y)) - -let test_round_trip () = - let test hi lo = - let hi = hi lsl int_size in - assert (lo == to_int (of_int (hi lxor lo))) - in - test2 (fun hi lo -> - (* generate test cases with different hi bits *) - test hi lo; - test (Random.bits ()) lo) - -let equal_arith x i = x == of_int i - -let equal_logical x i = to_int x == i - -let same_float x y = Int64.equal (Int64.bits_of_float x) (Int64.bits_of_float y) - -let assert_equal equal x y = - let x = try Ok (x ()) with exn -> Error exn in - let y = try Ok (y ()) with exn -> Error exn in - match x, y with - | Ok x, Ok y -> assert (equal x y) - | Error exn, Error exn' -> assert (exn = exn') - | Ok _, Error exn | Error exn, Ok _ -> raise exn - -let test_conv1 int16_f int_f ~equal = - test1 (fun x -> - assert_equal equal (fun () -> int16_f (of_int x)) (fun () -> int_f x)) - -let test_conv2 int16_f int_f ~equal = - test2 (fun x y -> - assert_equal equal - (fun () -> int16_f (of_int x) (of_int y)) - (fun () -> int_f x y)) - -let test_arith1 = test_conv1 ~equal:equal_arith - -let test_arith2 = test_conv2 ~equal:equal_arith - -let test_logical1 = test_conv1 ~equal:equal_logical - -let test_logical2 = test_conv2 ~equal:equal_logical - -let reference_shift_right_logical x i = - (* we need to ensure that we shift in zero bytes, 
which is incompatible with - sign-extension *) - Int.shift_right_logical (if i > 0 then x land mask else x) i +(* TEST + include stdlib_beta; + modules = "test_smallint.ml"; + flags = "-extension small_numbers_beta"; +*) let () = - test_round_trip (); - assert (to_int Int16.zero == Int.zero); - assert (to_int Int16.one == Int.one); - assert (to_int Int16.minus_one == Int.minus_one); - test_arith2 Int16.add Int.add; - test_arith2 Int16.sub Int.sub; - test_arith2 Int16.mul Int.mul; - test_arith2 Int16.div Int.div; - test_arith2 Int16.rem Int.rem; - test_arith1 Int16.succ Int.succ; - test_arith1 Int16.pred Int.pred; - test_arith1 Int16.abs Int.abs; - test_logical2 Int16.logand Int.logand; - test_logical2 Int16.logor Int.logor; - test_logical2 Int16.logxor Int.logxor; - test_logical1 Int16.lognot Int.lognot; - for shift = 0 to int_size do - let apply_shift f x = f x shift in - test_logical1 (apply_shift Int16.shift_right) (apply_shift Int.shift_right); - test_conv1 - (apply_shift Int16.shift_right_logical) - (apply_shift reference_shift_right_logical) - ~equal:equal_logical; - test_conv1 - (apply_shift Int16.shift_left) - (apply_shift Int.shift_left) - ~equal:(if shift = 0 then equal_logical else equal_arith) - done; - test_conv2 Int16.equal Int.equal ~equal:Bool.equal; - test_conv2 Int16.compare Int.compare ~equal:Int.equal; - test_conv1 Int16.to_float Int.to_float ~equal:same_float; - assert (Int16.of_float (-0.) = Int16.zero); - test1 (fun x -> - let f = Int.to_float x in - assert (equal_logical (Int16.of_float f) x)); - test1 (fun x -> - (* test fractional values round toward zero *) - let f = Int.to_float x in - let f' = - let almost_one = Random.State.float rng (Float.pred 1.0) in - if x < 0 - then f -. almost_one - else if x > 0 - then f +. 
almost_one - else if Random.State.bool rng - then almost_one - else -.almost_one - in - assert (equal_logical (Int16.of_float f') x)); - test1 (fun x -> assert (Int16.to_string (of_int x) = Int.to_string x)); - test_logical2 Int16.min Int.min; - test_logical2 Int16.max Int.max; - () + Test_smallint.run + (module Stdlib_beta.Int16) + ~min_int:(-0x8000) + ~max_int:0x7fff diff --git a/testsuite/tests/lib-smallint/test_int8.ml b/testsuite/tests/lib-smallint/test_int8.ml index 785bc9e30ad..77b08a5274b 100644 --- a/testsuite/tests/lib-smallint/test_int8.ml +++ b/testsuite/tests/lib-smallint/test_int8.ml @@ -1,143 +1,11 @@ -(* TEST include stdlib_beta; flags = "-extension small_numbers_beta"; *) - -module Int8 = Stdlib_beta.Int8 - -let int_size = Int8.int_size - -let () = assert (0 < int_size && int_size < Sys.int_size) - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = lnot max_int - -let mask = (1 lsl int_size) - 1 - -let to_int x : int = - let i : int = Int8.to_int x in - assert (Obj.repr i == Obj.repr x); - assert (min_int <= i && i <= max_int); - i - -let of_int (i : int) = - let x = Int8.of_int i in - assert (to_int x land mask == i land mask); - x - -let rng = Random.State.make [| int_size |] - -(** sparse test cases, concentrated around 0 and the endpoints *) -let test_cases = - let is_even = 1 - (int_size land 1) in - List.init (int_size - is_even) (fun size -> - let bit = 1 lsl size in - let rand () = - Random.State.int_in_range rng ~min:(bit lsr 1) ~max:(bit - 1) - in - [rand (); lnot (rand ()); max_int - rand (); lnot (max_int - rand ())]) - |> List.concat |> List.sort Int.compare - -let test1 f = ListLabels.iter test_cases ~f - -let test2 f = test1 (fun x -> test1 (fun y -> f x y)) - -let test_round_trip () = - let test hi lo = - let hi = hi lsl int_size in - assert (lo == to_int (of_int (hi lxor lo))) - in - test2 (fun hi lo -> - (* generate test cases with different hi bits *) - test hi lo; - test (Random.bits ()) lo) - -let equal_arith x i = x == 
of_int i - -let equal_logical x i = to_int x == i - -let same_float x y = Int64.equal (Int64.bits_of_float x) (Int64.bits_of_float y) - -let assert_equal equal x y = - let x = try Ok (x ()) with exn -> Error exn in - let y = try Ok (y ()) with exn -> Error exn in - match x, y with - | Ok x, Ok y -> assert (equal x y) - | Error exn, Error exn' -> assert (exn = exn') - | Ok _, Error exn | Error exn, Ok _ -> raise exn - -let test_conv1 int8_f int_f ~equal = - test1 (fun x -> - assert_equal equal (fun () -> int8_f (of_int x)) (fun () -> int_f x)) - -let test_conv2 int8_f int_f ~equal = - test2 (fun x y -> - assert_equal equal - (fun () -> int8_f (of_int x) (of_int y)) - (fun () -> int_f x y)) - -let test_arith1 = test_conv1 ~equal:equal_arith - -let test_arith2 = test_conv2 ~equal:equal_arith - -let test_logical1 = test_conv1 ~equal:equal_logical - -let test_logical2 = test_conv2 ~equal:equal_logical - -let reference_shift_right_logical x i = - (* we need to ensure that we shift in zero bytes, which is incompatible with - sign-extension *) - Int.shift_right_logical (if i > 0 then x land mask else x) i +(* TEST + include stdlib_beta; + modules = "test_smallint.ml"; + flags = "-extension small_numbers_beta"; +*) let () = - test_round_trip (); - assert (to_int Int8.zero == Int.zero); - assert (to_int Int8.one == Int.one); - assert (to_int Int8.minus_one == Int.minus_one); - test_arith2 Int8.add Int.add; - test_arith2 Int8.sub Int.sub; - test_arith2 Int8.mul Int.mul; - test_arith2 Int8.div Int.div; - test_arith2 Int8.rem Int.rem; - test_arith1 Int8.succ Int.succ; - test_arith1 Int8.pred Int.pred; - test_arith1 Int8.abs Int.abs; - test_logical2 Int8.logand Int.logand; - test_logical2 Int8.logor Int.logor; - test_logical2 Int8.logxor Int.logxor; - test_logical1 Int8.lognot Int.lognot; - for shift = 0 to int_size do - let apply_shift f x = f x shift in - test_logical1 (apply_shift Int8.shift_right) (apply_shift Int.shift_right); - test_conv1 - (apply_shift 
Int8.shift_right_logical) - (apply_shift reference_shift_right_logical) - ~equal:equal_logical; - test_conv1 - (apply_shift Int8.shift_left) - (apply_shift Int.shift_left) - ~equal:(if shift = 0 then equal_logical else equal_arith) - done; - test_conv2 Int8.equal Int.equal ~equal:Bool.equal; - test_conv2 Int8.compare Int.compare ~equal:Int.equal; - test_conv1 Int8.to_float Int.to_float ~equal:same_float; - assert (Int8.of_float (-0.) = Int8.zero); - test1 (fun x -> - let f = Int.to_float x in - assert (equal_logical (Int8.of_float f) x)); - test1 (fun x -> - (* test fractional values round toward zero *) - let f = Int.to_float x in - let f' = - let almost_one = Random.State.float rng (Float.pred 1.0) in - if x < 0 - then f -. almost_one - else if x > 0 - then f +. almost_one - else if Random.State.bool rng - then almost_one - else -.almost_one - in - assert (equal_logical (Int8.of_float f') x)); - test1 (fun x -> assert (Int8.to_string (of_int x) = Int.to_string x)); - test_logical2 Int8.min Int.min; - test_logical2 Int8.max Int.max; - () + Test_smallint.run + (module Stdlib_beta.Int8) + ~min_int:(-0x80) + ~max_int:0x7f diff --git a/testsuite/tests/lib-smallint/test_smallint.ml b/testsuite/tests/lib-smallint/test_smallint.ml new file mode 100644 index 00000000000..ec2b1a4cb64 --- /dev/null +++ b/testsuite/tests/lib-smallint/test_smallint.ml @@ -0,0 +1,156 @@ +module Int = Stdlib_beta.Int_wrapper + +let same_float x y = Int64.equal (Int64.bits_of_float x) (Int64.bits_of_float y) + +let phys_same x y = Obj.repr x == Obj.repr y + +(** generates a random float that rounds toward zero to the same integer value *) +let nudge rng f = + let f_pos = Float.abs f in + if not (Float.is_finite f) + then f + else if f_pos < 1. + then Random.State.float rng (Float.pred 1.0) + else + let lo = Float.floor f_pos in + let hi = Float.pred (lo +. 1.) 
in + if not (lo < hi) + then f + else + (* the mantissa is the low bits, and we are only generating normal + fractional values so we never need to change the exponent *) + let lo = Int64.bits_of_float lo in + let hi = Int64.bits_of_float hi in + assert (Int64.shift_right lo 52 = Int64.shift_right hi 52); + Float.copy_sign + (Int64.float_of_bits (Random.State.int64_in_range rng ~min:lo ~max:hi)) + f + +let run (module Smallint : Int.S) ~min_int ~max_int = + let int_size = Smallint.int_size in + assert (0 < int_size && int_size <= Sys.int_size); + assert (max_int = (1 lsl (int_size - 1)) - 1); + assert (min_int = lnot max_int); + let mask = (1 lsl int_size) - 1 in + let to_int x : int = + let i : int = Smallint.to_int x in + assert (phys_same i x); + assert (min_int <= i && i <= max_int); + i + in + let of_int (i : int) = + let x = Smallint.of_int i in + if not (to_int x land mask = i land mask) + then + failwith + (Printf.sprintf "%x (%x) <> %x (%x)" + (to_int x land mask) + (to_int x) (i land mask) i); + x + in + let rng = Random.State.make [| int_size |] in + let test_cases = + (* sparse test cases, concentrated around 0 and the endpoints *) + List.init (int_size - 1) (fun size -> + let bit = 1 lsl size in + let rand () = + Random.State.int_in_range rng ~min:(bit lsr 1) ~max:(bit - 1) + in + [rand (); lnot (rand ()); max_int - rand (); lnot (max_int - rand ())]) + |> List.concat |> List.sort Int.compare + in + let special_floats = Float.[infinity; nan; neg_infinity; epsilon; -0.; 0.] 
in + let test1 f = ListLabels.iter test_cases ~f in + let test2 f = test1 (fun x -> test1 (fun y -> f x y)) in + let test_round_trip () = + let test hi lo = + let hi = hi lsl int_size in + assert (phys_same lo (to_int (of_int (hi lxor lo)))) + in + test2 (fun hi lo -> + (* generate test cases with different hi bits *) + test hi lo; + test (Random.bits ()) lo) + in + let equal_arith x i = phys_same x (of_int i) in + let equal_logical x i = to_int x == i in + let assert_equal equal x y = + let x = try Ok (x ()) with exn -> Error exn in + let y = try Ok (y ()) with exn -> Error exn in + match x, y with + | Ok x, Ok y -> assert (equal x y) + | Error exn, Error exn' -> assert (exn = exn') + | Ok _, Error exn | Error exn, Ok _ -> raise exn + in + let test_conv1 int16_f int_f ~equal = + test1 (fun x -> + assert_equal equal (fun () -> int16_f (of_int x)) (fun () -> int_f x)) + in + let test_conv2 ?(unsigned = false) int16_f int_f ~equal = + test2 (fun x y -> + assert_equal equal + (fun () -> int16_f (of_int x) (of_int y)) + (fun () -> + if unsigned then int_f (x land mask) (y land mask) else int_f x y)) + in + let test_arith1 = test_conv1 ~equal:equal_arith in + let test_arith2 ?__LINE__ = test_conv2 ~equal:equal_arith in + let test_logical1 = test_conv1 ~equal:equal_logical in + let test_logical2 = test_conv2 ~equal:equal_logical in + let reference_shift_right_logical x i = + (* we need to ensure that we shift in zero bytes, which is incompatible with + sign-extension *) + Int.shift_right_logical (if i > 0 then x land mask else x) i + in + test_round_trip (); + assert (phys_same Smallint.zero Int.zero); + assert (phys_same Smallint.one Int.one); + assert (phys_same Smallint.minus_one Int.minus_one); + assert (phys_same Smallint.max_int max_int); + assert (phys_same Smallint.min_int min_int); + assert (phys_same Smallint.(abs min_int) Smallint.min_int); + assert (phys_same (Smallint.succ Smallint.max_int) Smallint.min_int); + assert (phys_same (Smallint.pred 
Smallint.min_int) Smallint.max_int); + test_arith2 Smallint.add Int.add; + test_arith2 Smallint.sub Int.sub; + test_arith2 Smallint.mul Int.mul; + test_arith2 Smallint.div Int.div; + test_arith2 Smallint.unsigned_div Int.unsigned_div ~unsigned:true; + test_arith2 Smallint.unsigned_rem Int.unsigned_rem ~unsigned:true; + test_arith2 Smallint.rem Int.rem; + test_arith1 Smallint.succ Int.succ; + test_arith1 Smallint.pred Int.pred; + test_arith1 Smallint.abs Int.abs; + test_arith1 Smallint.neg Int.neg; + test_logical2 Smallint.logand Int.logand; + test_logical2 Smallint.logor Int.logor; + test_logical2 Smallint.logxor Int.logxor; + test_logical1 Smallint.lognot Int.lognot; + for shift = 0 to int_size - 1 do + let apply_shift f x = f x shift in + test_logical1 + (apply_shift Smallint.shift_right) + (apply_shift Int.shift_right); + test_logical1 + (apply_shift Smallint.shift_right_logical) + (apply_shift reference_shift_right_logical); + test_conv1 + (apply_shift Smallint.shift_left) + (apply_shift Int.shift_left) + ~equal:(if shift = 0 then equal_logical else equal_arith) + done; + test_conv2 Smallint.equal Int.equal ~equal:Bool.equal; + test_conv2 Smallint.compare Int.compare ~equal:Int.equal; + test_conv2 Smallint.unsigned_compare Int.unsigned_compare ~equal:Int.equal; + test_conv1 Smallint.to_float Int.to_float ~equal:same_float; + ListLabels.iter + (special_floats @ List.map Int.to_float test_cases) + ~f:(fun f -> assert (equal_arith (Smallint.of_float f) (Int.of_float f))); + test1 (fun x -> + (* test that fractional values round toward zero *) + let f = nudge rng (Int.to_float x) in + assert (equal_logical (Smallint.of_float f) x)); + test1 (fun x -> assert (Smallint.to_string (of_int x) = Int.to_string x)); + test_logical2 Smallint.min Int.min; + test_logical2 Smallint.max Int.max; + () diff --git a/testsuite/tests/parsetree/source_jane_street.ml b/testsuite/tests/parsetree/source_jane_street.ml index c6010930211..55d134d925f 100644 --- 
a/testsuite/tests/parsetree/source_jane_street.ml +++ b/testsuite/tests/parsetree/source_jane_street.ml @@ -24,10 +24,10 @@ let f (type (a : immediate) (b : immediate)) (x : a) = x;; let f (type (a : immediate) (b : immediate) c) (x : a) = x;; [%%expect{| -val f : ('a : immediate). 'a -> 'a @@ global many = -val f : ('a : immediate). 'a -> 'a @@ global many = -val f : ('a : immediate). 'a -> 'a @@ global many = -val f : ('a : immediate). 'a -> 'a @@ global many = +val f : ('a : immediate). 'a -> 'a = +val f : ('a : immediate). 'a -> 'a = +val f : ('a : immediate). 'a -> 'a = +val f : ('a : immediate). 'a -> 'a = |}] let f y (type a : immediate) (x : a) = x;; @@ -35,12 +35,9 @@ let f y (type (a : immediate)) (x : a) = x;; let f y (type (a : immediate) (b : immediate)) (x : a) = x;; [%%expect{| -val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a @@ global many = - -val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a @@ global many = - -val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a @@ global many = - +val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a = +val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a = +val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a = |}] let f y (type a : immediate) = y;; @@ -49,10 +46,10 @@ let f y (type (a : immediate) (b : immediate)) = y;; let f y (type (a : immediate) (b : immediate) c) = y;; [%%expect{| -val f : ('a : value_or_null). 'a -> 'a @@ global many = -val f : ('a : value_or_null). 'a -> 'a @@ global many = -val f : ('a : value_or_null). 'a -> 'a @@ global many = -val f : ('a : value_or_null). 'a -> 'a @@ global many = +val f : ('a : value_or_null). 'a -> 'a = +val f : ('a : value_or_null). 'a -> 'a = +val f : ('a : value_or_null). 'a -> 'a = +val f : ('a : value_or_null). 
'a -> 'a = |}] (* Just newtypes, no value parameters *) @@ -61,7 +58,7 @@ let f (type a : immediate) (type b : immediate) = ();; [%%expect{| -val f : unit @@ global many = () +val f : unit = () |}] module type S_for_layouts = sig @@ -94,10 +91,10 @@ let f : (unit as (_ : immediate)) -> unit = fun () -> () [%%expect{| type ('a : immediate) for_layouts = 'a -val f : ('a : float64) ('b : bits64). 'a -> 'b -> 'a @@ global many = -val f : ('a : float64). 'a -> 'a @@ global many = -val f : (('a : float64). 'a -> 'a) -> int @@ global many = -val f : unit -> unit @@ global many = +val f : ('a : float64) ('b : bits64). 'a -> 'b -> 'a = +val f : ('a : float64). 'a -> 'a = +val f : (('a : float64). 'a -> 'a) -> int = +val f : unit -> unit = |}] type (_ : any) ignore = K1 @@ -160,8 +157,7 @@ let f xs = match xs with [%%expect{| type t = #(int * float#) -val f : ('a : value_or_null). #('a * float#) -> #('a * float#) @@ global many = - +val f : ('a : value_or_null). #('a * float#) -> #('a * float#) = |}] module M = struct @@ -171,7 +167,7 @@ let x () = #( M.Null, M.This "hi" ) [%%expect{| module M : sig type 'a t = 'a or_null = Null | This of 'a end -val x : unit -> #('a M.t * string M.t) @@ global many = +val x : unit -> #('a M.t * string M.t) = |}] external id : ('a : any). 
'a -> 'a = "%identity" [@@layout_poly] @@ -189,21 +185,21 @@ let nums = [| x for x = 0 to 100 |];; let nums = [ x for x = 0 to 100 ];; [%%expect{| -val nums : int iarray @@ global many = +val nums : int iarray = [:0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68; 69; 70; 71; 72; 73; 74; 75; 76; 77; 78; 79; 80; 81; 82; 83; 84; 85; 86; 87; 88; 89; 90; 91; 92; 93; 94; 95; 96; 97; 98; 99; 100:] -val nums : int array @@ global many = +val nums : int array = [|0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68; 69; 70; 71; 72; 73; 74; 75; 76; 77; 78; 79; 80; 81; 82; 83; 84; 85; 86; 87; 88; 89; 90; 91; 92; 93; 94; 95; 96; 97; 98; 99; 100|] -val nums : int list @@ global many = +val nums : int list = [0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 52; 53; 54; 55; 56; @@ -218,9 +214,9 @@ let nums = [| x for x in [| 1; 2; 3 |] |];; let nums = [ x for x in [ 1; 2; 3 ] ];; [%%expect{| -val nums : int iarray @@ global many = [:1; 2; 3:] -val nums : int array @@ global many = [|1; 2; 3|] -val nums : int list @@ global many = [1; 2; 3] +val nums : int iarray = [:1; 2; 3:] +val nums : int array = [|1; 2; 3|] +val nums : int list = [1; 2; 3] |}] (* complex comprehension *) @@ -251,10 +247,10 @@ let nums = [| n, m, x |];; [%%expect{| -val nums : int iarray @@ global many = [:7; 7; 8; 7; 8; 9:] -val nums : int array @@ global many = [|7; 7; 8; 7; 8; 9|] -val nums : int list @@ global many = [7; 7; 8; 7; 8; 9] -val 
nums : (int * int * int) array @@ global many = +val nums : int iarray = [:7; 7; 8; 7; 8; 9:] +val nums : int array = [|7; 7; 8; 7; 8; 9|] +val nums : int list = [7; 7; 8; 7; 8; 9] +val nums : (int * int * int) array = [|(1, 5, 6); (1, 5, 2); (1, 2, 3); (1, 2, 3); (2, 4, 2); (2, 1, 3); (3, 3, 4); (3, 3, 2); (3, 0, 1); (3, 0, 3); (4, 5, 3); (4, 2, 2); (5, 4, 5); (5, 4, 3); (5, 1, 2); (5, 1, 2); (6, 3, 3); (6, 0, 2); @@ -279,7 +275,7 @@ let nums = ([(x[@test.attr1]) for (x[@test.attr2]) in ([][@test.attr3])] [@test.attr4]);; [%%expect{| -val nums : 'a list @@ global many = [] +val nums : 'a list = [] |}] (*********) @@ -296,8 +292,8 @@ val f : local_ 'a @ unique -> y:local_ 'b @ once -> z:'c @ once unique -> - ?foo:local_ int @ once unique -> ?bar:local_ int -> unit -> unit - @@ global many = + ?foo:local_ int @ once unique -> ?bar:local_ int -> unit -> unit = + |}] (* bindings *) @@ -366,7 +362,7 @@ Line 12, characters 6-9: ^^^ Warning 26 [unused-var]: unused variable foo. -val g : unit -> unit @@ global many = +val g : unit -> unit = |}] (* expressions *) @@ -386,7 +382,7 @@ Line 3, characters 6-7: ^ Warning 26 [unused-var]: unused variable f. -val g : unit -> local_ unit @ once @@ global many = +val g : unit -> local_ unit @ once = |}] (* types *) @@ -463,16 +459,16 @@ val f : local_ 'd -> local_ 'b * string * (string -> string) * ('e -> 'e) * 'c * string * string * int array * string * (int -> local_ (int -> int)) * - (int -> local_ (int -> int)) @ contended - @@ global many = + (int -> local_ (int -> int)) @ contended = + |}] let f1 (_ @ local) = () let f2 () = let x @ local = [1; 2; 3] in f1 x [@nontail] [%%expect{| -val f1 : ('a : value_or_null). local_ 'a -> unit @@ global many = -val f2 : unit -> unit @@ global many = +val f1 : ('a : value_or_null). local_ 'a -> unit = +val f2 : unit -> unit = |}] module type S = sig @@ portable contended @@ -544,8 +540,7 @@ Error: This value escapes its region. 
let f2 (x @ local) (f @ once) : t2 = exclave_ { x; f } [%%expect{| -val f2 : local_ float -> (float -> float) @ once -> local_ t2 @ once @@ - global many = +val f2 : local_ float -> (float -> float) @ once -> local_ t2 @ once = |}] @@ -766,7 +761,7 @@ let f x = | _ -> assert false;; [%%expect{| -val f : 'a iarray -> 'a iarray @@ global many = +val f : 'a iarray -> 'a iarray = |}] (******************) @@ -782,25 +777,25 @@ let (x : (x:int * y:int)) = (~x:1, ~y:2) let (x : ((x:int * y:int) [@test.attr])) = (~x:1, ~y:2) [%%expect{| -val z : int @@ global many = 4 -val punned : int @@ global many = 5 -val x_must_be_even : ('a : value_or_null) ('b : value_or_null). 'a -> 'b @@ - global many = +val z : int = 4 +val punned : int = 5 +val x_must_be_even : ('a : value_or_null) ('b : value_or_null). 'a -> 'b = + exception Odd -val x : x:int * y:int @@ global many = (~x:1, ~y:2) -val x : x:int * y:int @@ global many = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) - : x:int * int * z:int * punned:int = (~x:5, 2, ~z:4, ~punned:5) -val x : x:int * y:int @@ global many = (~x:1, ~y:2) -val x : x:int * y:int @@ global many = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) |}] let (~x:x0, ~s, ~(y:int), ..) : (x:int * s:string * y:int * string) = (~x: 1, ~s: "a", ~y: 2, "ignore me") [%%expect{| -val x0 : int @@ global many portable = 1 -val s : string @@ global many portable = "a" -val y : int @@ global many portable = 2 +val x0 : int @@ portable = 1 +val s : string @@ portable = "a" +val y : int @@ portable = 2 |}] module M : sig @@ -837,14 +832,14 @@ let f ((~(x:int),y) : (x:int * int)) : int = x + y [%%expect{| val foo : ('a : value_or_null) ('b : value_or_null). 
- 'a -> (unit -> 'b) -> (unit -> 'b) -> 'b - @@ global many = -val x : int @@ global many portable = 1 -val y : int @@ global many = 2 -val x : int @@ global many portable = 1 -val y : int @@ global many = 2 -val f : (foo:int * bar:int) -> int @@ global many = -val f : (x:int * int) -> int @@ global many = + 'a -> (unit -> 'b) -> (unit -> 'b) -> 'b = + +val x : int @@ portable = 1 +val y : int = 2 +val x : int @@ portable = 1 +val y : int = 2 +val f : (foo:int * bar:int) -> int = +val f : (x:int * int) -> int = |}] type xy = (x:int * y:int) @@ -862,9 +857,9 @@ let matches = [%%expect{| type xy = x:int * y:int -val lt : x:int * y:int * x:int * int @@ global many = (~x:1, ~y:2, ~x:3, 4) -val matches : int @@ global many = 1 -val matches : int * int @@ global many = (1, 2) +val lt : x:int * y:int * x:int * int = (~x:1, ~y:2, ~x:3, 4) +val matches : int = 1 +val matches : int * int = (1, 2) |}] (********************) @@ -892,10 +887,10 @@ let test_nativeint s f = Format.printf "%s: %s\n" s (Nativeint_u.to_string f); Format.print_flush () [%%expect{| -val test_float : string -> Float_u.t -> unit @@ global many = -val test_int32 : string -> Int32_u.t -> unit @@ global many = -val test_int64 : string -> Int64_u.t -> unit @@ global many = -val test_nativeint : string -> Nativeint_u.t -> unit @@ global many = +val test_float : string -> Float_u.t -> unit = +val test_int32 : string -> Int32_u.t -> unit = +val test_int64 : string -> Int64_u.t -> unit = +val test_nativeint : string -> Nativeint_u.t -> unit = |}] (* Expressions *) @@ -924,45 +919,45 @@ let x = test_int64 "forty_two_in_binary" (#0b101010L) [%%expect{| e: 2.718282 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global 
many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () one_billion: 1000000000.000000 -val x : unit @@ global many = () +val x : unit = () one_twenty_seven_point_two_five_in_floating_hex: 127.250000 -val x : unit @@ global many = () +val x : unit = () five_point_three_seven_five_in_floating_hexponent: 5.375000 -val x : unit @@ global many = () +val x : unit = () zero: 0 -val x : unit @@ global many = () +val x : unit = () positive_one: 1 -val x : unit @@ global many = () +val x : unit = () positive_one: 1 -val x : unit @@ global many = () +val x : unit = () negative_one: -1 -val x : unit @@ global many = () +val x : unit = () negative_one: -1 -val x : unit @@ global many = () +val x : unit = () two_fifty_five_in_hex: 255 -val x : unit @@ global many = () +val x : unit = () twenty_five_in_octal: 25 -val x : unit @@ global many = () +val x : unit = () forty_two_in_binary: 42 -val x : unit @@ global many = () +val x : unit = () |}] (* Patterns *) @@ -987,9 +982,9 @@ let f x = ;; [%%expect{| -val f : float# -> [> `Five | `Four | `Other ] @@ global many = -val x : unit @@ global many = () -val f : float# -> float# @@ global many = +val f : float# -> [> `Five | `Four | `Other ] = +val x : unit = () +val f : float# -> float# = |}] ;; test_float "result" (f #7.);; @@ -1020,10 +1015,10 @@ let f x = [%%expect{| result: 7.000000 - : unit = () -val f : float# -> float# @@ global many = +val f : float# -> float# = larger match result: 3.000000 - : unit = () -val f : int64# -> [> `Five | `Four | `Other ] @@ global many = +val f : int64# -> [> `Five | `Four | `Other ] = |}] let x = @@ -1041,8 +1036,8 @@ let f x = test_int64 "result" (f #7L);; [%%expect{| -val x : unit @@ global many = () 
-val f : int64# -> int64# @@ global many = +val x : unit = () +val f : int64# -> int64# = result: 7 - : unit = () |}] @@ -1056,9 +1051,9 @@ let #{ data = payload; _ } = #{ data = "payload" ; i = 0 } let inc r = #{ r with i = r.#i + 1 } [%%expect{| type 'a with_idx = #{ data : 'a; i : int; } -val idx : 'a with_idx -> int @@ global many = -val payload : string @@ global many = "payload" -val inc : 'a with_idx -> 'a with_idx @@ global many = +val idx : 'a with_idx -> int = +val payload : string = "payload" +val inc : 'a with_idx -> 'a with_idx = |}] (***************) @@ -1088,9 +1083,10 @@ end supported. *) [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 11, characters 17-39: +11 | type 'a list : immutable_data with 'a + ^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] (**************************) @@ -1102,7 +1098,7 @@ let f (_ : 'a. 'a -> 'a) = () [%%expect{| type t = ('a. 'a -> 'a) -> int -val f : ('a. 'a -> 'a) -> unit @@ global many = +val f : ('a. 'a -> 'a) -> unit = |}] (************************) @@ -1140,8 +1136,8 @@ let x () = #3.14s [%%expect{| type t1 = float32 type t2 = float32# -val x : float32 @@ global many = 3.1400001s -val x : unit -> float32# @@ global many = +val x : float32 = 3.1400001s +val x : unit -> float32# = |}] (********) diff --git a/testsuite/tests/parsetree/test_ppx.compilers.reference b/testsuite/tests/parsetree/test_ppx.compilers.reference index adf2b0368aa..055bcefb550 100644 --- a/testsuite/tests/parsetree/test_ppx.compilers.reference +++ b/testsuite/tests/parsetree/test_ppx.compilers.reference @@ -1,5 +1,5 @@ -File "source_jane_street.ml", line 156, characters 0-24: -156 | type t = #(int * float#) +File "source_jane_street.ml", line 153, characters 0-24: +153 | type t = #(int * float#) ^^^^^^^^^^^^^^^^^^^^^^^^ Error: Multiple definition of the type name "t". Names must be unique in a given structure or signature. 
diff --git a/testsuite/tests/typing-gadts/existential_as_pattern.ml b/testsuite/tests/typing-gadts/existential_as_pattern.ml new file mode 100644 index 00000000000..d371cb9b380 --- /dev/null +++ b/testsuite/tests/typing-gadts/existential_as_pattern.ml @@ -0,0 +1,65 @@ +(* TEST + expect; +*) + +(** Test that as-patterns let us re-specialize the type of a constructor packing an existential *) + +(* No payload *) +type 'a t = + | Left : [> `Left ] t + | Right : [> `Right ] t +[%%expect {| +type 'a t = Left : [> `Left ] t | Right : [> `Right ] t +|}] + +let left : [ `Left | `Right ] t -> [ `Left ] t = function + | Left as t -> t + | Right -> assert false +[%%expect {| +val left : [ `Left | `Right ] t -> [ `Left ] t = +|}] + +(* Concrete payload *) +type ('a, 'e) t = + | Left : 'e -> ([> `Left ], 'e) t + | Right : 'e -> ([> `Right ], 'e) t +[%%expect {| +type ('a, 'e) t = + Left : 'e -> ([> `Left ], 'e) t + | Right : 'e -> ([> `Right ], 'e) t +|}] + +let left : ([ `Left | `Right ], 'e) t -> ([ `Left ], 'e) t = function + | Left _ as t -> t + | Right _ -> assert false +[%%expect {| +val left : ([ `Left | `Right ], 'e) t -> ([ `Left ], 'e) t = +|}] + +(* Pack payload *) +type 'a t2 = P : ('a, 'e) t -> 'a t2 [@@unboxed] +[%%expect {| +type 'a t2 = P : ('a, 'e) t -> 'a t2 [@@unboxed] +|}] + +let left : [ `Left | `Right ] t2 -> [ `Left ] t2 = function + | P (Left _ as t) -> P t + | P (Right _) -> assert false +[%%expect {| +val left : [ `Left | `Right ] t2 -> [ `Left ] t2 = +|}] + +(* Existential payload - equivalent to packed concrete payload *) +type 'a t = + | Left : 'e -> [> `Left ] t + | Right : 'e -> [> `Right ] t +[%%expect {| +type 'a t = Left : 'e -> [> `Left ] t | Right : 'e -> [> `Right ] t +|}] + +let left : [ `Left | `Right ] t -> [ `Left ] t = function + | Left _ as t -> t + | Right _ -> assert false +[%%expect {| +val left : [ `Left | `Right ] t -> [ `Left ] t = +|}] diff --git a/testsuite/tests/typing-layouts-products/exhaustiveness.ml 
b/testsuite/tests/typing-layouts-products/exhaustiveness.ml new file mode 100644 index 00000000000..289088f5ea6 --- /dev/null +++ b/testsuite/tests/typing-layouts-products/exhaustiveness.ml @@ -0,0 +1,18 @@ +(* TEST + flags = "-w +8"; + expect; +*) + +(* This is a regression test. The example below used to give an exhaustiveness + warning because we forgot a case in [Parmatch.simple_match]. *) + +type t = A | B + +let f t t' = + match #(t,t') with + | #(A, _) -> true + | #(B, _) -> false +[%%expect{| +type t = A | B +val f : t -> 'a -> bool = +|}] diff --git a/testsuite/tests/typing-layouts-unboxed-records/exhaustiveness.ml b/testsuite/tests/typing-layouts-unboxed-records/exhaustiveness.ml new file mode 100644 index 00000000000..55307a90abf --- /dev/null +++ b/testsuite/tests/typing-layouts-unboxed-records/exhaustiveness.ml @@ -0,0 +1,38 @@ +(* TEST + flags = "-w +8 -extension layouts_beta"; + expect; +*) + +(* This is a regression test. The example below used to give an exhaustiveness + warning because we forgot a case in [Parmatch.simple_match]. *) + +type t = A | B +type r = #{ x : t; y : t } + +let f t t' = + match #{ x = t; y = t' } with + | #{ x = A; y = _ } -> true + | #{ x = B; y = _ } -> false +[%%expect{| +type t = A | B +type r = #{ x : t; y : t; } +val f : t -> t -> bool = +|}] + +(* This is a regression test. The example below used to give + #{y=A; _ } as a counterexample instead of #{y=A; x=B}. *) +let g t t' = + match #{ x = t; y = t' } with + | #{ x = A; _ } -> true + | #{ y = B; _ } -> false +[%%expect{| +Lines 2-4, characters 2-26: +2 | ..match #{ x = t; y = t' } with +3 | | #{ x = A; _ } -> true +4 | | #{ y = B; _ } -> false +Warning 8 [partial-match]: this pattern-matching is not exhaustive. 
+Here is an example of a case that is not matched: +#{y=A; x=B} + +val g : t -> t -> bool = +|}] diff --git a/testsuite/tests/typing-layouts/jkinds.ml b/testsuite/tests/typing-layouts/jkinds.ml index a210a685095..eec92625b42 100644 --- a/testsuite/tests/typing-layouts/jkinds.ml +++ b/testsuite/tests/typing-layouts/jkinds.ml @@ -9,33 +9,37 @@ type 'a list : immutable_data with 'a [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 15-37: +1 | type 'a list : immutable_data with 'a + ^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] type ('a, 'b) either : immutable_data with 'a * 'b [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 23-50: +1 | type ('a, 'b) either : immutable_data with 'a * 'b + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] type 'a gel : kind_of_ 'a mod global [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 14-25: +1 | type 'a gel : kind_of_ 'a mod global + ^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] type 'a t : _ [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 12-13: +1 | type 'a t : _ + ^ +Error: Unimplemented kind syntax |}] kind_abbrev_ immediate = value mod global unique many sync uncontended @@ -82,9 +86,10 @@ module type S = sig end [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 2, characters 17-39: +2 | type 'a list : immutable_data with 'a + ^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] (**************************************) diff --git a/testsuite/tests/typing-modes/val_modalities.ml b/testsuite/tests/typing-modes/val_modalities.ml index 645f058e348..e105ca35d73 100644 --- a/testsuite/tests/typing-modes/val_modalities.ml +++ b/testsuite/tests/typing-modes/val_modalities.ml @@ -14,17 +14,17 @@ type r = { let 
uncontended_use (_ @ uncontended) = () [%%expect{| type r = { mutable x : string; } -val uncontended_use : 'a -> unit @@ global many = +val uncontended_use : 'a -> unit = |}] let share_use : 'a -> unit @@ portable = fun _ -> () [%%expect{| -val share_use : 'a -> unit @@ global many = +val share_use : 'a -> unit = |}] let (portable_use @ portable) (_ @ portable) = () [%%expect{| -val portable_use : 'a @ portable -> unit @@ global many = +val portable_use : 'a @ portable -> unit = |}] (* The compiler building itself is a comprehensive test of legacy modules/values. @@ -34,7 +34,7 @@ module M = struct let foo = {x = "hello"} end [%%expect{| -module M : sig val foo : r @@ global many end +module M : sig val foo : r end |}] module type S = sig @@ -62,7 +62,7 @@ module M = struct let x @ contended = "hello" end [%%expect{| -module M : sig val x : string @@ global many portable contended end +module M : sig val x : string @@ portable contended end |}] (* Testing the defaulting behaviour. @@ -90,76 +90,72 @@ Error: This value is "nonportable" but expected to be "portable". module Module_type_of_monadic = struct module M = struct - let x @ uncontended = "hello" + let x @ uncontended = ref "hello" end module M' : module type of M = M (* for monadic axes, we try to push to the id = join_with_min. The original modality is pushed to floor. 
*) module M' : module type of M = struct - let x @ contended = "hello" + let x @ contended = ref "hello" end end [%%expect{| Lines 8-10, characters 35-7: 8 | ...................................struct - 9 | let x @ contended = "hello" + 9 | let x @ contended = ref "hello" 10 | end Error: Signature mismatch: Modules do not match: - sig val x : string @@ global many portable contended end + sig val x : string ref @@ contended end is not included in - sig val x : string end + sig val x : string ref end Values do not match: - val x : string @@ global many portable contended + val x : string ref @@ contended is not included in - val x : string + val x : string ref The second is uncontended and the first is contended. |}] module Module_type_nested = struct module M = struct - let x @ contended portable = "hello" + let x @ portable = fun t -> t module N = struct - let y @ uncontended portable = "world" + let y @ uncontended = ref "hello" end end module M' : module type of M = struct - let x = "hello" + let x @ nonportable = fun t -> t module N = struct - let y @ contended = "hello" + let y @ contended = ref "hello" end end end [%%expect{| Lines 8-13, characters 35-7: 8 | ...................................struct - 9 | let x = "hello" + 9 | let x @ nonportable = fun t -> t 10 | module N = struct -11 | let y @ contended = "hello" +11 | let y @ contended = ref "hello" 12 | end 13 | end Error: Signature mismatch: Modules do not match: sig - val x : string @@ global many portable - module N : - sig val y : string @@ global many portable contended end + val x : 'a -> 'a + module N : sig val y : string ref @@ contended end end is not included in - sig - val x : string @@ contended - module N : sig val y : string end - end + sig val x : 'a -> 'a module N : sig val y : string ref end end In module "N": Modules do not match: - sig val y : string @@ global many portable contended end + sig val y : string ref @@ contended end is not included in - sig val y : string end + sig val y : 
string ref end In module "N": Values do not match: - val y : string @@ global many portable contended + val y : string ref @@ contended is not included in - val y : string + val y : string ref The second is uncontended and the first is contended. |}] @@ -176,7 +172,7 @@ module Without_inclusion = struct end [%%expect{| module Without_inclusion : - sig module M : sig val x : 'a -> 'a @@ global many portable end end + sig module M : sig val x : 'a -> 'a @@ portable end end |}] module Without_inclusion = struct @@ -194,25 +190,25 @@ Error: This value is "nonportable" but expected to be "portable". module Inclusion_fail = struct module M : sig - val x : string @@ uncontended + val x : string ref @@ uncontended end = struct - let x @ contended = "hello" + let x @ contended = ref "hello" end end [%%expect{| Lines 4-6, characters 10-7: 4 | ..........struct -5 | let x @ contended = "hello" +5 | let x @ contended = ref "hello" 6 | end Error: Signature mismatch: Modules do not match: - sig val x : string @@ global many portable contended end + sig val x : string ref @@ contended end is not included in - sig val x : string end + sig val x : string ref end Values do not match: - val x : string @@ global many portable contended + val x : string ref @@ contended is not included in - val x : string + val x : string ref The second is uncontended and the first is contended. 
|}] @@ -270,8 +266,8 @@ end [%%expect{| module Close_over_value : sig - module M : sig val x : string @@ global many portable end - val foo : unit -> unit @@ global many portable + module M : sig val x : string @@ portable end + val foo : unit -> unit @@ portable end |}] @@ -441,7 +437,7 @@ Lines 13-19, characters 6-3: Error: Signature mismatch: Modules do not match: sig - module Plain : sig val f : int -> int @@ global many end + module Plain : sig val f : int -> int end module type S_plain = sig module M : sig val f : int -> int end end end @@ -453,12 +449,12 @@ Error: Signature mismatch: end In module "Plain": Modules do not match: - sig val f : int -> int @@ global many end + sig val f : int -> int end is not included in sig val f : int -> int @@ portable end In module "Plain": Values do not match: - val f : int -> int @@ global many + val f : int -> int is not included in val f : int -> int @@ portable The second is portable and the first is nonportable. @@ -495,32 +491,65 @@ end module M : sig module N : sig val foo : 'a -> 'a @@ global many end end |}] -(* CR zqian: inclusion check should cross modes, if we are comparing modes. *) +(* inclusion check should cross modes, if we are comparing modes (instead of + modalities) *) +module M : sig + val foo : int @@ portable uncontended +end = struct + let foo @ nonportable contended = 42 +end +[%%expect{| +module M : sig val foo : int @@ portable end +|}] + +(* The RHS type (expected type) is used for mode crossing. The following still +passes because types are substituted. *) module M : sig - module N : sig val foo : int @@ portable end + type t + val t : t @@ portable uncontended end = struct - module N = struct let foo @ nonportable = 42 end + type t = int + let t @ nonportable contended = 42 +end +[%%expect{| +module M : sig type t val t : t @@ portable end +|}] + +(* LHS type is a subtype of RHS type, which means more type-level information. 
+That doesn't matter for mode crossing for most cases, except for poly variants. +The following examples seem to suggest that we should use LHS type for mode +crossing, but I couldn't find examples to really demonstrate that. *) +module M : sig + val t : [`Foo | `Bar] @@ portable uncontended +end = struct + let t @ nonportable contended = `Foo +end +[%%expect{| +module M : sig val t : [ `Bar | `Foo ] @@ portable end +|}] + +module M : sig + val t : [`Foo | `Bar of 'a -> 'a | `Baz of string ref] @@ portable uncontended +end = struct + let t @ nonportable contended = `Foo end [%%expect{| Lines 3-5, characters 6-3: 3 | ......struct -4 | module N = struct let foo @ nonportable = 42 end +4 | let t @ nonportable contended = `Foo 5 | end Error: Signature mismatch: Modules do not match: - sig module N : sig val foo : int @@ global many end end + sig val t : [> `Foo ] @@ contended end is not included in - sig module N : sig val foo : int @@ portable end end - In module "N": - Modules do not match: - sig val foo : int @@ global many end - is not included in - sig val foo : int @@ portable end - In module "N": + sig + val t : [ `Bar of 'a -> 'a | `Baz of string ref | `Foo ] @@ + portable + end Values do not match: - val foo : int @@ global many + val t : [> `Foo ] @@ contended is not included in - val foo : int @@ portable + val t : [ `Bar of 'a -> 'a | `Baz of string ref | `Foo ] @@ portable The second is portable and the first is nonportable. 
|}] @@ -586,7 +615,7 @@ let f (x : (module S)) = (x : (module S) :> (module S')) [%%expect{| module type S = sig val foo : 'a -> 'a @@ global many end module type S' = sig val foo : 'a -> 'a end -val f : (module S) -> (module S') @@ global many = +val f : (module S) -> (module S') = |}] let f (x : (module S')) = (x : (module S') :> (module S)) @@ -825,8 +854,8 @@ module M_portable = struct let f @ portable = fun () -> () end [%%expect{| -module M_nonportable : sig val f : unit -> unit @@ global many end -module M_portable : sig val f : unit -> unit @@ global many portable end +module M_nonportable : sig val f : unit -> unit end +module M_portable : sig val f : unit -> unit @@ portable end |}] let (foo @ portable) () = @@ -846,7 +875,7 @@ let (_foo @ portable) () = () [%%expect{| -val _foo : unit -> unit @@ global many = +val _foo : unit -> unit = |}] let () = diff --git a/testsuite/tests/typing-small-numbers/test_beta.ml b/testsuite/tests/typing-small-numbers/test_beta.ml index 7ff80a115dc..d20ba0991d8 100644 --- a/testsuite/tests/typing-small-numbers/test_beta.ml +++ b/testsuite/tests/typing-small-numbers/test_beta.ml @@ -199,17 +199,13 @@ type t = int16;; type t = int16 |}];; -let i16 = Stdlib_beta.Int16.minus_one;; +let i8 = Stdlib_beta.Int8.minus_one;; [%%expect{| -val i16 : int16 = -1 +val i8 : int8 = -1 |}];; -let () = - match #0.0s with -| #0.0s -> () -| _ -> () -;; +let i16 = Stdlib_beta.Int16.minus_one;; [%%expect{| -Line 1: -Error: float32 literal patterns are not supported. +val i16 : int16 = -1 |}];; + diff --git a/testsuite/tests/typing-unique/overwriting.ml b/testsuite/tests/typing-unique/overwriting.ml index a653ad412c8..d81535c442a 100644 --- a/testsuite/tests/typing-unique/overwriting.ml +++ b/testsuite/tests/typing-unique/overwriting.ml @@ -42,7 +42,7 @@ let overwrite_shared (r : record_update) = let x = overwrite_ r with { x = "foo" } in x.x [%%expect{| -val id : ('a : value_or_null). 'a -> 'a @@ global many = +val id : ('a : value_or_null). 
'a -> 'a = Line 5, characters 21-22: 5 | let x = overwrite_ r with { x = "foo" } in ^ @@ -727,7 +727,7 @@ let guards_good = function end | OptionB _ -> false [%%expect{| -val is_option_a : options -> bool @@ global many = +val is_option_a : options -> bool = Line 8, characters 30-59: 8 | | Some s when is_option_a (overwrite_ v with OptionA s) -> true ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -807,8 +807,7 @@ let mutable_field_aliased r = [%%expect{| type 'a mutable_record = { mutable m : 'a; } val mutable_field_aliased : - options mutable_record @ unique -> options mutable_record * options @@ - global many = + options mutable_record @ unique -> options mutable_record * options = |}] let mutable_field_aliased r = diff --git a/testsuite/tests/typing-unique/overwriting_lift_constants.ml b/testsuite/tests/typing-unique/overwriting_lift_constants.ml index 2018d2ed4af..27a1bc6a030 100644 --- a/testsuite/tests/typing-unique/overwriting_lift_constants.ml +++ b/testsuite/tests/typing-unique/overwriting_lift_constants.ml @@ -73,7 +73,7 @@ let () = [%%expect{| type point = { mutable dim : int; x : float; y : float; z : float; } -val unsafe_dup : '_a @ unique -> '_a * '_a @ unique @@ global many = +val unsafe_dup : '_a @ unique -> '_a * '_a @ unique = Line 9, characters 10-66: 9 | let p = overwrite_ p with { dim = 4; x = 1.0; y = 2.0; z = 3.0 } in ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml b/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml index e48b22188f3..5167c0bea10 100644 --- a/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml +++ b/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml @@ -19,15 +19,14 @@ let aliased_use x = x [%%expect{| (let (aliased_use/282 = (function {nlocal = 0} x/284 x/284)) (apply (field_imm 1 (global Toploop!)) "aliased_use" aliased_use/282)) -val aliased_use : ('a : value_or_null). 
'a -> 'a @@ global many = +val aliased_use : ('a : value_or_null). 'a -> 'a = |}] let unique_use (unique_ x) = x [%%expect{| (let (unique_use/285 = (function {nlocal = 0} x/287 x/287)) (apply (field_imm 1 (global Toploop!)) "unique_use" unique_use/285)) -val unique_use : ('a : value_or_null). 'a @ unique -> 'a @@ global many = - +val unique_use : ('a : value_or_null). 'a @ unique -> 'a = |}] (* This output is fine with overwriting: The [r.y] is not pushed down. *) @@ -48,7 +47,7 @@ let proj_aliased r = (apply aliased_use/282 r/290)) (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/292 y/291)))) (apply (field_imm 1 (global Toploop!)) "proj_aliased" proj_aliased/288)) -val proj_aliased : record -> record * string @@ global many = +val proj_aliased : record -> record * string = |}] let proj_unique r = @@ -68,7 +67,7 @@ let proj_unique r = (apply unique_use/285 r/295)) (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/297 y/296)))) (apply (field_imm 1 (global Toploop!)) "proj_unique" proj_unique/293)) -val proj_unique : record @ unique -> record * string @@ global many = +val proj_unique : record @ unique -> record * string = |}] (* This output would be unsound if [aliased_use] was able to overwrite [r] @@ -91,7 +90,7 @@ let match_aliased r = (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/302 (field_imm 1 r/300))))) (apply (field_imm 1 (global Toploop!)) "match_aliased" match_aliased/298)) -val match_aliased : record -> record * string @@ global many = +val match_aliased : record -> record * string = |}] (* This is sound since we bind [y] before the [unique_use] *) @@ -113,7 +112,7 @@ let match_unique r = (apply unique_use/285 r/306)) (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/308 y/307)))) (apply (field_imm 1 (global Toploop!)) "match_unique" match_unique/304)) -val match_unique : record @ unique -> record * string @@ global many = +val match_unique : record @ unique -> record * string = |}] (* Similarly, this would be 
unsound since Lambda performs a mini ANF pass. *) @@ -139,7 +138,7 @@ let match_mini_anf_aliased r = (field_imm 1 r/312))))) (apply (field_imm 1 (global Toploop!)) "match_mini_anf_aliased" match_mini_anf_aliased/310)) -val match_mini_anf_aliased : record -> record * string @@ global many = +val match_mini_anf_aliased : record -> record * string = |}] (* This is sound since we bind [y] before the [unique_use] *) @@ -165,8 +164,7 @@ let match_mini_anf_unique r = (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/325 y/324)))) (apply (field_imm 1 (global Toploop!)) "match_mini_anf_unique" match_mini_anf_unique/320)) -val match_mini_anf_unique : record @ unique -> record * string @@ global many = - +val match_mini_anf_unique : record @ unique -> record * string = |}] let match_anf_aliased r = @@ -196,7 +194,7 @@ let match_anf_aliased r = y/333))))) (apply (field_imm 1 (global Toploop!)) "match_anf_aliased" match_anf_aliased/330)) -val match_anf_aliased : record -> record * string @@ global many = +val match_anf_aliased : record -> record * string = |}] (* This is sound since we bind [y] using [field_mut] *) @@ -228,8 +226,7 @@ let match_anf_unique r = y/345))))) (apply (field_imm 1 (global Toploop!)) "match_anf_unique" match_anf_unique/342)) -val match_anf_unique : record @ unique -> record * string @@ global many = - +val match_anf_unique : record @ unique -> record * string = |}] type tree = @@ -325,7 +322,7 @@ let swap_inner (t : tree) = (exit 19)) with (19) t/362))) (apply (field_imm 1 (global Toploop!)) "swap_inner" swap_inner/360)) -val swap_inner : tree -> tree @@ global many = +val swap_inner : tree -> tree = |}] (* CR uniqueness: Update this test once overwriting is fully implemented. 
@@ -374,7 +371,7 @@ let match_guard r = (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/453 y/382)))))) (apply (field_imm 1 (global Toploop!)) "match_guard" match_guard/378)) -val match_guard : record @ unique -> record * string @@ global many = +val match_guard : record @ unique -> record * string = |}] let match_guard_unique (unique_ r) = diff --git a/testsuite/tests/typing-unique/overwriting_syntax.ml b/testsuite/tests/typing-unique/overwriting_syntax.ml index 8b2ff6eeb5f..462442d51ae 100644 --- a/testsuite/tests/typing-unique/overwriting_syntax.ml +++ b/testsuite/tests/typing-unique/overwriting_syntax.ml @@ -42,7 +42,7 @@ Uncaught exception: File "parsing/location.ml", line 1107, characters 2-8: Asser let with_record = function { a; b } as t -> { t with b = a } [%%expect{| -val with_record : record -> record @@ global many = +val with_record : record -> record = |}] let overwrite_record = function diff --git a/testsuite/tests/typing-unique/rbtree.ml b/testsuite/tests/typing-unique/rbtree.ml index dfc4f9ac938..1e2e47cd62d 100644 --- a/testsuite/tests/typing-unique/rbtree.ml +++ b/testsuite/tests/typing-unique/rbtree.ml @@ -472,13 +472,13 @@ type ('k, 'v) tree = | Leaf val fold : 'a 'b ('c : value_or_null). - ('a -> 'b -> 'c -> 'c) -> 'c -> ('a, 'b) tree -> 'c - @@ global many = + ('a -> 'b -> 'c -> 'c) -> 'c -> ('a, 'b) tree -> 'c = + val work : ('a : value_or_null) ('b : value_or_null) ('c : value_or_null). insert:(int -> bool -> 'a -> 'a) -> - fold:(('b -> bool -> int -> int) -> int -> 'a -> 'c) -> empty:'a -> 'c - @@ global many = + fold:(('b -> bool -> int -> int) -> int -> 'a -> 'c) -> empty:'a -> 'c = + Line 85, characters 16-71: 85 | balance_right (Node { t with right = ins k v t.right }) [@nontail] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -508,16 +508,11 @@ module Make_Okasaki : val fold : 'a 'b ('c : value_or_null). 
('a -> 'b -> 'c -> 'c) -> 'c -> ('a, 'b) tree -> 'c - @@ global many - val balance_left : ('a, 'b) tree -> ('a, 'b) tree @@ global many - portable - val balance_right : ('a, 'b) tree -> ('a, 'b) tree @@ global many - portable - val ins : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree @@ global - many - val set_black : ('a, 'b) tree -> ('a, 'b) tree @@ global many portable - val insert : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree @@ - global many + val balance_left : ('a, 'b) tree -> ('a, 'b) tree @@ portable + val balance_right : ('a, 'b) tree -> ('a, 'b) tree @@ portable + val ins : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree + val set_black : ('a, 'b) tree -> ('a, 'b) tree @@ portable + val insert : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree end Line 110, characters 16-52: 110 | | Node _ -> overwrite_ t with Node { color = c } diff --git a/toplevel/genprintval.ml b/toplevel/genprintval.ml index 9de4991762d..e17b6f876f9 100644 --- a/toplevel/genprintval.ml +++ b/toplevel/genprintval.ml @@ -255,16 +255,9 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct | Print_as_value (* can interpret as a value and print *) | Print_as of string (* can't print *) - let get_and_default_jkind_for_printing jkind = - let layout = Jkind.get_layout_defaulting_to_value jkind in - match layout with - (* CR layouts v3.0: [Value_or_null] should probably require special - printing to avoid descending into NULL. (This module uses - lots of unsafe Obj features.) 
- *) + let print_sort : Jkind.Sort.Const.t -> _ = function | Base Value -> Print_as_value | Base Void -> Print_as "" - | Any -> Print_as "" | Base (Float64 | Float32 | Bits8 | Bits16 | Bits32 | Bits64 | Vec128 | Word) -> Print_as "" | Product _ -> Print_as "" @@ -464,9 +457,8 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct instantiate_types env type_params ty_list l in let ty_args = List.map2 - (fun { ca_jkind } ty_arg -> - (ty_arg, - get_and_default_jkind_for_printing ca_jkind) + (fun { ca_sort } ty_arg -> + (ty_arg, print_sort ca_sort) ) l ty_args in tree_of_constr_with_args (tree_of_constr env path) @@ -581,12 +573,12 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct lbl_list pos obj rep = let rec tree_of_fields first pos = function | [] -> [] - | {ld_id; ld_type; ld_jkind} :: remainder -> + | {ld_id; ld_type; ld_sort} :: remainder -> let ty_arg = instantiate_type env type_params ty_list ld_type in let name = Ident.name ld_id in (* PR#5722: print full module path only for first record field *) - let is_void = Jkind.is_void_defaulting ld_jkind in + let is_void = Jkind.Sort.Const.(equal void ld_sort) in let lid = if first then tree_of_label env path (Out_name.create name) else Oide_ident (Out_name.create name) @@ -627,17 +619,17 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct ty_list lbl_list pos obj = let rec tree_of_fields first pos = function | [] -> [] - | {ld_id; ld_type; ld_jkind} :: remainder -> + | {ld_id; ld_type; ld_sort} :: remainder -> let ty_arg = instantiate_type env type_params ty_list ld_type in let name = Ident.name ld_id in (* PR#5722: print full module path only for first record field *) - let is_void = Jkind.is_void_defaulting ld_jkind in + let is_void = Jkind.Sort.Const.(equal void ld_sort) in let lid = if first then tree_of_label env path (Out_name.create name) else Oide_ident (Out_name.create name) and v = - match get_and_default_jkind_for_printing ld_jkind with + match 
print_sort ld_sort with | Print_as msg -> Oval_stuff msg | Print_as_value -> match lbl_list with @@ -745,8 +737,8 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct | _ -> assert false in let args = instantiate_types env type_params ty_list cstr.cstr_args in - let args = List.map2 (fun { ca_jkind } arg -> - (arg, get_and_default_jkind_for_printing ca_jkind)) + let args = List.map2 (fun { ca_sort } arg -> + (arg, print_sort ca_sort)) cstr.cstr_args args in tree_of_constr_with_args diff --git a/typing/datarepr.ml b/typing/datarepr.ml index cb9daeac2d6..4d4ccb6ad01 100644 --- a/typing/datarepr.ml +++ b/typing/datarepr.ml @@ -70,7 +70,7 @@ let constructor_args ~current_unit priv cd_args cd_res path rep = in let type_params = TypeSet.elements arg_vars_set in let arity = List.length type_params in - let is_void_label lbl = Jkind.is_void_defaulting lbl.ld_jkind in + let is_void_label lbl = Jkind.Sort.Const.(equal void lbl.ld_sort) in let jkind = Jkind.for_boxed_record ~all_void:(List.for_all is_void_label lbls) in @@ -97,7 +97,7 @@ let constructor_args ~current_unit priv cd_args cd_res path rep = [ { ca_type = newgenconstr path type_params; - ca_jkind = jkind; + ca_sort = Jkind.Sort.Const.value; ca_modalities = Mode.Modality.Value.Const.id; ca_loc = Location.none } @@ -121,21 +121,21 @@ let constructor_descrs ~current_unit ty_path decl cstrs rep = written here should be irrelevant, and so would like to understand this interaction better. 
*) begin match cd_args with - | Cstr_tuple [{ ca_jkind = jkind }] - | Cstr_record [{ ld_jkind = jkind }] -> - [| Constructor_uniform_value, [| jkind |] |] + | Cstr_tuple [{ ca_sort = sort }] + | Cstr_record [{ ld_sort = sort }] -> + [| Constructor_uniform_value, [| sort |] |] | Cstr_tuple ([] | _ :: _) | Cstr_record ([] | _ :: _) -> - Misc.fatal_error "Multiple or 0 arguments in [@@unboxed] variant" + Misc.fatal_error "Multiple arguments in [@@unboxed] variant" end | Variant_unboxed, ([] | _ :: _) -> Misc.fatal_error "Multiple or 0 constructors in [@@unboxed] variant" in - let all_void jkinds = Array.for_all Jkind.is_void_defaulting jkinds in + let all_void sorts = Array.for_all Jkind.Sort.Const.(equal void) sorts in let num_consts = ref 0 and num_nonconsts = ref 0 in let cstr_constant = Array.map - (fun (_, jkinds) -> - let all_void = all_void jkinds in + (fun (_, sorts) -> + let all_void = all_void sorts in if all_void then incr num_consts else incr num_nonconsts; all_void) cstr_shapes_and_arg_jkinds @@ -229,7 +229,7 @@ let dummy_label (type rep) (record_form : rep record_form) in { lbl_name = ""; lbl_res = none; lbl_arg = none; lbl_mut = Immutable; lbl_modalities = Mode.Modality.Value.Const.id; - lbl_jkind = Jkind.Builtin.any ~why:Dummy_jkind; + lbl_sort = Jkind.Sort.Const.void; lbl_num = -1; lbl_pos = -1; lbl_all = [||]; lbl_repres = repres; lbl_private = Public; @@ -243,14 +243,14 @@ let label_descrs record_form ty_res lbls repres priv = let rec describe_labels num pos = function [] -> [] | l :: rest -> - let is_void = Jkind.is_void_defaulting l.ld_jkind in + let is_void = Jkind.Sort.Const.(equal void l.ld_sort) in let lbl = { lbl_name = Ident.name l.ld_id; lbl_res = ty_res; lbl_arg = l.ld_type; lbl_mut = l.ld_mutable; lbl_modalities = l.ld_modalities; - lbl_jkind = l.ld_jkind; + lbl_sort = l.ld_sort; lbl_pos = if is_void then lbl_pos_void else pos; lbl_num = num; lbl_all = all_labels; diff --git a/typing/includecore.ml b/typing/includecore.ml index 
a4d5cd2bb97..ba2b434ebae 100644 --- a/typing/includecore.ml +++ b/typing/includecore.ml @@ -51,6 +51,19 @@ type mmodes = | All | Legacy +(** Mode cross a right mode *) +(* This is very similar to Ctype.mode_cross_right. Any bugs here are likely bugs + there, too. *) +let right_mode_cross_jkind jkind mode = + let upper_bounds = Jkind.get_modal_upper_bounds jkind in + let upper_bounds = Const.alloc_as_value upper_bounds in + Value.imply upper_bounds mode + +let right_mode_cross env ty mode= + if not (Ctype.is_principal ty) then mode else + let jkind = Ctype.type_jkind_purely env ty in + right_mode_cross_jkind jkind mode + let native_repr_args nra1 nra2 = let rec loop i nra1 nra2 = match nra1, nra2 with @@ -119,11 +132,14 @@ let value_descriptions ~loc env name inferred modalities, which we need to workaround. *) () | Legacy -> - let mmode1, mmode2 = Mode.Value.legacy, Mode.Value.legacy in + let mmode1, mmode2 = + Mode.Value.(disallow_right legacy), Mode.Value.(disallow_left legacy) + in let mode1 = Mode.Modality.Value.apply vd1.val_modalities mmode1 in let mode2 = Mode.Modality.Value.(Const.apply (to_const_exn vd2.val_modalities) mmode2) in + let mode2 = right_mode_cross env vd2.val_type mode2 in begin match Mode.Value.submode mode1 mode2 with | Ok () -> () | Error e -> raise (Dont_match (Mode e)) diff --git a/typing/jkind.ml b/typing/jkind.ml index f7244f98cba..705387349fe 100644 --- a/typing/jkind.ml +++ b/typing/jkind.ml @@ -361,6 +361,7 @@ module Error = struct { from_annotation : Parsetree.jkind_annotation; from_attribute : Builtin_attributes.jkind_attribute Location.loc } + | Unimplemented_syntax exception User_error of Location.t * t end @@ -812,7 +813,8 @@ module Const = struct List.map (of_user_written_annotation_unchecked_level context) ts in jkind_of_product_annotations jkinds - | Default | With _ | Kind_of _ -> Misc.fatal_error "XXX unimplemented" + | Default | With _ | Kind_of _ -> + raise ~loc:jkind.pjkind_loc Unimplemented_syntax (* The 
[annotation_context] parameter can be used to allow annotations / kinds in different contexts to be enabled with different extension settings. @@ -2121,6 +2123,8 @@ let report_error ~loc : Error.t -> _ = function layouts extension.@;\ %t@]" Pprintast.jkind_annotation jkind hint) + | Unimplemented_syntax -> + Location.errorf ~loc "@[Unimplemented kind syntax@]" let () = Location.register_error_of_exn (function diff --git a/typing/jkind_intf.ml b/typing/jkind_intf.ml index 5d915abb56d..45bd5b77ffb 100644 --- a/typing/jkind_intf.ml +++ b/typing/jkind_intf.ml @@ -68,6 +68,54 @@ module type Sort = sig module Debug_printers : sig val t : Format.formatter -> t -> unit end + + (* CR layouts: These are sorts for the types of ocaml expressions that are + currently required to be values, but for which we expect to relax that + restriction in versions 2 and beyond. Naming them makes it easy to find + where in the translation to lambda they are assume to be value. *) + (* CR layouts: add similarly named jkinds and use those names everywhere (not + just the translation to lambda) rather than writing specific jkinds and + sorts in the code. *) + val for_class_arg : t + + val for_instance_var : t + + val for_lazy_body : t + + val for_tuple_element : t + + val for_variant_arg : t + + val for_record : t + + val for_block_element : t + + val for_array_get_result : t + + val for_array_comprehension_element : t + + val for_list_element : t + + (** These are sorts for the types of ocaml expressions that we expect will + always be "value". These names are used in the translation to lambda to + make the code clearer. *) + val for_function : t + + val for_probe_body : t + + val for_poly_variant : t + + val for_object : t + + val for_initializer : t + + val for_method : t + + val for_module : t + + val for_predef_value : t (* Predefined value types, e.g. int and string *) + + val for_tuple : t end module Var : sig @@ -125,6 +173,13 @@ module type Sort = sig it is set to [value] first. 
*) val default_to_value_and_get : t -> Const.t + (* CR layouts v12: Default this to void. *) + + (** [default_for_transl_and_get] extracts the sort as a `const`. If it's a variable, + it is set to [value] first. After we have support for [void], this will default to + [void] instead. *) + val default_for_transl_and_get : t -> Const.t + (** To record changes to sorts, for use with `Types.{snapshot, backtrack}` *) type change @@ -137,54 +192,6 @@ module type Sort = sig val var : Format.formatter -> var -> unit end - - (* CR layouts: These are sorts for the types of ocaml expressions that are - currently required to be values, but for which we expect to relax that - restriction in versions 2 and beyond. Naming them makes it easy to find - where in the translation to lambda they are assume to be value. *) - (* CR layouts: add similarly named jkinds and use those names everywhere (not - just the translation to lambda) rather than writing specific jkinds and - sorts in the code. *) - val for_class_arg : t - - val for_instance_var : t - - val for_lazy_body : t - - val for_tuple_element : t - - val for_variant_arg : t - - val for_record : t - - val for_block_element : t - - val for_array_get_result : t - - val for_array_comprehension_element : t - - val for_list_element : t - - (** These are sorts for the types of ocaml expressions that we expect will - always be "value". These names are used in the translation to lambda to - make the code clearer. *) - val for_function : t - - val for_probe_body : t - - val for_poly_variant : t - - val for_object : t - - val for_initializer : t - - val for_method : t - - val for_module : t - - val for_predef_value : t (* Predefined value types, e.g. 
int and string *) - - val for_tuple : t end module History = struct diff --git a/typing/jkind_types.ml b/typing/jkind_types.ml index 5c015e8605b..f8cd642ae48 100644 --- a/typing/jkind_types.ml +++ b/typing/jkind_types.ml @@ -129,6 +129,44 @@ module Sort = struct in pp_element ~nested:false ppf c end + + let for_function = value + + let for_predef_value = value + + let for_block_element = value + + let for_probe_body = value + + let for_poly_variant = value + + let for_record = value + + let for_object = value + + let for_lazy_body = value + + let for_tuple_element = value + + let for_variant_arg = value + + let for_instance_var = value + + let for_class_arg = value + + let for_method = value + + let for_initializer = value + + let for_module = value + + let for_tuple = value + + let for_array_get_result = value + + let for_array_comprehension_element = value + + let for_list_element = value end module Var = struct @@ -361,6 +399,9 @@ module Sort = struct (* path compression *) result) + (* CR layouts v12: Default to void instead. 
*) + let default_for_transl_and_get s = default_to_value_and_get s + (***********************) (* equality *) @@ -501,44 +542,6 @@ module Sort = struct pp_element ~nested:false ppf t include Static.T - - let for_function = value - - let for_predef_value = value - - let for_block_element = value - - let for_probe_body = value - - let for_poly_variant = value - - let for_record = value - - let for_object = value - - let for_lazy_body = value - - let for_tuple_element = value - - let for_variant_arg = value - - let for_instance_var = value - - let for_class_arg = value - - let for_method = value - - let for_initializer = value - - let for_module = value - - let for_tuple = value - - let for_array_get_result = value - - let for_array_comprehension_element = value - - let for_list_element = value end module Layout = struct diff --git a/typing/mode.ml b/typing/mode.ml index e344ca4c173..b72beee20f2 100644 --- a/typing/mode.ml +++ b/typing/mode.ml @@ -1335,9 +1335,9 @@ module Common (Obj : Obj) = struct let get_ceil m = Solver.get_ceil obj m - let get_conservative_floor m = Solver.get_conservative_floor obj m + let get_loose_floor m = Solver.get_loose_floor obj m - let get_conservative_ceil m = Solver.get_conservative_ceil obj m + let get_loose_ceil m = Solver.get_loose_ceil obj m end end [@@inline] @@ -1370,8 +1370,8 @@ module Locality = struct if Const.le ceil floor then Some ceil else None let check_const_conservative m = - let floor = Guts.get_conservative_floor m in - let ceil = Guts.get_conservative_ceil m in + let floor = Guts.get_loose_floor m in + let ceil = Guts.get_loose_ceil m in if Const.le ceil floor then Some ceil else None end end @@ -2251,28 +2251,22 @@ module Modality = struct let print ppf = function | Join_const c -> Format.fprintf ppf "join_const(%a)" Mode.Const.print c - - (** Given a modality and a guarantee that the modality will only be appled - on [x >= mm], we can find some lower modality that is equivalent on the - restricted range. 
This is similar to mode-crossing, where we can push a - mode lower given a restricted range of types. *) - let modality_cross_left ~mm = function - | Join_const c -> - (* We want to find the minimal [c'] such that [join c x <= join c' x] - for all [x >= mm]. By definition of join, this is equivalent to [c - <= join x c'] for all [x >= mm]. This is equivalent to [c <= join - mm c']. Equivalently [subtract c mm <= c']. Note that [mm] is a - mode variable, but we need a constant. Therefore, we conservatively - take its incomplete lower bound [mm.lower]. Also recall that we - want the smallest such [c']. So we take [c' = subtract c mm.lower]. - *) - let mm = Mode.Guts.get_floor mm in - Join_const (Mode.Const.subtract c mm) end + (* Similar to constant modalities, an inferred modality maps the mode of a + record/structure to the mode of a value therein. An inferred modality [f] is + inferred from the structure/record mode [mm] and the value mode [m]. + + Soundness: You should not get a value from a record/structure at a mode strictly + stronger than how it was put in. That is, [f mm >= m]. + + Completeness: You should be able to get a value from a record/structure at a mode + not strictly weaker than how it was put in. That is, [f mm <= m]. *) + type t = | Const of Const.t | Diff of Mode.lr * Mode.l + (** inferred modality. See [apply] for its behavior. *) | Undefined let sub_log left right ~log : (unit, error) Result.t = @@ -2322,10 +2316,13 @@ module Modality = struct | Const c -> c | Undefined -> Misc.fatal_error "modality Undefined should not be zapped." | Diff (mm, m) -> - let c = Mode.zap_to_floor m in - let m = Const.Join_const c in - (* To give the best modality, we try to cross modality. *) - Const.modality_cross_left ~mm m + let m = Mode.zap_to_floor m in + (* For soundness, we want some [c] such that [m <= join c mm], which + gives [subtract_mm m <= c]. Note that [mm] is a variable, but we need + a constant. 
Therefore, we take its floor [mm' <= mm], and we have + [subtract_mm m <= subtract_mm' m <= c]. *) + let mm' = Mode.Guts.get_floor mm in + Const.Join_const (Mode.Const.subtract m mm') let zap_to_id = zap_to_floor @@ -2402,6 +2399,7 @@ module Modality = struct | Const of Const.t | Undefined | Exactly of Mode.lr * Mode.l + (** inferred modality. See [apply] for its behavior. *) let sub_log left right ~log : (unit, error) Result.t = match left, right with @@ -2460,8 +2458,40 @@ module Modality = struct let zap_to_floor = function | Const c -> c | Undefined -> Misc.fatal_error "modality Undefined should not be zapped." - | Exactly (_, m) -> - let c = Mode.zap_to_floor m in + | Exactly (mm, m) -> + let m = Mode.zap_to_floor m in + (* We want some [c] such that: + - Soundness: [meet_with c mm >= m]. + - Completeness: [meet_with c mm <= m]. + - Simplicity: Optionally, we want [c] to be as high as possible to make + [meet_with c] a simpler modality. + + We first rewrite completeness condition to [c <= imply_with mm m]. + We will take [c] to be [imply_with mm m] and prove soundness for it. + + To prove soundness [meet_with (imply_with mm m) mm >= m], we need to prove: + - [imply_with mm m >= m], or equivalently [meet mm m <= m] which is trivial. + - [mm >= m], which is guaranteed by the caller of [infer]. + In fact, the soundness condition holds for any [c] taken to be + [imply_with _ m] where the underscore can be anything. + + Note that [imply_with] requires its first argument to be a constant, so we + need to get a constant out of [mm]. First recall that [imply_with] is antitone + in its first argument. Now, we have several choices: + - Take its floor [mm' <= mm], and then [c' = imply_with mm' m]. [c'] is higher + than [c] and thus might be incomplete. + - Take its ceil [mm' >= mm]. Then, [c'] is lower than [c] and thus complete, + but might be less simple than [c]. + - Zap to floor. 
This gives us a [c' = c] that is complete and simple, but we + are imposing extra constraint to [mm] not requested by the caller. + - Zap to ceil. This gives us a [c' = c] that is complete, but less simple than + zapping it to floor. Also, we are imposing extra constraint. + + We prioritize completeness and "not imposing extra constarint" over + simplicity. So we take its ceil [mm' >= mm]. + *) + let mm' = Mode.Guts.get_ceil mm in + let c = Mode.Const.imply mm' m in Const.Meet_const c let to_const_exn = function diff --git a/typing/mode_intf.mli b/typing/mode_intf.mli index cf30c298fb6..5b0f4897265 100644 --- a/typing/mode_intf.mli +++ b/typing/mode_intf.mli @@ -545,7 +545,9 @@ module type S = sig (** Given [md_mode] the mode of a module, and [mode] the mode of a value to be put in that module, return the inferred modality to be put on the - value description in the inferred module type. *) + value description in the inferred module type. + + The caller should ensure that for comonadic axes, [md_mode >= mode]. 
*) val infer : md_mode:Value.lr -> mode:Value.l -> t (* The following zapping functions possibly mutate a potentially inferred diff --git a/typing/parmatch.ml b/typing/parmatch.ml index 27e4c91445f..593a4d7ca21 100644 --- a/typing/parmatch.ml +++ b/typing/parmatch.ml @@ -177,9 +177,11 @@ let all_coherent column = | Const_unboxed_float32 _ | Const_string _), _ -> false end - | Tuple l1, Tuple l2 -> l1 = l2 + | Tuple l1, Tuple l2 -> + List.equal (Option.equal String.equal) l1 l2 | Unboxed_tuple l1, Unboxed_tuple l2 -> - List.equal (fun (lbl1, _) (lbl2, _) -> lbl1 = lbl2) l1 l2 + List.equal + (fun (lbl1, _) (lbl2, _) -> Option.equal String.equal lbl1 lbl2) l1 l2 | Record (lbl1 :: _), Record (lbl2 :: _) -> Array.length lbl1.lbl_all = Array.length lbl2.lbl_all | Record_unboxed_product (lbl1 :: _), Record_unboxed_product (lbl2 :: _) -> @@ -431,10 +433,17 @@ let simple_match d h = | Constant c1, Constant c2 -> const_compare c1 c2 = 0 | Lazy, Lazy -> true | Record _, Record _ -> true - | Tuple len1, Tuple len2 -> len1 = len2 + | Record_unboxed_product _, Record_unboxed_product _ -> true + | Tuple lbls1, Tuple lbls2 -> + List.equal (Option.equal String.equal) lbls1 lbls2 + | Unboxed_tuple lbls1, Unboxed_tuple lbls2 -> + List.equal (fun (l1, _) (l2, _) -> Option.equal String.equal l1 l2) + lbls1 lbls2 | Array (am1, _, len1), Array (am2, _, len2) -> am1 = am2 && len1 = len2 | _, Any -> true - | _, _ -> false + | ( Construct _ | Variant _ | Constant _ | Lazy | Record _ + | Record_unboxed_product _ | Tuple _ | Unboxed_tuple _ | Array _ | Any), + _ -> false @@ -524,9 +533,17 @@ let discr_pat q pss = let rec refine_pat acc = function | [] -> acc | ((head, _), _) :: rows -> + let append_unique lbls lbls_unique = + List.fold_right (fun lbl lbls_unique -> + if List.exists (fun l -> l.lbl_num = lbl.lbl_num) lbls_unique then + lbls_unique + else + lbl :: lbls_unique + ) lbls lbls_unique + in match head.pat_desc with | Any -> refine_pat acc rows - | Tuple _ | Lazy -> head + | Tuple _ | 
Unboxed_tuple _ | Lazy -> head | Record lbls -> (* N.B. we could make this case "simpler" by refining the record case using [all_record_args]. @@ -534,24 +551,22 @@ let discr_pat q pss = records. However it makes the witness we generate for the exhaustivity warning less pretty. *) - let fields = - List.fold_right (fun lbl r -> - if List.exists (fun l -> l.lbl_num = lbl.lbl_num) r then - r - else - lbl :: r - ) lbls (record_arg acc) - in + let fields = append_unique lbls (record_arg acc) in let d = { head with pat_desc = Record fields } in refine_pat d rows - | _ -> acc + | Record_unboxed_product lbls -> + let fields = append_unique lbls (record_unboxed_product_arg acc) in + let d = { head with pat_desc = Record_unboxed_product fields } in + refine_pat d rows + | Construct _ | Constant _ | Variant _ + | Array _ -> acc in let q, _ = deconstruct q in match q.pat_desc with (* short-circuiting: clearly if we have anything other than [Record] or [Any] to start with, we're not going to be able refine at all. So there's no point going over the matrix. *) - | Any | Record _ -> refine_pat q pss + | Any | Record _ | Record_unboxed_product _ -> refine_pat q pss | _ -> q (* @@ -779,7 +794,8 @@ let build_specialized_submatrices ~extend_row discr rows = let initial_constr_group = let open Patterns.Head in match discr.pat_desc with - | Record _ | Tuple _ | Lazy -> + | Record _ | Record_unboxed_product _ | Tuple _ | Unboxed_tuple _ + | Lazy -> (* [discr] comes from [discr_pat], and in this case subsumes any of the patterns we could find on the first column of [rows]. 
So it is better to use it for our initial environment than any of the normalized diff --git a/typing/predef.ml b/typing/predef.ml index c1ac0bb479e..c638b0178fb 100644 --- a/typing/predef.ml +++ b/typing/predef.ml @@ -220,14 +220,17 @@ and ident_some = ident_create "Some" and ident_null = ident_create "Null" and ident_this = ident_create "This" +let option_argument_sort = Jkind.Sort.Const.value let option_argument_jkind = Jkind.Builtin.value ~why:( Type_argument {parent_path = path_option; position = 1; arity = 1}) +let list_jkind = Jkind.Builtin.value ~why:Boxed_variant +let list_sort = Jkind.Sort.Const.value +let list_argument_sort = Jkind.Sort.Const.value let list_argument_jkind = Jkind.Builtin.value ~why:( Type_argument {parent_path = path_list; position = 1; arity = 1}) -let or_null_argument_jkind = Jkind.Builtin.value ~why:( - Type_argument {parent_path = path_or_null; position = 1; arity = 1}) +let or_null_argument_sort = Jkind.Sort.Const.value let mk_add_type add_type ?manifest type_ident @@ -290,16 +293,14 @@ let mk_add_type1 add_type type_ident add_type type_ident decl env let mk_add_extension add_extension id args = - List.iter (fun (_, jkind) -> + List.iter (fun (_, sort) -> let raise_error () = Misc.fatal_error "sanity check failed: non-value jkind in predef extension \ constructor; should this have Constructor_mixed shape?" 
in - match Jkind.get_layout jkind with - | Some (Base Value) -> () - | Some (Any - | Base (Void | Float32 | Float64 | Word | Bits8 | Bits16 | Bits32 | Bits64 | Vec128) - | Product _) - | None -> raise_error ()) + match (sort : Jkind.Sort.Const.t) with + | Base Value -> () + | Base (Void | Float32 | Float64 | Word | Bits8 | Bits16 | Bits32 | Bits64 | Vec128) + | Product _ -> raise_error ()) args; add_extension id { ext_type_path = path_exn; @@ -307,10 +308,10 @@ let mk_add_extension add_extension id args = ext_args = Cstr_tuple (List.map - (fun (ca_type, ca_jkind) -> + (fun (ca_type, ca_sort) -> { ca_type; - ca_jkind; + ca_sort; ca_modalities=Mode.Modality.Value.Const.id; ca_loc=Location.none }) @@ -328,20 +329,20 @@ let mk_add_extension add_extension id args = let variant constrs = let mk_elt { cd_args } = - let jkinds = match cd_args with + let sorts = match cd_args with | Cstr_tuple args -> - Misc.Stdlib.Array.of_list_map (fun { ca_jkind } -> ca_jkind) args + Misc.Stdlib.Array.of_list_map (fun { ca_sort } -> ca_sort) args | Cstr_record lbls -> - Misc.Stdlib.Array.of_list_map (fun { ld_jkind } -> ld_jkind) lbls + Misc.Stdlib.Array.of_list_map (fun { ld_sort } -> ld_sort) lbls in - Constructor_uniform_value, jkinds + Constructor_uniform_value, sorts in Type_variant (constrs, Variant_boxed (Misc.Stdlib.Array.of_list_map mk_elt constrs)) -let unrestricted tvar jkind = +let unrestricted tvar ca_sort = {ca_type=tvar; - ca_jkind=jkind; + ca_sort; ca_modalities=Mode.Modality.Value.Const.id; ca_loc=Location.none} @@ -351,7 +352,6 @@ let build_initial_env add_type add_extension empty_env = let add_type = mk_add_type add_type and add_type1 = mk_add_type1 add_type and add_extension = mk_add_extension add_extension in - let list_jkind = Jkind.Builtin.value ~why:Boxed_variant in empty_env (* Predefined types *) |> add_type1 ident_array @@ -383,8 +383,8 @@ let build_initial_env add_type add_extension empty_env = ~separability:Separability.Ind ~kind:(fun tvar -> variant [cstr 
ident_nil []; - cstr ident_cons [unrestricted tvar list_argument_jkind; - unrestricted (type_list tvar) list_jkind]]) + cstr ident_cons [unrestricted tvar list_argument_sort; + unrestricted (type_list tvar) list_sort]]) ~jkind:list_jkind |> add_type ident_nativeint ~jkind:Jkind.Const.Builtin.immutable_data @@ -393,35 +393,32 @@ let build_initial_env add_type add_extension empty_env = ~separability:Separability.Ind ~kind:(fun tvar -> variant [cstr ident_none []; - cstr ident_some [unrestricted tvar option_argument_jkind]]) + cstr ident_some [unrestricted tvar option_argument_sort]]) ~jkind:(Jkind.Builtin.value ~why:Boxed_variant) |> add_type ident_lexing_position ~kind:( - let lbl (field, field_type, jkind) = + let lbl (field, field_type) = let id = Ident.create_predef field in { ld_id=id; ld_mutable=Immutable; ld_modalities=Mode.Modality.Value.Const.id; ld_type=field_type; - ld_jkind=jkind; + ld_sort=Jkind.Sort.Const.value; ld_loc=Location.none; ld_attributes=[]; ld_uid=Uid.of_predef_id id; } in - let immediate = Jkind.Builtin.immediate ~why:(Primitive ident_int) in let labels = List.map lbl [ - ("pos_fname", type_string, - Jkind.of_builtin ~why:(Primitive ident_string) - Jkind.Const.Builtin.immutable_data); - ("pos_lnum", type_int, immediate); - ("pos_bol", type_int, immediate); - ("pos_cnum", type_int, immediate) ] + ("pos_fname", type_string); + ("pos_lnum", type_int); + ("pos_bol", type_int); + ("pos_cnum", type_int) ] in Type_record ( labels, - (Record_boxed (List.map (fun label -> label.ld_jkind) labels |> Array.of_list)) + (Record_boxed (List.map (fun label -> label.ld_sort) labels |> Array.of_list)) ) ) ~jkind:Jkind.Const.Builtin.immutable_data @@ -437,25 +434,25 @@ let build_initial_env add_type add_extension empty_env = (* Predefined exceptions - alphabetical order *) |> add_extension ident_assert_failure [newgenty (Ttuple[None, type_string; None, type_int; None, type_int]), - Jkind.Builtin.value ~why:Tuple] + Jkind.Sort.Const.value] |> add_extension 
ident_division_by_zero [] |> add_extension ident_end_of_file [] |> add_extension ident_failure [type_string, - Jkind.Builtin.value ~why:(Primitive ident_string)] + Jkind.Sort.Const.value] |> add_extension ident_invalid_argument [type_string, - Jkind.Builtin.value ~why:(Primitive ident_string)] + Jkind.Sort.Const.value] |> add_extension ident_match_failure [newgenty (Ttuple[None, type_string; None, type_int; None, type_int]), - Jkind.Builtin.value ~why:Tuple] + Jkind.Sort.Const.value] |> add_extension ident_not_found [] |> add_extension ident_out_of_memory [] |> add_extension ident_stack_overflow [] |> add_extension ident_sys_blocked_io [] |> add_extension ident_sys_error [type_string, - Jkind.Builtin.value ~why:(Primitive ident_string)] + Jkind.Sort.Const.value] |> add_extension ident_undefined_recursive_module [newgenty (Ttuple[None, type_string; None, type_int; None, type_int]), - Jkind.Builtin.value ~why:Tuple] + Jkind.Sort.Const.value] let add_simd_stable_extension_types add_type env = let add_type = mk_add_type add_type in @@ -489,7 +486,7 @@ let add_small_number_beta_extension_types add_type env = let or_null_kind tvar = variant [cstr ident_null []; - cstr ident_this [unrestricted tvar or_null_argument_jkind]] + cstr ident_this [unrestricted tvar or_null_argument_sort]] let add_or_null add_type env = let add_type1 = mk_add_type1 add_type in diff --git a/typing/printtyped.ml b/typing/printtyped.ml index af6d9915b4e..153dd95a48f 100644 --- a/typing/printtyped.ml +++ b/typing/printtyped.ml @@ -180,9 +180,9 @@ let tuple_component_label i ppf = function let typevars ppf vs = List.iter (typevar_jkind ~print_quote:true ppf) vs -let jkind_array i ppf jkinds = - array (i+1) (fun _ ppf l -> fprintf ppf "%a;@ " Jkind.format l) - ppf jkinds +let sort_array i ppf sorts = + array (i+1) (fun _ ppf l -> fprintf ppf "%a;@ " Jkind.Sort.Const.format l) + ppf sorts let tag ppf = let open Types in function | Ordinary {src_index;runtime_tag} -> @@ -194,8 +194,8 @@ let 
variant_representation i ppf = let open Types in function line i ppf "Variant_unboxed\n" | Variant_boxed cstrs -> line i ppf "Variant_boxed %a\n" - (array (i+1) (fun _ ppf (_cstr, jkinds) -> - jkind_array (i+1) ppf jkinds)) + (array (i+1) (fun _ ppf (_cstr, sorts) -> + sort_array (i+1) ppf sorts)) cstrs | Variant_extensible -> line i ppf "Variant_inlined\n" @@ -205,8 +205,8 @@ let flat_element i ppf flat_element = let record_representation i ppf = let open Types in function | Record_unboxed -> line i ppf "Record_unboxed\n" - | Record_boxed jkinds -> - line i ppf "Record_boxed %a\n" (jkind_array i) jkinds + | Record_boxed sorts -> + line i ppf "Record_boxed %a\n" (sort_array i) sorts | Record_inlined (t, _c, v) -> line i ppf "Record_inlined (%a, %a)\n" tag t (variant_representation i) v | Record_float -> line i ppf "Record_float\n" diff --git a/typing/solver.ml b/typing/solver.ml index fe57c143d40..ff972766eed 100644 --- a/typing/solver.ml +++ b/typing/solver.ml @@ -542,7 +542,7 @@ module Solver_mono (C : Lattices_mono) = struct in loop (C.max obj) VarMap.empty l - let get_conservative_ceil : type a l r. a C.obj -> (a, l * r) mode -> a = + let get_loose_ceil : type a l r. a C.obj -> (a, l * r) mode -> a = fun obj m -> match m with | Amode a -> a @@ -552,7 +552,7 @@ module Solver_mono (C : Lattices_mono) = struct | Amodejoin (a, mvs) -> VarMap.fold (fun _ mv acc -> C.join obj acc (mupper obj mv)) mvs a - let get_conservative_floor : type a l r. a C.obj -> (a, l * r) mode -> a = + let get_loose_floor : type a l r. a C.obj -> (a, l * r) mode -> a = fun obj m -> match m with | Amode a -> a @@ -563,7 +563,7 @@ module Solver_mono (C : Lattices_mono) = struct VarMap.fold (fun _ mv acc -> C.meet obj acc (mlower obj mv)) mvs a (* Due to our biased implementation, the ceil is precise. *) - let get_ceil = get_conservative_ceil + let get_ceil = get_loose_ceil let zap_to_ceil : type a l. 
a C.obj -> (a, l * allowed) mode -> log:_ -> a = fun obj m ~log -> @@ -641,8 +641,8 @@ module Solver_mono (C : Lattices_mono) = struct type a l r. ?verbose:bool -> a C.obj -> Format.formatter -> (a, l * r) mode -> unit = fun ?verbose (obj : a C.obj) ppf m -> - let ceil = get_conservative_ceil obj m in - let floor = get_conservative_floor obj m in + let ceil = get_loose_ceil obj m in + let floor = get_loose_floor obj m in if C.le obj ceil floor then C.print obj ppf ceil else print_raw ?verbose obj ppf m @@ -745,9 +745,9 @@ module Solvers_polarized (C : Lattices_mono) = struct let get_floor = S.get_floor - let get_conservative_ceil = S.get_conservative_ceil + let get_loose_ceil = S.get_loose_ceil - let get_conservative_floor = S.get_conservative_floor + let get_loose_floor = S.get_loose_floor let print ?(verbose = false) = S.print ~verbose @@ -807,9 +807,9 @@ module Solvers_polarized (C : Lattices_mono) = struct let get_floor = S.get_ceil - let get_conservative_ceil = S.get_conservative_floor + let get_loose_ceil = S.get_loose_floor - let get_conservative_floor = S.get_conservative_ceil + let get_loose_floor = S.get_loose_ceil let print ?(verbose = false) = S.print ~verbose diff --git a/typing/solver_intf.mli b/typing/solver_intf.mli index 177693da723..8eed401fab4 100644 --- a/typing/solver_intf.mli +++ b/typing/solver_intf.mli @@ -256,14 +256,14 @@ module type Solver_polarized = sig val get_ceil : 'a obj -> ('a, 'l * allowed) mode -> 'a (** Similar to [get_floor] but does not run the further constraining needed - for a precise bound. As a result, the returned bound is conservative; + for a precise bound. As a result, the returned bound is loose; i.e., it might be lower than the real floor. *) - val get_conservative_floor : 'a obj -> ('a, 'l * 'r) mode -> 'a + val get_loose_floor : 'a obj -> ('a, 'l * 'r) mode -> 'a (** Similar to [get_ceil] but does not run the further constraining needed - for a precise bound. 
As a result, the returned bound is conservative; + for a precise bound. As a result, the returned bound is loose; i.e., it might be higher than the real ceil. *) - val get_conservative_ceil : 'a obj -> ('a, 'l * 'r) mode -> 'a + val get_loose_ceil : 'a obj -> ('a, 'l * 'r) mode -> 'a (** Printing a mode for debugging. *) val print : diff --git a/typing/subst.ml b/typing/subst.ml index aba4f23965c..d063669535e 100644 --- a/typing/subst.ml +++ b/typing/subst.ml @@ -124,11 +124,6 @@ let with_additional_action = in { s with additional_action; last_compose = None } -let apply_prepare_jkind s lay loc = - match s.additional_action with - | Prepare_for_saving { prepare_jkind } -> prepare_jkind loc lay - | Duplicate_variables | No_action -> lay - let change_locs s loc = { s with loc = Some loc; last_compose = None } let loc s x = @@ -467,7 +462,7 @@ let label_declaration copy_scope s l = ld_id = l.ld_id; ld_mutable = l.ld_mutable; ld_modalities = l.ld_modalities; - ld_jkind = apply_prepare_jkind s l.ld_jkind l.ld_loc; + ld_sort = l.ld_sort; ld_type = typexp copy_scope s l.ld_loc l.ld_type; ld_loc = loc s l.ld_loc; ld_attributes = attrs s l.ld_attributes; @@ -477,13 +472,7 @@ let label_declaration copy_scope s l = let constructor_argument copy_scope s ca = { ca_type = typexp copy_scope s ca.ca_loc ca.ca_type; - ca_jkind = begin match s.additional_action with - | Prepare_for_saving { prepare_jkind } -> - prepare_jkind ca.ca_loc ca.ca_jkind - (* CR layouts v2.8: This will have to be copied once we - have with-types. 
*) - | Duplicate_variables | No_action -> ca.ca_jkind - end; + ca_sort = ca.ca_sort; ca_loc = loc s ca.ca_loc; ca_modalities = ca.ca_modalities; } @@ -504,30 +493,6 @@ let constructor_declaration copy_scope s c = cd_uid = c.cd_uid; } -(* called only when additional_action is [Prepare_for_saving] *) -let variant_representation ~prepare_jkind loc = function - | Variant_unboxed -> Variant_unboxed - | Variant_boxed cstrs_and_jkinds -> - Variant_boxed - (Array.map - (fun (cstr, jkinds) -> cstr, Array.map (prepare_jkind loc) jkinds) - cstrs_and_jkinds) - | Variant_extensible -> Variant_extensible - -(* called only when additional_action is [Prepare_for_saving] *) -let record_representation ~prepare_jkind loc = function - | Record_unboxed -> Record_unboxed - | Record_inlined (tag, constructor_rep, variant_rep) -> - Record_inlined (tag, - constructor_rep, - variant_representation ~prepare_jkind loc variant_rep) - | Record_boxed lays -> - Record_boxed (Array.map (prepare_jkind loc) lays) - | (Record_float | Record_ufloat | Record_mixed _) as rep -> rep - -let record_unboxed_product_representation ~prepare_jkind:_ _loc = function - | Record_unboxed_product -> Record_unboxed_product - let type_declaration' copy_scope s decl = { type_params = List.map (typexp copy_scope s decl.type_loc) decl.type_params; type_arity = decl.type_arity; @@ -535,30 +500,11 @@ let type_declaration' copy_scope s decl = begin match decl.type_kind with Type_abstract r -> Type_abstract r | Type_variant (cstrs, rep) -> - let rep = - match s.additional_action with - | No_action | Duplicate_variables -> rep - | Prepare_for_saving { prepare_jkind } -> - variant_representation ~prepare_jkind decl.type_loc rep - in Type_variant (List.map (constructor_declaration copy_scope s) cstrs, rep) | Type_record(lbls, rep) -> - let rep = - match s.additional_action with - | No_action | Duplicate_variables -> rep - | Prepare_for_saving { prepare_jkind } -> - record_representation ~prepare_jkind decl.type_loc rep - in 
Type_record (List.map (label_declaration copy_scope s) lbls, rep) | Type_record_unboxed_product(lbls, rep) -> - let rep = - match s.additional_action with - | No_action | Duplicate_variables -> rep - | Prepare_for_saving { prepare_jkind } -> - record_unboxed_product_representation - ~prepare_jkind decl.type_loc rep - in Type_record_unboxed_product (List.map (label_declaration copy_scope s) lbls, rep) | Type_open -> Type_open diff --git a/typing/typecore.ml b/typing/typecore.ml index 08610a2be8a..c6abb0b1a26 100644 --- a/typing/typecore.ml +++ b/typing/typecore.ml @@ -1456,7 +1456,7 @@ and build_as_type_aux (env : Env.t) p ~mode = else Value.newvar () in let keep = - priv || cstr.cstr_existentials <> [] || + priv || vto <> None (* be lazy and keep the type for node constraints *) in let ty = if keep then p.pat_type else @@ -9634,7 +9634,8 @@ and type_comprehension_expr ~loc ~env ~ty_expected ~attributes cexpr = container_type, (fun tcomp -> Texp_array_comprehension - (mut, Jkind.Sort.for_array_comprehension_element, tcomp)), + (mut, Jkind.Sort.of_const + Jkind.Sort.Const.for_array_comprehension_element, tcomp)), comp, (* CR layouts v4: When this changes from [value], you will also have to update the use of [transl_exp] in transl_array_comprehension.ml. 
See diff --git a/typing/typedecl.ml b/typing/typedecl.ml index 179f6fb15f2..4fa6cec55c9 100644 --- a/typing/typedecl.ml +++ b/typing/typedecl.ml @@ -484,8 +484,8 @@ let transl_labels (type rep) ~(record_form : rep record_form) ~new_var_jkind {Types.ld_id = ld.ld_id; ld_mutable = ld.ld_mutable; ld_modalities = ld.ld_modalities; - ld_jkind = Jkind.Builtin.any ~why:Dummy_jkind; - (* Updated by [update_label_jkinds] *) + ld_sort = Jkind.Sort.Const.void; + (* Updated by [update_label_sorts] *) ld_type = ty; ld_loc = ld.ld_loc; ld_attributes = ld.ld_attributes; @@ -516,8 +516,8 @@ let transl_types_gf ~new_var_jkind ~allow_unboxed Types.ca_modalities = ca.ca_modalities; ca_loc = ca.ca_loc; ca_type = ca.ca_type.ctyp_type; - ca_jkind = Jkind.Builtin.any ~why:Dummy_jkind; - (* Updated by [update_constructor_arguments_jkinds] *) + ca_sort = Jkind.Sort.Const.void; + (* Updated by [update_constructor_arguments_sorts] *) }) tyl_gfl in tyl_gfl, tyl_gfl' @@ -546,7 +546,7 @@ let transl_constructor_arguments ~new_var_jkind ~unboxed (* Note that [make_constructor] does not fill in the [ld_jkind] field of any computed record types, because it's called too early in the translation of a type declaration to compute accurate jkinds in the presence of recursively - defined types. It is updated later by [update_constructor_arguments_jkinds] + defined types. It is updated later by [update_constructor_arguments_sorts] *) let make_constructor env loc ~cstr_path ~type_path ~unboxed type_params svars @@ -797,7 +797,6 @@ let transl_declaration env sdecl (id, uid) = let cty = transl_simple_type ~new_var_jkind:Any env ~closed:no_row Mode.Alloc.Const.legacy sty in Some cty, Some cty.ctyp_type in - let any = Jkind.Builtin.any ~why:Initial_typedecl_env in (* jkind_default is the jkind to use for now as the type_jkind when there is no annotation and no manifest. See Note [Default jkinds in transl_declaration]. 
@@ -891,21 +890,21 @@ let transl_declaration env sdecl (id, uid) = Variant_unboxed, Jkind.of_new_legacy_sort ~why:Old_style_unboxed_type else - (* We mark all arg jkinds "any" here. They are updated later, - after the circular type checks make it safe to check jkinds. + (* We mark all arg sorts "void" here. They are updated later, + after the circular type checks make it safe to check sorts. Likewise, [Constructor_uniform_value] is potentially wrong and will be updated later. *) Variant_boxed ( Array.map (fun cstr -> - let jkinds = + let sorts = match Types.(cstr.cd_args) with | Cstr_tuple args -> - Array.make (List.length args) any - | Cstr_record _ -> [| any |] + Array.make (List.length args) Jkind.Sort.Const.void + | Cstr_record _ -> [| Jkind.Sort.Const.value |] in - Constructor_uniform_value, jkinds) + Constructor_uniform_value, sorts) (Array.of_list cstrs) ), Jkind.Builtin.value ~why:Boxed_variant @@ -927,8 +926,8 @@ let transl_declaration env sdecl (id, uid) = (* Note this is inaccurate, using `Record_boxed` in cases where the correct representation is [Record_float], [Record_ufloat], or [Record_mixed]. Those cases are fixed up after we can get - accurate jkinds for the fields, in [update_decl_jkind]. *) - Record_boxed (Array.make (List.length lbls) any), + accurate sorts for the fields, in [update_decl_jkind]. *) + Record_boxed (Array.make (List.length lbls) Jkind.Sort.Const.void), Jkind.Builtin.value ~why:Boxed_record in Ttype_record lbls, Type_record(lbls', rep), jkind @@ -942,7 +941,9 @@ let transl_declaration env sdecl (id, uid) = (* The jkinds below, and the ones in [lbls], are dummy jkinds which are replaced and made to correspond to each other in [update_decl_jkind]. 
*) - let jkind_ls = List.map (fun _ -> any) lbls in + let jkind_ls = + List.map (fun _ -> Jkind.Builtin.any ~why:Initial_typedecl_env) lbls + in let jkind = Jkind.Builtin.product ~why:Unboxed_record jkind_ls in Ttype_record_unboxed_product lbls, Type_record_unboxed_product(lbls', Record_unboxed_product), jkind @@ -1237,59 +1238,70 @@ let check_coherence env loc dpath decl = let check_abbrev env sdecl (id, decl) = (id, check_coherence env sdecl.ptype_loc (Path.Pident id) decl) -(* The [update_x_jkinds] functions infer more precise jkinds in the type kind, +(* The [update_x_sorts] functions infer more precise jkinds in the type kind, including which fields of a record are void. This would be hard to do during [transl_declaration] due to mutually recursive types. *) -(* [update_label_jkinds] additionally returns whether all the jkinds - were void *) -let update_label_jkinds env loc lbls named = - (* [named] is [Some jkinds] for top-level records (we will update the - jkinds) and [None] for inlined records. *) +(* [update_label_sorts] additionally returns whether all the jkinds + were void, and the jkinds of the labels *) +let update_label_sorts env loc lbls named = + (* [named] is [Some sorts] for top-level records (we will update the + sorts) and [None] for inlined records. *) (* CR layouts v5: it wouldn't be too hard to support records that are all void. 
just needs a bit of refactoring in translcore *) let update = match named with | None -> fun _ _ -> () - | Some jkinds -> fun idx jkind -> jkinds.(idx) <- jkind + | Some sorts -> fun idx sort -> sorts.(idx) <- sort in - let lbls = + let lbls_and_jkinds = List.mapi (fun idx (Types.{ld_type} as lbl) -> - let ld_jkind = Ctype.type_jkind env ld_type in - update idx ld_jkind; - {lbl with ld_jkind} + let jkind = Ctype.type_jkind env ld_type in + (* Next line guaranteed to be safe because of [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in + update idx ld_sort; + {lbl with ld_sort}, jkind ) lbls in - if List.for_all (fun l -> Jkind.is_void_defaulting l.ld_jkind) lbls then + let lbls, jkinds = List.split lbls_and_jkinds in + if List.for_all (fun l -> Jkind.Sort.Const.(equal void l.ld_sort)) lbls then raise (Error (loc, Jkind_empty_record)) - else lbls, false + else lbls, false, jkinds (* CR layouts v5: return true for a record with all voids *) (* In addition to updated constructor arguments, returns whether all arguments are void, useful for detecting enumerations that can be [immediate]. 
*) -let update_constructor_arguments_jkinds env loc cd_args jkinds = +let update_constructor_arguments_sorts env loc cd_args sorts = let update = - match jkinds with + match sorts with | None -> fun _ _ -> () - | Some jkinds -> fun idx jkind -> jkinds.(idx) <- jkind + | Some sorts -> fun idx sort -> sorts.(idx) <- sort in match cd_args with | Types.Cstr_tuple args -> - let args = + let args_and_jkinds = List.mapi (fun idx ({Types.ca_type; _} as arg) -> - let ca_jkind = Ctype.type_jkind env ca_type in - update idx ca_jkind; - {arg with ca_jkind}) args + let jkind = Ctype.type_jkind env ca_type in + (* Next line guaranteed to be safe because of [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ca_sort = Jkind.Sort.default_to_value_and_get sort in + update idx ca_sort; + {arg with ca_sort}, jkind) + args in + let args, jkinds = List.split args_and_jkinds in Types.Cstr_tuple args, - List.for_all (fun { ca_jkind } -> Jkind.is_void_defaulting ca_jkind) args + List.for_all + (fun { ca_sort } -> Jkind_types.Sort.Const.(equal void ca_sort)) args, + jkinds | Types.Cstr_record lbls -> - let lbls, all_void = - update_label_jkinds env loc lbls None + let lbls, all_void, jkinds = + update_label_sorts env loc lbls None in - update 0 (Jkind.Builtin.value ~why:Boxed_record); - Types.Cstr_record lbls, all_void + update 0 Jkind.Sort.Const.value; + Types.Cstr_record lbls, all_void, jkinds let assert_mixed_product_support = let required_reserved_header_bits = 8 in @@ -1449,17 +1461,17 @@ module Element_repr = struct end let update_constructor_representation - env (cd_args : Types.constructor_arguments) ~loc + env (cd_args : Types.constructor_arguments) arg_jkinds ~loc ~is_extension_constructor = let flat_suffix = match cd_args with | Cstr_tuple arg_types_and_modes -> let arg_reprs = - List.map (fun {Types.ca_type=arg_type; ca_jkind=arg_jkind; _} -> + List.map2 (fun {Types.ca_type=arg_type; _} arg_jkind -> let kloc : jkind_sort_loc = Cstr_tuple { unboxed = false } 
in Element_repr.classify env loc kloc arg_type arg_jkind, arg_type) - arg_types_and_modes + arg_types_and_modes arg_jkinds in Element_repr.mixed_product_shape loc arg_reprs Cstr_tuple ~on_flat_field_expected:(fun ~non_value ~boxed -> @@ -1472,11 +1484,11 @@ let update_constructor_representation raise (Error (loc, Illegal_mixed_product violation))) | Cstr_record fields -> let arg_reprs = - List.map (fun ld -> + List.map2 (fun ld arg_jkind -> let kloc = Inlined_record { unboxed = false } in - Element_repr.classify env loc kloc ld.Types.ld_type ld.ld_jkind, + Element_repr.classify env loc kloc ld.Types.ld_type arg_jkind, ld) - fields + fields arg_jkinds in Element_repr.mixed_product_shape loc arg_reprs Cstr_record ~on_flat_field_expected:(fun ~non_value ~boxed -> @@ -1528,20 +1540,24 @@ let update_decl_jkind env dpath decl = let update_record_kind loc lbls rep = match lbls, rep with | [Types.{ld_type} as lbl], Record_unboxed -> - let ld_jkind = Ctype.type_jkind env ld_type in - [{lbl with ld_jkind}], Record_unboxed, ld_jkind - | _, Record_boxed jkinds -> - let lbls, all_void = - update_label_jkinds env loc lbls (Some jkinds) + let jkind = Ctype.type_jkind env ld_type in + (* This next line is guaranteed to be OK because of a call to + [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in + [{lbl with ld_sort}], Record_unboxed, jkind + | _, Record_boxed sorts -> + let lbls, all_void, jkinds = + update_label_sorts env loc lbls (Some sorts) in let jkind = Jkind.for_boxed_record ~all_void in let reprs = - List.mapi - (fun i lbl -> + List.map2 + (fun lbl jkind -> let kloc = Record { unboxed = false } in - Element_repr.classify env loc kloc lbl.Types.ld_type jkinds.(i), + Element_repr.classify env loc kloc lbl.Types.ld_type jkind, lbl) - lbls + lbls jkinds in let repr_summary = { values = false; imms = false; floats = false; float64s = false; @@ -1642,16 +1658,20 @@ let update_decl_jkind env dpath decl 
= | [{Types.cd_args} as cstr], Variant_unboxed -> begin match cd_args with | Cstr_tuple [{ca_type=ty; _} as arg] -> begin - let ca_jkind = Ctype.type_jkind env ty in + let jkind = Ctype.type_jkind env ty in + let sort = Jkind.sort_of_jkind jkind in + let ca_sort = Jkind.Sort.default_to_value_and_get sort in [{ cstr with Types.cd_args = - Cstr_tuple [{ arg with ca_jkind }] }], - Variant_unboxed, ca_jkind + Cstr_tuple [{ arg with ca_sort }] }], + Variant_unboxed, jkind end | Cstr_record [{ld_type} as lbl] -> begin - let ld_jkind = Ctype.type_jkind env ld_type in + let jkind = Ctype.type_jkind env ld_type in + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in [{ cstr with Types.cd_args = - Cstr_record [{ lbl with ld_jkind }] }], - Variant_unboxed, ld_jkind + Cstr_record [{ lbl with ld_sort }] }], + Variant_unboxed, jkind end | (Cstr_tuple ([] | _ :: _ :: _) | Cstr_record ([] | _ :: _ :: _)) -> assert false @@ -1659,27 +1679,27 @@ let update_decl_jkind env dpath decl = | cstrs, Variant_boxed cstr_shapes -> let (_,cstrs,all_voids) = List.fold_left (fun (idx,cstrs,all_voids) cstr -> - let arg_jkinds = + let arg_sorts = match cstr_shapes.(idx) with - | Constructor_uniform_value, arg_jkinds -> arg_jkinds + | Constructor_uniform_value, arg_sorts -> arg_sorts | Constructor_mixed _, _ -> fatal_error "Typedecl.update_variant_kind doesn't expect mixed \ constructor as input" in - let cd_args, all_void = - update_constructor_arguments_jkinds env cstr.Types.cd_loc - cstr.Types.cd_args (Some arg_jkinds) + let cd_args, all_void, jkinds = + update_constructor_arguments_sorts env cstr.Types.cd_loc + cstr.Types.cd_args (Some arg_sorts) in let cstr_repr = - update_constructor_representation env cd_args + update_constructor_representation env cd_args jkinds ~is_extension_constructor:false ~loc:cstr.Types.cd_loc in let () = match cstr_repr with | Constructor_uniform_value -> () - | Constructor_mixed _ -> cstr_shapes.(idx) <- cstr_repr, 
arg_jkinds + | Constructor_mixed _ -> cstr_shapes.(idx) <- cstr_repr, arg_sorts in let cstr = { cstr with Types.cd_args } in (idx+1,cstr::cstrs,all_voids && all_void) @@ -1720,8 +1740,12 @@ let update_decl_jkind env dpath decl = | Record_unboxed_product -> let lbls, jkinds = List.map (fun (Types.{ld_type} as lbl) -> - let ld_jkind = Ctype.type_jkind env ld_type in - {lbl with ld_jkind}, ld_jkind + let jkind = Ctype.type_jkind env ld_type in + (* This next line is guaranteed to be OK because of a call to + [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in + {lbl with ld_sort}, jkind ) lbls |> List.split in @@ -2402,11 +2426,11 @@ let transl_extension_constructor_decl ~cstr_path:(Pident id) ~type_path ~unboxed:false typext_params svars sargs sret_type in - let args, constant = - update_constructor_arguments_jkinds env loc args None + let args, constant, jkinds = + update_constructor_arguments_sorts env loc args None in let constructor_shape = - update_constructor_representation env args ~loc + update_constructor_representation env args jkinds ~loc ~is_extension_constructor:true in args, constructor_shape, constant, ret_type, diff --git a/typing/typedtree.ml b/typing/typedtree.ml index 6263321b1b5..c78b97000c9 100644 --- a/typing/typedtree.ml +++ b/typing/typedtree.ml @@ -1082,18 +1082,31 @@ let rec iter_bound_idents { f = fun p -> iter_bound_idents f p } d -type full_bound_ident_action = - Ident.t -> string loc -> type_expr -> Uid.t -> Mode.Value.l -> Jkind.sort -> unit +type 'sort full_bound_ident_action = + Ident.t -> string loc -> type_expr -> Uid.t -> Mode.Value.l -> 'sort -> unit + +let for_transl f = + f ~of_sort:Jkind.Sort.default_for_transl_and_get ~of_const_sort:Fun.id + +let for_typing f = + f ~of_sort:Fun.id ~of_const_sort:Jkind.Sort.of_const (* The intent is that the sort should be the sort of the type of the pattern. It's used to avoid computing jkinds from types. 
`f` then gets passed the sorts of the variables. This is occasionally used in places where we don't actually know - about the sort of the pattern but `f` doesn't care about the sorts. *) -let iter_pattern_full ~both_sides_of_or f sort pat = + about the sort of the pattern but `f` doesn't care about the sorts. + + Because this should work both over [Jkind.Sort.t] and [Jkind.Sort.Const.t], + this takes conversion functions [of_sort : Jkind.Sort.t -> 'sort] and + [of_const_sort : Jkind.Sort.Const.t -> 'sort]. The need for these is somewhat + unfortunate, but it's worth it to allow [Jkind.Sort.Const.t] to be used + throughout the transl process. *) +let iter_pattern_full ~of_sort ~of_const_sort ~both_sides_of_or f sort pat = + let value = of_const_sort Jkind.Sort.Const.value in let rec loop : - type k . full_bound_ident_action -> Jkind.sort -> k general_pattern -> _ = + type k . 'sort full_bound_ident_action -> 'sort -> k general_pattern -> _ = fun f sort pat -> match pat.pat_desc with (* Cases where we push the sort inwards: *) @@ -1112,56 +1125,55 @@ let iter_pattern_full ~both_sides_of_or f sort pat = match cstr.cstr_repr with | Variant_unboxed -> [ sort ] | Variant_boxed _ | Variant_extensible -> - (List.map (fun { ca_jkind } -> Jkind.sort_of_jkind ca_jkind ) + (List.map (fun { ca_sort } -> of_const_sort ca_sort ) cstr.cstr_args) in List.iter2 (loop f) sorts patl | Tpat_record (lbl_pat_list, _) -> List.iter (fun (_, lbl, pat) -> - (loop f) (Jkind.sort_of_jkind lbl.lbl_jkind) pat) + (loop f) (of_const_sort lbl.lbl_sort) pat) lbl_pat_list | Tpat_record_unboxed_product (lbl_pat_list, _) -> List.iter (fun (_, lbl, pat) -> - (loop f) (Jkind.sort_of_jkind lbl.lbl_jkind) pat) + (loop f) (of_const_sort lbl.lbl_sort) pat) lbl_pat_list (* Cases where the inner things must be value: *) - | Tpat_variant (_, pat, _) -> Option.iter (loop f Jkind.Sort.value) pat + | Tpat_variant (_, pat, _) -> Option.iter (loop f value) pat | Tpat_tuple patl -> - List.iter (fun (_, pat) -> loop f 
Jkind.Sort.value pat) patl + List.iter (fun (_, pat) -> loop f value pat) patl (* CR layouts v5: tuple case to change when we allow non-values in tuples *) | Tpat_unboxed_tuple patl -> - List.iter (fun (_, pat, sort) -> loop f sort pat) patl - | Tpat_array (_, arg_sort, patl) -> List.iter (loop f arg_sort) patl - | Tpat_lazy p | Tpat_exception p -> loop f Jkind.Sort.value p + List.iter (fun (_, pat, sort) -> loop f (of_sort sort) pat) patl + | Tpat_array (_, arg_sort, patl) -> + List.iter (loop f (of_sort arg_sort)) patl + | Tpat_lazy p | Tpat_exception p -> loop f value p (* Cases without variables: *) | Tpat_any | Tpat_constant _ -> () in loop f sort pat -let rev_pat_bound_idents_full sort pat = +let rev_pat_bound_idents_full ~of_sort ~of_const_sort sort pat = let idents_full = ref [] in let add id sloc typ uid _ sort = idents_full := (id, sloc, typ, uid, sort) :: !idents_full in - iter_pattern_full ~both_sides_of_or:false add sort pat; + iter_pattern_full + ~both_sides_of_or:false ~of_sort ~of_const_sort + add sort pat; !idents_full let rev_only_idents idents_full = List.rev_map (fun (id,_,_,_,_) -> id) idents_full -let rev_only_idents_and_types idents_full = - List.rev_map (fun (id,_,ty,_,_) -> (id,ty)) idents_full - let pat_bound_idents_full sort pat = - List.rev (rev_pat_bound_idents_full sort pat) + List.rev (for_transl rev_pat_bound_idents_full sort pat) (* In these two, we don't know the sort, but the sort information isn't used so it's fine to lie. 
*) -let pat_bound_idents_with_types pat = - rev_only_idents_and_types (rev_pat_bound_idents_full Jkind.Sort.value pat) let pat_bound_idents pat = - rev_only_idents (rev_pat_bound_idents_full Jkind.Sort.value pat) + rev_only_idents + (for_typing rev_pat_bound_idents_full Jkind.Sort.value pat) let rev_let_bound_idents_full bindings = let idents_full = ref [] in @@ -1176,7 +1188,9 @@ let let_bound_idents_with_modes_sorts_and_checks bindings = in let checks = List.fold_left (fun checks vb -> - iter_pattern_full ~both_sides_of_or:true f vb.vb_sort vb.vb_pat; + for_typing iter_pattern_full + ~both_sides_of_or:true + f vb.vb_sort vb.vb_pat; match vb.vb_pat.pat_desc, vb.vb_expr.exp_desc with | Tpat_var (id, _, _, _), Texp_function fn -> let zero_alloc = diff --git a/typing/typedtree.mli b/typing/typedtree.mli index d95ca7173a0..1b51ff29bd5 100644 --- a/typing/typedtree.mli +++ b/typing/typedtree.mli @@ -1271,11 +1271,9 @@ val mknoloc: 'a -> 'a Asttypes.loc val mkloc: 'a -> Location.t -> 'a Asttypes.loc val pat_bound_idents: 'k general_pattern -> Ident.t list -val pat_bound_idents_with_types: - 'k general_pattern -> (Ident.t * Types.type_expr) list val pat_bound_idents_full: - Jkind.sort -> 'k general_pattern - -> (Ident.t * string loc * Types.type_expr * Types.Uid.t * Jkind.sort) list + Jkind.Sort.Const.t -> 'k general_pattern + -> (Ident.t * string loc * Types.type_expr * Types.Uid.t * Jkind.Sort.Const.t) list (** Splits an or pattern into its value (left) and exception (right) parts. *) val split_pattern: diff --git a/typing/typeopt.ml b/typing/typeopt.ml index 793f3270778..eae41b8c550 100644 --- a/typing/typeopt.ml +++ b/typing/typeopt.ml @@ -136,7 +136,7 @@ type 'a classification = See comment on [classification] above to understand [classify_product]. 
*) let classify ~classify_product env loc ty sort : _ classification = let ty = scrape_ty env ty in - match Jkind.(Sort.default_to_value_and_get sort) with + match (sort : Jkind.Sort.Const.t) with | Base Value -> begin if is_always_gc_ignorable env ty then Int else match get_desc ty with @@ -231,7 +231,8 @@ let array_kind_of_elt ~elt_sort env loc ty = match elt_sort with | Some s -> s | None -> - type_legacy_sort ~why:Array_element env loc ty + Jkind.Sort.default_for_transl_and_get + (type_legacy_sort ~why:Array_element env loc ty) in let classify_product ty sorts = if Language_extension.(is_at_least Layouts Alpha) then @@ -577,7 +578,7 @@ and value_kind_variant env ~loc ~visited ~depth ~num_nodes_visited value_kind env ~loc ~visited ~depth ~num_nodes_visited ty | _ -> assert false end - | Variant_boxed cstrs_and_jkinds -> + | Variant_boxed cstrs_and_sorts -> let depth = depth + 1 in let for_constructor_fields fields ~depth ~num_nodes_visited ~field_to_type = List.fold_left_map @@ -665,7 +666,7 @@ and value_kind_variant env ~loc ~visited ~depth ~num_nodes_visited | None -> None | Some (num_nodes_visited, next_const, consts, next_tag, non_consts) -> - let cstr_shape, _ = cstrs_and_jkinds.(idx) in + let cstr_shape, _ = cstrs_and_sorts.(idx) in let (is_mutable, num_nodes_visited), fields = for_one_constructor constructor ~depth ~num_nodes_visited ~cstr_shape @@ -834,8 +835,7 @@ let[@inline always] rec layout_of_const_sort_generic ~value_kind ~error error const let layout env loc sort ty = - layout_of_const_sort_generic - (Jkind.Sort.default_to_value_and_get sort) + layout_of_const_sort_generic sort ~value_kind:(lazy (value_kind env loc ty)) ~error:(function | Base Value -> assert false @@ -854,9 +854,7 @@ let layout env loc sort ty = ) let layout_of_sort loc sort = - layout_of_const_sort_generic - (Jkind.Sort.default_to_value_and_get sort) - ~value_kind:(lazy Lambda.generic_value) + layout_of_const_sort_generic sort ~value_kind:(lazy Lambda.generic_value) 
~error:(function | Base Value -> assert false | Base Void -> @@ -872,7 +870,7 @@ let layout_of_sort loc sort = (Jkind.Sort.of_const const, Stable, None))) ) -let layout_of_const_sort c = +let layout_of_non_void_sort c = layout_of_const_sort_generic c ~value_kind:(lazy Lambda.generic_value) @@ -898,7 +896,7 @@ let function_arg_layout env loc sort ty = (** Whether a forward block is needed for a lazy thunk on a value, i.e. if the value can be represented as a float/forward/lazy *) let lazy_val_requires_forward env loc ty = - let sort = Jkind.Sort.for_lazy_body in + let sort = Jkind.Sort.Const.for_lazy_body in let classify_product _ sorts = let kind = Jkind.Sort.Const.Product sorts in raise (Error (loc, Unsupported_product_in_lazy kind)) diff --git a/typing/typeopt.mli b/typing/typeopt.mli index a990f654e53..5ee744b6698 100644 --- a/typing/typeopt.mli +++ b/typing/typeopt.mli @@ -26,16 +26,16 @@ val maybe_pointer : Typedtree.expression -> Lambda.immediate_or_pointer (* Supplying [None] for [elt_sort] should be avoided when possible. It will result in a call to [Ctype.type_sort] which can be expensive. *) val array_type_kind : - elt_sort:(Jkind.Sort.t option) + elt_sort:(Jkind.Sort.Const.t option) -> Env.t -> Location.t -> Types.type_expr -> Lambda.array_kind val array_type_mut : Env.t -> Types.type_expr -> Lambda.mutable_flag val array_kind_of_elt : - elt_sort:(Jkind.Sort.t option) + elt_sort:(Jkind.Sort.Const.t option) -> Env.t -> Location.t -> Types.type_expr -> Lambda.array_kind val array_kind : - Typedtree.expression -> Jkind.Sort.t -> Lambda.array_kind + Typedtree.expression -> Jkind.Sort.Const.t -> Lambda.array_kind val array_pattern_kind : - Typedtree.pattern -> Jkind.Sort.t -> Lambda.array_kind + Typedtree.pattern -> Jkind.Sort.Const.t -> Lambda.array_kind (* If [kind] or [layout] is unknown, attempt to specialize it by examining the type parameters of the bigarray. 
If [kind] or [length] is not unknown, returns @@ -50,30 +50,30 @@ val bigarray_specialize_kind_and_layout : take that check out. *) val layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout (* These translate a type system sort to a lambda layout. The function [layout] gives a more precise result---this should only be used when the kind is needed for compilation but the precise Lambda.layout isn't needed for optimization. [layout_of_sort] gracefully errors on void, while - [layout_of_base] loudly fails on void. *) -val layout_of_sort : Location.t -> Jkind.sort -> Lambda.layout -val layout_of_const_sort : Jkind.Sort.Const.t -> Lambda.layout + [layout_of_non_void_sort] loudly fails on void. *) +val layout_of_sort : Location.t -> Jkind.Sort.Const.t -> Lambda.layout +val layout_of_non_void_sort : Jkind.Sort.Const.t -> Lambda.layout (* Given a function type and the sort of its return type, compute the layout of its return type. *) val function_return_layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout (* Given a function type with two arguments and the sort of its return type, compute the layout of its return type. *) val function2_return_layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout (* Given a function type and the sort of its argument, compute the layout of its argument. Fails loudly if the type isn't a function type. 
*) val function_arg_layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout val value_kind : Env.t -> Location.t -> Types.type_expr -> Lambda.value_kind diff --git a/typing/types.ml b/typing/types.ml index ad08297c013..dde54bbfecc 100644 --- a/typing/types.ml +++ b/typing/types.ml @@ -313,7 +313,7 @@ and mixed_product_shape = and record_representation = | Record_unboxed | Record_inlined of tag * constructor_representation * variant_representation - | Record_boxed of (allowed * disallowed) jkind array + | Record_boxed of Jkind_types.Sort.Const.t array | Record_float | Record_ufloat | Record_mixed of mixed_product_shape @@ -324,7 +324,7 @@ and record_unboxed_product_representation = and variant_representation = | Variant_unboxed | Variant_boxed of (constructor_representation * - (allowed * disallowed) jkind array) array + Jkind_types.Sort.Const.t array) array | Variant_extensible and constructor_representation = @@ -337,7 +337,7 @@ and label_declaration = ld_mutable: mutability; ld_modalities: Mode.Modality.Value.Const.t; ld_type: type_expr; - ld_jkind : jkind_l; + ld_sort: Jkind_types.Sort.Const.t; ld_loc: Location.t; ld_attributes: Parsetree.attributes; ld_uid: Uid.t; @@ -357,7 +357,7 @@ and constructor_argument = { ca_modalities: Mode.Modality.Value.Const.t; ca_type: type_expr; - ca_jkind: jkind_l; + ca_sort: Jkind_types.Sort.Const.t; ca_loc: Location.t; } @@ -637,12 +637,13 @@ let equal_constructor_representation r1 r2 = r1 == r2 || match r1, r2 with let equal_variant_representation r1 r2 = r1 == r2 || match r1, r2 with | Variant_unboxed, Variant_unboxed -> true - | Variant_boxed cstrs_and_jkinds1, Variant_boxed cstrs_and_jkinds2 -> - Misc.Stdlib.Array.equal (fun (cstr1, jkinds1) (cstr2, jkinds2) -> + | Variant_boxed cstrs_and_sorts1, Variant_boxed cstrs_and_sorts2 -> + Misc.Stdlib.Array.equal (fun (cstr1, sorts1) (cstr2, sorts2) -> 
equal_constructor_representation cstr1 cstr2 - && Misc.Stdlib.Array.equal !jkind_equal jkinds1 jkinds2) - cstrs_and_jkinds1 - cstrs_and_jkinds2 + && Misc.Stdlib.Array.equal Jkind_types.Sort.Const.equal + sorts1 sorts2) + cstrs_and_sorts1 + cstrs_and_sorts2 | Variant_extensible, Variant_extensible -> true | (Variant_unboxed | Variant_boxed _ | Variant_extensible), _ -> @@ -657,8 +658,8 @@ let equal_record_representation r1 r2 = match r1, r2 with ignore (cr1 : constructor_representation); ignore (cr2 : constructor_representation); equal_tag tag1 tag2 && equal_variant_representation vr1 vr2 - | Record_boxed lays1, Record_boxed lays2 -> - Misc.Stdlib.Array.equal !jkind_equal lays1 lays2 + | Record_boxed sorts1, Record_boxed sorts2 -> + Misc.Stdlib.Array.equal Jkind_types.Sort.Const.equal sorts1 sorts2 | Record_float, Record_float -> true | Record_ufloat, Record_ufloat -> @@ -686,7 +687,7 @@ type 'a gen_label_description = lbl_arg: type_expr; (* Type of the argument *) lbl_mut: mutability; (* Is this a mutable field? *) lbl_modalities: Mode.Modality.Value.Const.t;(* Modalities on the field *) - lbl_jkind : jkind_l; (* Jkind of the argument *) + lbl_sort: Jkind_types.Sort.Const.t; (* Sort of the argument *) lbl_pos: int; (* Position in block *) lbl_num: int; (* Position in type *) lbl_all: 'a gen_label_description array; (* All the labels in this type *) diff --git a/typing/types.mli b/typing/types.mli index ef573d3293e..60c342a757a 100644 --- a/typing/types.mli +++ b/typing/types.mli @@ -605,7 +605,7 @@ and record_representation = (* For an inlined record, we record the representation of the variant that contains it and the tag/representation of the relevant constructor of that variant. *) - | Record_boxed of jkind_l array + | Record_boxed of Jkind_types.Sort.Const.t array | Record_float (* All fields are floats *) | Record_ufloat (* All fields are [float#]s. 
Same runtime representation as [Record_float], @@ -625,7 +625,7 @@ and record_unboxed_product_representation = and variant_representation = | Variant_unboxed | Variant_boxed of (constructor_representation * - jkind_l array) array + Jkind_types.Sort.Const.t array) array (* The outer array has an element for each constructor. Each inner array has a jkind for each argument of the corresponding constructor. @@ -652,7 +652,7 @@ and label_declaration = ld_mutable: mutability; ld_modalities: Mode.Modality.Value.Const.t; ld_type: type_expr; - ld_jkind : jkind_l; + ld_sort: Jkind_types.Sort.Const.t; ld_loc: Location.t; ld_attributes: Parsetree.attributes; ld_uid: Uid.t; @@ -672,7 +672,7 @@ and constructor_argument = { ca_modalities: Mode.Modality.Value.Const.t; ca_type: type_expr; - ca_jkind: jkind_l; + ca_sort: Jkind_types.Sort.Const.t; ca_loc: Location.t; } @@ -898,7 +898,7 @@ type 'a gen_label_description = lbl_mut: mutability; (* Is this a mutable field? *) lbl_modalities: Mode.Modality.Value.Const.t; (* Modalities on the field *) - lbl_jkind : jkind_l; (* Jkind of the argument *) + lbl_sort: Jkind_types.Sort.Const.t; (* Sort of the argument *) lbl_pos: int; (* Position in block *) lbl_num: int; (* Position in the type *) lbl_all: 'a gen_label_description array; (* All the labels in this type *) @@ -908,9 +908,6 @@ type 'a gen_label_description = lbl_attributes: Parsetree.attributes; lbl_uid: Uid.t; } -(* CR layouts v5: once we allow [any] in record fields, change [lbl_jkind] to - be a [sort option]. This will allow a fast path for representability checks - at record construction, and currently only the sort is used anyway. 
*) type label_description = record_representation gen_label_description diff --git a/typing/value_rec_check.ml b/typing/value_rec_check.ml index 4d9c86904e6..6b76def1e0a 100644 --- a/typing/value_rec_check.ml +++ b/typing/value_rec_check.ml @@ -718,11 +718,13 @@ let rec expression : Typedtree.expression -> term_judg = | Texp_unboxed_tuple exprs -> list expression (List.map (fun (_, e, _) -> e) exprs) << Return | Texp_array (_, elt_sort, exprs, _) -> + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in list expression exprs << array_mode exp elt_sort | Texp_list_comprehension { comp_body; comp_clauses } -> join ((expression comp_body << Guard) :: comprehension_clauses comp_clauses) | Texp_array_comprehension (_, elt_sort, { comp_body; comp_clauses }) -> + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in join ((expression comp_body << array_mode exp elt_sort) :: comprehension_clauses comp_clauses) | Texp_construct (_, desc, exprs, _) ->