diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 642675b0048..19b8646c36b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,8 +109,8 @@ jobs: - name: gi config: --enable-middle-end=flambda2 os: ubuntu-latest - build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1' - ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1' + build_ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1,vectorize=1' + ocamlparam: '_,w=-46,regalloc=gi,regalloc-param=SPLIT_LIVE_RANGES:on,regalloc-param=GI_PRIORITY_HEURISTICS:interval-length,regalloc-param=GI_SELECTION_HEURISTICS:first-available,regalloc-param=GI_SPILLING_HEURISTICS:flat-uses,regalloc-validate=1,cfg-cse-optimize=1,vectorize=1' check_arch: true - name: cfg-selection diff --git a/Makefile.common-jst b/Makefile.common-jst index 6784deee559..1b7d025762f 100644 --- a/Makefile.common-jst +++ b/Makefile.common-jst @@ -78,6 +78,11 @@ runtime-stdlib: boot-compiler # Dune does not believe the compiler can make .cmxs unless the following file exists. 
@touch _build/runtime_stdlib_install/lib/ocaml_runtime_stdlib/dynlink.cmxa +# This target is a polling version of "make runtime-stdlib" +.PHONY: runtime-stdlib-hacking +runtime-stdlib-hacking: boot-compiler + RUNTIME_DIR=$(RUNTIME_DIR) $(dune) build -w $(ws_runstd) --only-package=ocaml_runtime_stdlib @install + compiler: runtime-stdlib RUNTIME_DIR=$(RUNTIME_DIR) SYSTEM=$(SYSTEM) MODEL=$(MODEL) \ ASPP="$(ASPP)" ASPPFLAGS="$(ASPPFLAGS)" \ diff --git a/backend/.ocamlformat-enable b/backend/.ocamlformat-enable index c992d5b7245..ac2ffe9e3d8 100644 --- a/backend/.ocamlformat-enable +++ b/backend/.ocamlformat-enable @@ -3,15 +3,21 @@ amd64/selection.ml amd64/selection_utils.ml amd64/simd*.ml amd64/stack_check.ml +amd64/vectorize_specific.ml arm64/cfg_selection.ml arm64/selection.ml arm64/selection_utils.ml arm64/simd*.ml arm64/stack_check.ml +arm64/vectorize_specific.ml asm_targets/**/*.ml asm_targets/**/*.mli cfg/**/*.ml cfg/**/*.mli +cfg/vectorize.ml +cfg/vectorize.mli +vectorize_utils.ml +vectorize_utils.mli cfg_selectgen.ml cfg_selectgen.mli cfg_selection.mli diff --git a/backend/amd64/arch.ml b/backend/amd64/arch.ml index 96bb4df7214..8098cbf7158 100644 --- a/backend/amd64/arch.ml +++ b/backend/amd64/arch.ml @@ -288,8 +288,9 @@ let win64 = | "win64" | "mingw64" | "cygwin" -> true | _ -> false -(* Specific operations that are pure *) +(* Specific operations that are pure *) +(* Keep in sync with [Vectorize_specific] *) let operation_is_pure = function | Ilea _ | Ibswap _ | Isextend32 | Izextend32 | Ifloatarithmem _ -> true @@ -300,7 +301,7 @@ let operation_is_pure = function | Isimd op -> Simd.is_pure op (* Specific operations that can raise *) - +(* Keep in sync with [Vectorize_specific] *) let operation_can_raise = function | Ilea _ | Ibswap _ | Isextend32 | Izextend32 | Ifloatarithmem _ @@ -309,6 +310,7 @@ let operation_can_raise = function | Istore_int (_, _, _) | Ioffset_loc (_, _) | Icldemote _ | Iprefetch _ -> false +(* Keep in sync with [Vectorize_specific] 
*) let operation_allocates = function | Ilea _ | Ibswap _ | Isextend32 | Izextend32 | Ifloatarithmem _ @@ -410,84 +412,107 @@ let equal_specific_operation left right = (* addressing mode functions *) -let compare_addressing_mode_without_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = - (* Ignores displ when comparing to show that it is possible to calculate the offset *) +let equal_addressing_mode_without_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = + (* Ignores [displ] when comparing to show that it is possible to calculate the offset, + see [addressing_offset_in_bytes]. *) match addressing_mode_1, addressing_mode_2 with | Ibased (symbol1, global1, _), Ibased (symbol2, global2, _) -> ( match global1, global2 with | Global, Global | Local, Local -> - String.compare symbol1 symbol2 - | Global, Local -> -1 - | Local, Global -> 1) - | Ibased _, _ -> -1 - | _, Ibased _ -> 1 - | Iindexed _, Iindexed _ -> 0 - | Iindexed _, _ -> -1 - | _, Iindexed _ -> 1 - | Iindexed2 _, Iindexed2 _ -> 0 - | Iindexed2 _, _ -> -1 - | _, Iindexed2 _ -> 1 - | Iscaled (scale1, _), Iscaled (scale2, _) -> Int.compare scale1 scale2 - | Iscaled _, _ -> -1 - | _, Iscaled _ -> 1 + String.equal symbol1 symbol2 + | (Global | Local), _ -> false) + | Iindexed _, Iindexed _ -> true + | Iindexed2 _, Iindexed2 _ -> true + | Iscaled (scale1, _), Iscaled (scale2, _) -> Int.equal scale1 scale2 | Iindexed2scaled (scale1, _), Iindexed2scaled (scale2, _) -> - Int.compare scale1 scale2 - -let compare_addressing_mode_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = - match addressing_mode_1, addressing_mode_2 with - | Ibased (symbol1, global1, n1), Ibased (symbol2, global2, n2) -> ( - match global1, global2 with - | Global, Global | Local, Local -> - if symbol1 = symbol2 then Some (Int.compare n1 n2) else None - | Global, Local | Local, Global -> None) - | Iindexed n1, Iindexed n2 -> Some (Int.compare n1 n2) 
- | Iindexed2 n1, Iindexed2 n2 -> Some (Int.compare n1 n2) - | Iscaled (scale1, n1), Iscaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (Int.compare n1 n2) else None - | Iindexed2scaled (scale1, n1), Iindexed2scaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (Int.compare n1 n2) else None - | Ibased _, _ -> None - | Iindexed _, _ -> None - | Iindexed2 _, _ -> None - | Iscaled _, _ -> None - | Iindexed2scaled _, _ -> None - -let addressing_offset_in_bytes (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = + Int.equal scale1 scale2 + | (Ibased _ | Iindexed _ | Iindexed2 _ | Iscaled _ | Iindexed2scaled _), _ -> false + +let addressing_offset_in_bytes + (addressing_mode_1: addressing_mode) + (addressing_mode_2 : addressing_mode) + ~arg_offset_in_bytes + args_1 + args_2 + = + let address_arg_offset_in_bytes index = + arg_offset_in_bytes args_1.(index) args_2.(index) + in match addressing_mode_1, addressing_mode_2 with - | Ibased (symbol1, global1, n1), Ibased (symbol2, global2, n2) -> ( - match global1, global2 with - | Global, Global | Local, Local -> - if symbol1 = symbol2 then Some (n2 - n1) else None - | Global, Local | Local, Global -> None) - | Iindexed n1, Iindexed n2 -> Some (n2 - n1) - | Iindexed2 n1, Iindexed2 n2 -> Some (n2 - n1) + | Ibased (symbol1, global1, n1), Ibased (symbol2, global2, n2) -> + (* symbol + displ *) + (match global1, global2 with + | Global, Global | Local, Local -> + if String.equal symbol1 symbol2 then Some (n2 - n1) else None + | Global, Local | Local, Global -> None) + | Iindexed n1, Iindexed n2 -> + (* reg + displ *) + (match address_arg_offset_in_bytes 0 with + | Some base_off -> Some (base_off + (n2 - n1)) + | None -> None) + | Iindexed2 n1, Iindexed2 n2 -> + (* reg + reg + displ *) + (match address_arg_offset_in_bytes 0, address_arg_offset_in_bytes 1 with + | Some arg0_offset, Some arg1_offset -> + Some 
(arg0_offset + arg1_offset + (n2 - n1)) + | (None, _|Some _, _) -> None) | Iscaled (scale1, n1), Iscaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (n2 - n1) else None + (* reg * scale + displ *) + if not (Int.compare scale1 scale2 = 0) then None + else + (match address_arg_offset_in_bytes 0 with + | Some offset -> Some ((offset * scale1) + (n2 - n1)) + | None -> None) | Iindexed2scaled (scale1, n1), Iindexed2scaled (scale2, n2) -> - let scale_compare = scale1 - scale2 in - if scale_compare = 0 then Some (n2 - n1) else None + (* reg + reg * scale + displ *) + if not (Int.compare scale1 scale2 = 0) then None else + (match address_arg_offset_in_bytes 0, address_arg_offset_in_bytes 1 with + | Some arg0_offset, Some arg1_offset -> + Some (arg0_offset + (arg1_offset*scale1) + (n2 - n1)) + | (None, _|Some _, _) -> None) | Ibased _, _ -> None | Iindexed _, _ -> None | Iindexed2 _, _ -> None | Iscaled _, _ -> None | Iindexed2scaled _, _ -> None - let can_cross_loads_or_stores (specific_operation : specific_operation) = - match specific_operation with - | Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Isimd _ | Icldemote _ - | Iprefetch _ -> - false - | Ibswap _ | Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence - | Ipause -> - true - - let may_break_alloc_freshness (specific_operation : specific_operation) = - match specific_operation with - | Isimd _ -> true - | Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | Isextend32 - | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | Ipause | Icldemote _ - | Iprefetch _ -> - false +let isomorphic_specific_operation op1 op2 = + match op1, op2 with + | Ilea a1, Ilea a2 -> equal_addressing_mode_without_displ a1 a2 + | Istore_int (_n1, a1, is_assign1), Istore_int (_n2, a2, is_assign2) -> + equal_addressing_mode_without_displ a1 a2 && Bool.equal is_assign1 is_assign2 + | Ioffset_loc (_n1, a1), Ioffset_loc (_n2, a2) -> + 
equal_addressing_mode_without_displ a1 a2 + | Ifloatarithmem (w1, o1, a1), Ifloatarithmem (w2, o2, a2) -> + Cmm.equal_float_width w1 w2 && + equal_float_operation o1 o2 && + equal_addressing_mode_without_displ a1 a2 + | Ibswap { bitwidth = left }, Ibswap { bitwidth = right } -> + Int.equal (int_of_bswap_bitwidth left) (int_of_bswap_bitwidth right) + | Isextend32, Isextend32 -> + true + | Izextend32, Izextend32 -> + true + | Irdtsc, Irdtsc -> + true + | Irdpmc, Irdpmc -> + true + | Ilfence, Ilfence -> + true + | Isfence, Isfence -> + true + | Imfence, Imfence -> + true + | Ipause, Ipause -> true + | Icldemote x, Icldemote x' -> equal_addressing_mode_without_displ x x' + | Iprefetch { is_write = left_is_write; locality = left_locality; addr = left_addr; }, + Iprefetch { is_write = right_is_write; locality = right_locality; addr = right_addr; } -> + Bool.equal left_is_write right_is_write + && equal_prefetch_temporal_locality_hint left_locality right_locality + && equal_addressing_mode_without_displ left_addr right_addr + | Isimd l, Isimd r -> + Simd.equal_operation l r + | (Ilea _ | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | + Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | + Ipause | Isimd _ | Icldemote _ | Iprefetch _), _ -> + false diff --git a/backend/amd64/arch.mli b/backend/amd64/arch.mli index 27278669b45..91e3bbf348a 100644 --- a/backend/amd64/arch.mli +++ b/backend/amd64/arch.mli @@ -140,14 +140,15 @@ val operation_allocates : specific_operation -> bool val float_cond_and_need_swap : Lambda.float_comparison -> X86_ast.float_condition * bool +val isomorphic_specific_operation : specific_operation -> specific_operation -> bool (* addressing mode functions *) -val compare_addressing_mode_without_displ : addressing_mode -> addressing_mode -> int +val equal_addressing_mode_without_displ : addressing_mode -> addressing_mode -> bool -val compare_addressing_mode_displ : addressing_mode -> addressing_mode -> int option - 
-val addressing_offset_in_bytes : addressing_mode -> addressing_mode -> int option - -val can_cross_loads_or_stores : specific_operation -> bool - -val may_break_alloc_freshness : specific_operation -> bool +val addressing_offset_in_bytes + : addressing_mode + -> addressing_mode + -> arg_offset_in_bytes:('a -> 'a -> int option) + -> 'a array + -> 'a array + -> int option diff --git a/backend/amd64/simd_selection.ml b/backend/amd64/simd_selection.ml index d33c8aa0e7a..2955d27e6de 100644 --- a/backend/amd64/simd_selection.ml +++ b/backend/amd64/simd_selection.ml @@ -467,3 +467,387 @@ let () = Location.register_error_of_exn (function | Error err -> Some (Location.error_of_printer_file report_error err) | _ -> None) + +(* Vectorize operations *) + +let vector_width_in_bits = 128 + +(* CR-soon gyorsh: [vectorize_operation] is too long, refactor / split up. *) +let vectorize_operation (width_type : Vectorize_utils.Width_in_bits.t) + ~arg_count ~res_count (cfg_ops : Operation.t list) : + Vectorize_utils.Vectorized_instruction.t list option = + (* Assumes cfg_ops are isomorphic *) + let width_in_bits = Vectorize_utils.Width_in_bits.to_int width_type in + let length = List.length cfg_ops in + assert (length * width_in_bits = vector_width_in_bits); + let same_width memory_chunk = + Vectorize_utils.Width_in_bits.equal width_type + (Vectorize_utils.Width_in_bits.of_memory_chunk memory_chunk) + in + let make_default ~arg_count ~res_count operation : + Vectorize_utils.Vectorized_instruction.t list option = + Some + [ Vectorize_utils.Vectorized_instruction.make_default ~arg_count + ~res_count operation ] + in + let create_const_vec consts = + let highs, lows = Misc.Stdlib.List.split_at (length / 2) consts in + let pack_int64 nums = + let mask = + Int64.shift_right_logical Int64.minus_one (64 - width_in_bits) + in + List.fold_left + (fun target num -> + Int64.logor + (Int64.shift_left target width_in_bits) + (Int64.logand num mask)) + 0L nums + in + Operation.Const_vec128 { high = 
pack_int64 highs; low = pack_int64 lows } + |> make_default ~arg_count:0 ~res_count:1 + in + let add_op = + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> Add_i64 + | W32 -> Add_i32 + | W16 -> Add_i16 + | W8 -> Add_i8 + in + Some (Operation.Specific (Isimd (SSE2 sse_op))) + in + let mul_op = + match width_type with + | W128 -> None + | W64 -> None + | W32 -> Some (Operation.Specific (Isimd (SSE41 Mullo_i32))) + | W16 -> Some (Operation.Specific (Isimd (SSE2 Mullo_i16))) + | W8 -> None + in + let vectorize_intop (intop : Simple_operation.integer_operation) = + match intop with + | Iadd -> Option.bind add_op (make_default ~arg_count ~res_count) + | Isub -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> Sub_i64 + | W32 -> Sub_i32 + | W16 -> Sub_i16 + | W8 -> Sub_i8 + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Imul -> Option.bind mul_op (make_default ~arg_count ~res_count) + | Imulh { signed } -> ( + match width_type with + | W128 -> None + | W64 -> None + | W32 -> None + | W16 -> + if signed + then + Operation.Specific (Isimd (SSE2 Mulhi_i16)) + |> make_default ~arg_count ~res_count + else + Operation.Specific (Isimd (SSE2 Mulhi_unsigned_i16)) + |> make_default ~arg_count ~res_count + | W8 -> None) + | Iand -> + Operation.Specific (Isimd (SSE2 And_bits)) + |> make_default ~arg_count ~res_count + | Ior -> + Operation.Specific (Isimd (SSE2 Or_bits)) + |> make_default ~arg_count ~res_count + | Ixor -> + Operation.Specific (Isimd (SSE2 Xor_bits)) + |> make_default ~arg_count ~res_count + | Ilsl -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SLL_i64 + | W32 -> SLL_i32 + | W16 -> SLL_i16 + | W8 -> assert false + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Ilsr -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SRL_i64 + | W32 -> SRL_i32 + | W16 -> SRL_i16 + | W8 -> assert 
false + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Iasr -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> assert false + | W32 -> SRA_i32 + | W16 -> SRA_i16 + | W8 -> assert false + in + Operation.Specific (Isimd (SSE2 sse_op)) + |> make_default ~arg_count ~res_count + | Icomp (Isigned intcomp) -> ( + match intcomp with + | Ceq -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SSE41 Cmpeq_i64 + | W32 -> SSE2 Cmpeq_i32 + | W16 -> SSE2 Cmpeq_i16 + | W8 -> SSE2 Cmpeq_i8 + in + Operation.Specific (Isimd sse_op) |> make_default ~arg_count ~res_count + | Cgt -> + let sse_op = + match width_type with + | W128 -> assert false + | W64 -> SSE42 Cmpgt_i64 + | W32 -> SSE2 Cmpgt_i32 + | W16 -> SSE2 Cmpgt_i16 + | W8 -> SSE2 Cmpgt_i8 + in + Operation.Specific (Isimd sse_op) |> make_default ~arg_count ~res_count + | Cne | Clt | Cle | Cge -> + None + (* These instructions seem to not have a simd counterpart yet, could + also implement as a combination of other instructions if needed in + the future *)) + | Idiv | Imod | Iclz _ | Ictz _ | Ipopcnt | Icomp (Iunsigned _) -> None + in + match List.hd cfg_ops with + | Move -> Operation.Move |> make_default ~arg_count ~res_count + | Const_int _ -> + let extract_const_int (op : Operation.t) = + match op with + | Const_int n -> Int64.of_nativeint n + | Move | Load _ | Store _ | Intop _ | Intop_imm _ | Specific _ | Alloc _ + | Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_float32 _ + | Const_float _ | Const_symbol _ | Const_vec128 _ | Stackoffset _ + | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ | Opaque + | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll -> + assert false + in + assert (arg_count = 0 && res_count = 1); + let consts = List.map extract_const_int cfg_ops in + create_const_vec consts + | Load { memory_chunk; addressing_mode; mutability; is_atomic } -> + if not (same_width 
memory_chunk) + then None + else + let num_args_addressing = Arch.num_args_addressing addressing_mode in + assert (arg_count = num_args_addressing && res_count = 1); + let operation = + Operation.Load + { memory_chunk = Onetwentyeight_unaligned; + addressing_mode; + mutability; + is_atomic + } + in + Some + [ { operation; + arguments = + Array.init num_args_addressing (fun i -> + Vectorize_utils.Vectorized_instruction.Original i); + results = [| Result 0 |] + } ] + | Store (memory_chunk, addressing_mode, is_assignment) -> + if not (same_width memory_chunk) + then None + else + let num_args_addressing = Arch.num_args_addressing addressing_mode in + assert (arg_count = num_args_addressing + 1 && res_count = 0); + let operation = + Operation.Store + (Onetwentyeight_unaligned, addressing_mode, is_assignment) + in + Some + [ { operation; + arguments = + Array.append + [| Vectorize_utils.Vectorized_instruction.Argument 0 |] + (Array.init num_args_addressing (fun i -> + Vectorize_utils.Vectorized_instruction.Original (i + 1))); + results = [||] + } ] + | Intop intop -> vectorize_intop intop + | Intop_imm (intop, _) -> ( + let extract_intop_imm_int (op : Operation.t) = + match op with + | Intop_imm (_, n) -> Int64.of_int n + | Move | Load _ | Store _ | Intop _ | Specific _ | Alloc _ + | Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_int _ + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ + | Opaque | Begin_region | End_region | Name_for_debugger _ | Dls_get + | Poll -> + assert false + in + let consts = List.map extract_intop_imm_int cfg_ops in + match create_const_vec consts, vectorize_intop intop with + | Some [const_instruction], Some [intop_instruction] -> + if Array.length const_instruction.results = 1 + && Array.length intop_instruction.arguments = 2 + then ( + assert (arg_count = 1 && res_count = 1); + const_instruction.results.(0) + <- 
Vectorize_utils.Vectorized_instruction.New 0; + intop_instruction.arguments.(1) + <- Vectorize_utils.Vectorized_instruction.New 0; + Some [const_instruction; intop_instruction]) + else None + | _ -> None) + | Specific op -> ( + match op with + | Ilea addressing_mode -> ( + let extract_scale_displ (op : Operation.t) = + match op with + | Specific spec_op -> ( + match spec_op with + | Ilea addressing_mode -> ( + match addressing_mode with + | Iindexed displ -> None, Some displ + | Iindexed2 displ -> None, Some displ + | Iscaled (scale, displ) -> Some scale, Some displ + | Iindexed2scaled (scale, displ) -> Some scale, Some displ + | Ibased _ -> None, None) + | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ + | Isextend32 | Izextend32 | Irdtsc | Irdpmc | Ilfence | Isfence + | Imfence | Ipause | Isimd _ | Iprefetch _ | Icldemote _ -> + assert false) + | Move | Load _ | Store _ | Intop _ | Intop_imm _ | Alloc _ + | Reinterpret_cast _ | Static_cast _ | Spill | Reload | Const_int _ + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _ + | Probe_is_enabled _ | Opaque | Begin_region | End_region + | Name_for_debugger _ | Dls_get | Poll -> + assert false + in + let get_scale op = + match extract_scale_displ op with + | Some scale, _ -> scale |> Int64.of_int + | _ -> assert false + in + let get_displ op = + match extract_scale_displ op with + | _, Some displ -> displ |> Int64.of_int + | _ -> assert false + in + let make_move arg res = + { Vectorize_utils.Vectorized_instruction.operation = Move; + arguments = [| arg |]; + results = [| res |] + } + in + let make_binary_operation arg_0 arg_1 res operation = + { Vectorize_utils.Vectorized_instruction.operation; + arguments = [| arg_0; arg_1 |]; + results = [| res |] + } + in + let make_const res consts = + match create_const_vec consts with + | Some [const_instruction] -> + assert ( + Array.length const_instruction.arguments = 0 + && Array.length 
const_instruction.results = 1); + const_instruction.results.(0) <- res; + const_instruction + | _ -> assert false + in + match addressing_mode with + | Iindexed _ -> ( + match add_op with + | Some add -> + assert (arg_count = 1 && res_count = 1); + let displs = List.map get_displ cfg_ops in + (* reg + displ *) + Some + [ make_move (Argument 0) (Result 0); + make_const (New 0) displs; + make_binary_operation (Result 0) (New 0) (Result 0) add ] + | None -> None) + | Iindexed2 _ -> ( + match add_op with + | Some add -> + assert (arg_count = 2 && res_count = 1); + let displs = List.map get_displ cfg_ops in + (* reg + reg + displ *) + Some + [ make_move (Argument 0) (Result 0); + make_binary_operation (Result 0) (Argument 1) (Result 0) add; + make_const (New 0) displs; + make_binary_operation (Result 0) (New 0) (Result 0) add ] + | None -> None) + | Iscaled _ -> ( + match add_op, mul_op with + | Some add, Some mul -> + assert (arg_count = 1 && res_count = 1); + let scales = List.map get_scale cfg_ops in + let displs = List.map get_displ cfg_ops in + (* reg * scale + displ *) + Some + [ make_move (Argument 0) (Result 0); + make_const (New 0) scales; + make_binary_operation (Result 0) (New 0) (Result 0) mul; + make_const (New 1) displs; + make_binary_operation (Result 0) (New 1) (Result 0) add ] + | _ -> None) + | Iindexed2scaled _ -> ( + match add_op, mul_op with + | Some add, Some mul -> + assert (arg_count = 2 && res_count = 1); + let scales = List.map get_scale cfg_ops in + let displs = List.map get_displ cfg_ops in + (* reg + reg * scale + displ *) + Some + [ make_move (Argument 1) (Result 0); + make_const (New 0) scales; + make_binary_operation (Result 0) (New 0) (Result 0) mul; + make_binary_operation (Result 0) (Argument 0) (Result 0) add; + make_const (New 1) displs; + make_binary_operation (Result 0) (New 1) (Result 0) add ] + | _ -> None) + | Ibased _ -> None) + | Isextend32 -> ( + match width_type with + | W128 -> None + | W64 -> + Operation.Specific (Isimd 
(SSE41 I32_sx_i64)) + |> make_default ~arg_count ~res_count + | W32 -> + None + (* If the upper bits of the original register containing the smaller + register is determined to be unused without relying on this file, + these can also be vectorized to be a move *) + | W16 -> None + | W8 -> None) + | Izextend32 -> ( + match width_type with + | W128 -> None + | W64 -> + Operation.Specific (Isimd (SSE41 I32_zx_i64)) + |> make_default ~arg_count ~res_count + | W32 -> None (* See previous comment *) + | W16 -> None + | W8 -> None) + | Istore_int _ | Ioffset_loc _ | Ifloatarithmem _ | Ibswap _ | Irdtsc + | Irdpmc | Ilfence | Isfence | Imfence | Ipause | Isimd _ | Iprefetch _ + | Icldemote _ -> + None) + | Alloc _ | Reinterpret_cast _ | Static_cast _ | Spill | Reload + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ + | Opaque | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll -> + None diff --git a/backend/amd64/vectorize_specific.ml b/backend/amd64/vectorize_specific.ml new file mode 100644 index 00000000000..2f535ca2834 --- /dev/null +++ b/backend/amd64/vectorize_specific.ml @@ -0,0 +1,53 @@ +(* Keep in sync with [Arch.operation_is_pure], [Arch.operation_can_raise], + [Arch.operation_allocates]. 
*) +module Memory_access = Vectorize_utils.Memory_access + +let memory_access : Arch.specific_operation -> Memory_access.t option = + fun op -> + let create ?first_memory_arg_index desc = + Some (Memory_access.create ?first_memory_arg_index desc) + in + match op with + | Istore_int (_n, addressing_mode, is_assignment) -> + let desc = + Memory_access.Write + { width_in_bits = W64; + addressing_mode; + init_or_assign = + (if is_assignment then Assignment else Initialization) + } + in + create ~first_memory_arg_index:0 desc + | Ifloatarithmem (float_width, _float_op, addressing_mode) -> + let width_in_bits : Vectorize_utils.Width_in_bits.t = + match float_width with Float64 -> W64 | Float32 -> W32 + in + let is_mutable = + (* CR-someday gyorsh: conservative, propagate mutability of Ifloatarithmem + from selection to make it precise. *) + true + in + let desc = + Memory_access.Read + { width_in_bits; addressing_mode; is_mutable; is_atomic = false } + in + create ~first_memory_arg_index:1 desc + | Ioffset_loc (_n, addressing_mode) -> + let desc = + Memory_access.Read_and_write + { width_in_bits = W64; addressing_mode; is_atomic = false } + in + create desc + | Iprefetch { is_write = _; locality = _; addr = _ } -> + (* Conservative, to prevent reordering anything around this instruction. + Using [addressing_mode] is tricky because it need not be the start of the + prefetch cache line and the interval would depend on cache line size. *) + create Memory_access.Arbitrary + | Icldemote _ | Irdtsc | Irdpmc | Ilfence | Isfence | Imfence | Ipause -> + (* Conservative, don't reorder around timing or ordering instructions. *) + create Memory_access.Arbitrary + | Isimd op -> + (* Conservative. we don't have any simd operations with memory operations at + the moment. 
*) + if Simd.is_pure op then None else create Memory_access.Arbitrary + | Ilea _ | Ibswap _ | Isextend32 | Izextend32 -> None diff --git a/backend/arm64/arch.ml b/backend/arm64/arch.ml index 8b83c064fc6..d8a4a1adafb 100644 --- a/backend/arm64/arch.ml +++ b/backend/arm64/arch.ml @@ -213,6 +213,9 @@ let equal_specific_operation left right = | Imuladd | Imulsub | Inegmulf | Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _), _ -> false +let isomorphic_specific_operation op1 op2 = + equal_specific_operation op1 op2 + (* Recognition of logical immediate arguments *) (* An automaton to recognize ( 0+1+0* | 1+0+1* ) @@ -333,32 +336,12 @@ let operation_allocates = function | Ibswap _ -> false (* See `amd64/arch.ml`. *) - -let compare_addressing_mode_without_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = +let equal_addressing_mode_without_displ (addressing_mode_1: addressing_mode) + (addressing_mode_2 : addressing_mode) = match addressing_mode_1, addressing_mode_2 with - | Iindexed _, Iindexed _ -> 0 - | Iindexed _ , _ -> -1 - | _, Iindexed _ -> 1 - | Ibased (var1, _), Ibased (var2, _) -> String.compare var1 var2 + | Iindexed _, Iindexed _ -> true + | Ibased (var1, _), Ibased (var2, _) -> String.equal var1 var2 + | (Iindexed _ | Ibased _), _ -> false -let compare_addressing_mode_displ (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = - match addressing_mode_1, addressing_mode_2 with - | Iindexed n1, Iindexed n2 -> Some (Int.compare n1 n2) - | Ibased (var1, n1), Ibased (var2, n2) -> - if String.compare var1 var2 = 0 then Some (Int.compare n1 n2) else None - | Iindexed _ , _ -> None - | Ibased _ , _ -> None - -let addressing_offset_in_bytes (addressing_mode_1: addressing_mode) (addressing_mode_2 : addressing_mode) = None - -let can_cross_loads_or_stores (specific_operation : specific_operation) = - match specific_operation with - | Ifar_poll _ | Ifar_alloc _ | Ishiftarith _ 
| Imuladd | Imulsub | Inegmulf | Imuladdf - | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _ -> - true - -let may_break_alloc_freshness (specific_operation : specific_operation) = - match specific_operation with - | Ifar_poll _ | Ifar_alloc _ | Ishiftarith _ | Imuladd | Imulsub | Inegmulf | Imuladdf - | Inegmuladdf | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _ -> - false +let addressing_offset_in_bytes _ _ ~arg_offset_in_bytes:_ _ _ = + None (* conservative *) diff --git a/backend/arm64/arch.mli b/backend/arm64/arch.mli index d4292c59b24..e542b5df5dd 100644 --- a/backend/arm64/arch.mli +++ b/backend/arm64/arch.mli @@ -116,14 +116,15 @@ val operation_allocates : specific_operation -> bool val operation_can_raise : specific_operation -> bool -(* See `amd64/arch.mli`. *) - -val compare_addressing_mode_without_displ : addressing_mode -> addressing_mode -> int - -val compare_addressing_mode_displ : addressing_mode -> addressing_mode -> int option +val isomorphic_specific_operation : specific_operation -> specific_operation -> bool -val addressing_offset_in_bytes : addressing_mode -> addressing_mode -> int option - -val can_cross_loads_or_stores : specific_operation -> bool - -val may_break_alloc_freshness : specific_operation -> bool +(* See `amd64/arch.mli`. *) +val equal_addressing_mode_without_displ : addressing_mode -> addressing_mode -> bool + +val addressing_offset_in_bytes + : addressing_mode + -> addressing_mode + -> arg_offset_in_bytes:('a -> 'a -> int option) + -> 'a array + -> 'a array + -> int option diff --git a/backend/arm64/simd_selection.ml b/backend/arm64/simd_selection.ml index 5fae7825965..3e18e247129 100644 --- a/backend/arm64/simd_selection.ml +++ b/backend/arm64/simd_selection.ml @@ -19,3 +19,11 @@ let select_operation _ = None let pseudoregs_for_operation _ arg res = arg, res + +(* See `amd64/simd_selection.ml`. 
*) + +let vector_width_in_bits = 128 + +let vectorize_operation _ ~arg_count:_ ~res_count:_ (_ : Operation.t list) : + Vectorize_utils.Vectorized_instruction.t list option = + None diff --git a/backend/arm64/vectorize_specific.ml b/backend/arm64/vectorize_specific.ml new file mode 100644 index 00000000000..5eb1ff3886e --- /dev/null +++ b/backend/arm64/vectorize_specific.ml @@ -0,0 +1,21 @@ +(* Keep in sync with [Arch.operation_is_pure], [Arch.operation_can_raise], + [Arch.operation_allocates]. *) +module Memory_access = Vectorize_utils.Memory_access + +let memory_access : Arch.specific_operation -> Memory_access.t option = + fun op -> + let create ?first_memory_arg_index desc = + Some (Memory_access.create ?first_memory_arg_index desc) + in + match op with + | Ifar_poll _ -> + (* Conservative, don't reorder across poll instructions. In practice, there + are not many poll instructions present at this stage, because poll + insertion pass currently happens after vectorize. *) + create Arbitrary + | Ifar_alloc _ -> create Alloc + | Ishiftarith _ | Imuladd | Imulsub | Inegmulf | Imuladdf | Inegmuladdf + | Imulsubf | Inegmulsubf | Isqrtf | Ibswap _ | Imove32 | Isignext _ -> + (* Conservative. we don't have any specific operations with memory + operations at the moment. 
*) + if Arch.operation_is_pure op then None else create Memory_access.Arbitrary diff --git a/backend/cfg/cfg_polling.ml b/backend/cfg/cfg_polling.ml index 4e9cf43df9f..d148cef743c 100644 --- a/backend/cfg/cfg_polling.ml +++ b/backend/cfg/cfg_polling.ml @@ -194,7 +194,10 @@ let instr_cfg_with_layout : let cfg = Cfg_with_layout.cfg cfg_with_layout in Cfg_loop_infos.EdgeSet.fold (fun { Cfg_loop_infos.Edge.src; dst } added_poll -> - let needs_poll = exists_unsafe_path cfg ~safe_map ~from:dst ~to_:src in + let needs_poll = + (not (Label.Tbl.find safe_map src)) + && exists_unsafe_path cfg ~safe_map ~from:dst ~to_:src + in if needs_poll then ( let after = Cfg.get_block_exn cfg src in diff --git a/backend/cfg/cfg_with_layout.ml b/backend/cfg/cfg_with_layout.ml index 92612c422ac..7815c31bc41 100644 --- a/backend/cfg/cfg_with_layout.ml +++ b/backend/cfg/cfg_with_layout.ml @@ -131,8 +131,11 @@ let dump ppf t ~msg = let print_block label = let block = Label.Tbl.find t.cfg.blocks label in fprintf ppf "\n%a:\n" Label.format label; - DLL.iter ~f:(fprintf ppf "%a\n" Cfg.print_basic) block.body; - Cfg.print_terminator ppf block.terminator; + let pp_with_id ppf ~pp (instr : _ Cfg.instruction) = + fprintf ppf "(id:%d) %a\n" instr.id pp instr + in + DLL.iter ~f:(pp_with_id ppf ~pp:Cfg.print_basic) block.body; + pp_with_id ppf ~pp:Cfg.print_terminator block.terminator; fprintf ppf "\npredecessors:"; Label.Set.iter (fprintf ppf " %a" Label.format) block.predecessors; fprintf ppf "\nsuccessors:"; diff --git a/backend/cfg/vectorize.ml b/backend/cfg/vectorize.ml index 10eede1b022..a225018a57f 100644 --- a/backend/cfg/vectorize.ml +++ b/backend/cfg/vectorize.ml @@ -2,62 +2,216 @@ (* Finds independent scalar operations within the same basic block and tries to use vector operations if possible *) -(* CR-soon tip: add documentation *) +(* CR gyorsh: how does the info from [reg_map] flow between blocks? 
*) module DLL = Flambda_backend_utils.Doubly_linked_list -let ( << ) f g x = f (g x) +module State : sig + type t + + type live_regs = Reg.Set.t + + val create : Format.formatter -> Cfg_with_layout.t -> t + + val next_available_instruction : t -> int + + val liveness : t -> int -> live_regs + + val dump_debug : t -> ('a, Format.formatter, unit) format -> 'a + + val dump : t -> ('a, Format.formatter, unit) format -> 'a + + val extra_debug : bool + + val fun_name : t -> string + + val fun_dbg : t -> Debuginfo.t +end = struct + type t = + { ppf_dump : Format.formatter; + mutable max_instruction_id : int; + cfg_with_infos : Cfg_with_infos.t Lazy.t; + cfg_with_layout : Cfg_with_layout.t + } + + type live_regs = Reg.Set.t + + let fun_name t = Cfg.fun_name (Cfg_with_layout.cfg t.cfg_with_layout) + + let fun_dbg t = (Cfg_with_layout.cfg t.cfg_with_layout).fun_dbg + + let next_available_instruction t = + let id = t.max_instruction_id + 1 in + t.max_instruction_id <- id; + id + + let init_instructon_max_id cl = + (* CR gyorsh: Duplicated from backend/regalloc/regalloc_utils.ml. Should + probably move it to Cfg or Cfg_with_infos. 
*) + let max_id = ref Int.min_int in + let update_max_id (instr : _ Cfg.instruction) : unit = + max_id := Int.max !max_id instr.id + in + Cfg_with_layout.iter_instructions cl ~instruction:update_max_id + ~terminator:update_max_id; + !max_id + + let create ppf_dump cl = + (* CR-someday tip: the function may someday take a cfg_with_infos instead of + creating a new one *) + { ppf_dump; + max_instruction_id = init_instructon_max_id cl; + cfg_with_layout = cl; + cfg_with_infos = lazy (Cfg_with_infos.make cl) + } + + let liveness t id = + Cfg_with_infos.(liveness_find (Lazy.force t.cfg_with_infos) id).before + + let extra_debug = true + + let dump_if c t = + if c && !Flambda_backend_flags.dump_vectorize + then Format.fprintf t.ppf_dump + else Format.ifprintf t.ppf_dump + + let dump_debug t = dump_if extra_debug t + + let dump t = dump_if true t +end + +module Substitution : sig + (* CR-someday gyorsh: should be factored out with + [Regalloc_utils.Substitution]. *) + type t + + val create : int -> t + + val get_reg_exn : t -> Reg.t -> Reg.t + + val get_reg_opt : t -> Reg.t -> Reg.t option + + (* CR gyorsh: for SSA *) + (* (** [fresh_reg t r typ] assumes that [r] is not mapped, creates a fresh register [r'] of + * type [typ] and maps [r] to [r']. *) + * val fresh_reg : t -> Reg.t -> Cmm.machtype_component -> Reg.t *) -let vector_width_in_bytes = 16 + (** [fresh_reg_for_pack t pack typ] assumes that none of the registers in [pack] are + mapped, creates a fresh register [r'] of type [typ] and maps all registers in [pack] + to [r'].
*) + val fresh_reg_for_pack : t -> Reg.t list -> Cmm.machtype_component -> unit +end = struct + type t = Reg.t Reg.Tbl.t + + let create n = Reg.Tbl.create n + + let get_reg_exn t (reg : Reg.t) = Reg.Tbl.find t reg + + let get_reg_opt t (reg : Reg.t) = Reg.Tbl.find_opt t reg + + let fresh_reg t reg machtype_component = + match get_reg_opt t reg with + | None -> + let new_reg = Reg.create machtype_component in + Reg.Tbl.add t reg new_reg; + new_reg + | Some old_reg -> + Misc.fatal_errorf "register %a is already mapped to %a" Printreg.reg reg + Printreg.reg old_reg + + let fresh_reg_for_pack t regs machtype_component = + match regs with + | [] -> + Misc.fatal_error "State.fresh_reg_for_group: expects a non-empty group." + | hd :: tl -> + let new_reg = fresh_reg t hd machtype_component in + let add reg = + match get_reg_opt t reg with + | None -> Reg.Tbl.add t reg new_reg + | Some old_reg -> + if Reg.same new_reg old_reg + then + (* same register may appear multiple times in the group, this is + fine here but may not be desirable in the client code and should + be checked for there. *) + Misc.fatal_errorf + "fresh_reg_for_group: duplicate register %a in the group" + Printreg.reg reg + else + Misc.fatal_errorf + "fresh_reg_for_group: register %a is already mapped to %a" + Printreg.reg reg Printreg.reg old_reg + in + List.iter add tl +end module Instruction : sig (* CR-someday tip: consider moving this to cfg or at least have something similar there *) + (* CR gyorsh: We don't really need the terminator when vectorizing basic + blocks only, and removing it would simplify the code and reduce + allocation. 
*) module Id : sig type t include Identifiable.S with type t := t - - val to_int : t -> int end - type t = - | Basic of Cfg.basic Cfg.instruction - | Terminator of Cfg.terminator Cfg.instruction + type t + + val basic : Cfg.basic Cfg.instruction -> t + + val terminator : Cfg.terminator Cfg.instruction -> t val id : t -> Id.t + val equal_id : t -> t -> bool + val arguments : t -> Reg.t Array.t val results : t -> Reg.t Array.t val destroyed : t -> Reg.t Array.t - val print : Format.formatter -> t -> unit + val op : t -> Operation.t option - val is_store : t -> bool + val have_isomorphic_op : t -> t -> bool - val is_alloc : t -> bool + val stack_offset : t -> int + + val print : Format.formatter -> t -> unit - val can_cross_loads_or_stores : t -> bool + val print_id : Format.formatter -> t -> unit - val may_break_alloc_freshness : t -> bool + val copy : + Cfg.basic Cfg.instruction -> + arg:Reg.t array -> + res:Reg.t array -> + id:int -> + desc:Cfg.basic -> + Cfg.basic Cfg.instruction end = struct module Id = struct include Numbers.Int - let to_int t = t + let print ppf t = Format.fprintf ppf "(id:%d)" t end type t = | Basic of Cfg.basic Cfg.instruction | Terminator of Cfg.terminator Cfg.instruction + let basic i = Basic i + + let terminator i = Terminator i + let id (instruction : t) : Id.t = match instruction with | Basic instruction -> instruction.id | Terminator instruction -> instruction.id + let equal_id t1 t2 = Id.equal (id t1) (id t2) + let arguments (instruction : t) : Reg.t Array.t = match instruction with | Basic instruction -> instruction.arg @@ -73,808 +227,2857 @@ end = struct | Basic instruction -> Proc.destroyed_at_basic instruction.desc | Terminator instruction -> Proc.destroyed_at_terminator instruction.desc - let print ppf (instruction : t) : unit = + let stack_offset (instruction : t) : int = match instruction with - | Basic i -> Cfg.print_basic ppf i - | Terminator i -> Cfg.print_terminator ppf i + | Basic instruction -> instruction.stack_offset + 
| Terminator instruction -> instruction.stack_offset - let is_store (instruction : t) = + let op (instruction : t) : Operation.t option = match instruction with | Basic basic_instruction -> ( let desc = basic_instruction.desc in match desc with - | Op op -> ( - match op with - | Store _ -> true - | Load _ | Alloc _ | Move | Reinterpret_cast _ | Static_cast _ | Spill - | Reload | Const_int _ | Const_float32 _ | Const_float _ - | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _ - | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ - | Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _ - | Dls_get | Poll -> - false) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> false - ) - | Terminator _ -> false + | Op op -> Some op + | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> None) + | Terminator _ -> None + + let copy (i : Cfg.basic Cfg.instruction) ~arg ~res ~id ~desc = + { i with desc; arg; res; id } + + (* [op_isomorphic] is used to identify candidate operations that may be + grouped into a vector operation. It returns true if two operations have the + same opcode but allows different constant arguments or different constant + displacement in the address calculation. Subsequent checks ensure that the + corresponding vector instruction is supported by the target architecture, + where additional constraints on the constants may be needed (see + [Operation.is_adjacent] and [Simd_selection.vectorize_operation]. It is + safe to return [false] conservatively, but will prevent vectorization. + [Arch.isomorphic_specific_operation] may also conservatively return false + even if some operations are isomorphic. [Arch.equal_specific_operation] can + be used as a conservative approximation of [Arch.isomorphic_specific + operation], for example for SIMD operations with different constants, such + as [Simd.Clmul_64] in amd64. 
*) + let op_isomorphic (op1 : Operation.t) (op2 : Operation.t) = + match op1, op2 with + | Move, Move | Spill, Spill | Reload, Reload -> true + | Const_int _, Const_int _ + | Const_float32 _, Const_float32 _ + | Const_float _, Const_float _ + | Const_symbol _, Const_symbol _ + | Const_vec128 _, Const_vec128 _ -> + true + | ( Load + { memory_chunk = memory_chunk1; + addressing_mode = addressing_mode1; + mutability = mutability1; + is_atomic = is_atomic1 + }, + Load + { memory_chunk = memory_chunk2; + addressing_mode = addressing_mode2; + mutability = mutability2; + is_atomic = is_atomic2 + } ) -> + Cmm.equal_memory_chunk memory_chunk1 memory_chunk2 + && Arch.equal_addressing_mode_without_displ addressing_mode1 + addressing_mode2 + && mutability1 = mutability2 && is_atomic1 = is_atomic2 + | ( Store (memory_chunk1, addressing_mode1, is_assignment1), + Store (memory_chunk2, addressing_mode2, is_assignment2) ) -> + Cmm.equal_memory_chunk memory_chunk1 memory_chunk2 + && Arch.equal_addressing_mode_without_displ addressing_mode1 + addressing_mode2 + && is_assignment1 = is_assignment2 + | Intop intop1, Intop intop2 -> + Simple_operation.equal_integer_operation intop1 intop2 + | Intop_imm (intop1, _), Intop_imm (intop2, _) -> + Simple_operation.equal_integer_operation intop1 intop2 + | Floatop (width1, floatop1), Floatop (width2, floatop2) -> + Simple_operation.equal_float_width width1 width2 + && Simple_operation.equal_float_operation floatop1 floatop2 + | Specific specific_operation1, Specific specific_operation2 -> + Arch.isomorphic_specific_operation specific_operation1 specific_operation2 + | Move, _ + | Spill, _ + | Reload, _ + | Const_int _, _ + | Const_float32 _, _ + | Const_float _, _ + | Const_symbol _, _ + | Const_vec128 _, _ + | Stackoffset _, _ + | Load _, _ + | Store _, _ + | Intop _, _ + | Intop_imm _, _ + | Intop_atomic _, _ + | Floatop _, _ + | Csel _, _ + | Reinterpret_cast _, _ + | Static_cast _, _ + | Probe_is_enabled _, _ + | Opaque, _ + | Begin_region, 
_ + | End_region, _ + | Specific _, _ + | Name_for_debugger _, _ + | Dls_get, _ + | Poll, _ + | Alloc _, _ -> + false + + let have_isomorphic_op instruction1 instruction2 = + match op instruction1, op instruction2 with + | Some op1, Some op2 -> op_isomorphic op1 op2 + | _ -> false + + let print ppf t = + match t with + | Basic i -> Cfg.print_basic ppf i + | Terminator i -> Cfg.print_terminator ppf i - let is_alloc (instruction : t) = - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Alloc _ -> true - | Load _ | Store _ | Move | Reinterpret_cast _ | Static_cast _ | Spill - | Reload | Const_int _ | Const_float32 _ | Const_float _ - | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _ - | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ - | Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _ - | Dls_get | Poll -> - false) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> false - ) - | Terminator _ -> false - - let can_cross_loads_or_stores (instruction : t) = - (* CR-someday tip: some instructions may or may not cause issues for going - across a load or a store, for simplicity's sake, let's just return false - and not let them go across for now, but better handling can be added in - the future. 
Also, loads from an immuntable block has no coeffects and may - have less restrictions *) - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Load _ | Store _ | Intop_atomic _ | Alloc _ | Poll | Opaque - | Begin_region | End_region -> - false - | Specific specific_operation -> - Arch.can_cross_loads_or_stores specific_operation - | Move | Reinterpret_cast _ | Static_cast _ | Spill | Reload - | Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ - | Const_vec128 _ | Stackoffset _ | Intop _ | Intop_imm _ | Floatop _ - | Csel _ | Probe_is_enabled _ | Name_for_debugger _ | Dls_get -> - true) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> true) - | Terminator _ -> false - - let may_break_alloc_freshness (instruction : t) = - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Load _ | Store _ -> true - | Specific specific_operation -> - Arch.may_break_alloc_freshness specific_operation - | Alloc _ | Move | Reinterpret_cast _ | Static_cast _ | Spill | Reload - | Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ - | Const_vec128 _ | Stackoffset _ | Intop _ | Intop_imm _ - | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ | Opaque - | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll -> - false) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> false - ) - | Terminator _ -> false + let print ppf t = Format.fprintf ppf "%a %a" Id.print (id t) print t + + let print_id ppf t = Format.fprintf ppf "%a" Id.print (id t) end -module Dependency_graph : sig - (* The dependency graph shows dependencies between instructions within the - same basic block *) +module Block : sig type t - val from_block : Cfg.basic_block -> t + val create : Cfg.basic_block -> State.t -> t - val get_all_dependencies_of_arg : 
- t -> Instruction.Id.t -> arg_i:int -> Instruction.Id.Set.t + val body : t -> Cfg.basic_instruction_list - val dump : Format.formatter -> t -> Cfg.basic_block -> unit -end = struct - module Node = struct - module Reg_node = struct - type t = - { reg : Reg.t; - direct_dependency : Instruction.Id.t option - (* the most recent instruction in this basic block that may change - the value of the argument *) - } + val terminator : t -> Instruction.t - let init reg : t = { reg; direct_dependency = None } - end + (** original size of the block before vectorization *) + val size : t -> int - type t = - { instruction : Instruction.t; - reg_nodes : Reg_node.t array; - direct_dependencies : Instruction.Id.Set.t; - (* direct dependencies of all arguments of this instruction *) - all_dependencies : Instruction.Id.Set.t; - (* direct dependencies of this instruction and all dependencies of - each direct dependency of this instruction *) - is_direct_dependency_of : Instruction.Id.Set.t - (* all instructions that have this instruction as a direct - dependency *) - } + val find : t -> Instruction.Id.t -> Instruction.t - let init instruction : t = - let arguments = Instruction.arguments instruction in - { instruction; - reg_nodes = - Array.init (Array.length arguments) (fun i -> - arguments.(i) |> Reg_node.init); - direct_dependencies = Instruction.Id.Set.empty; - all_dependencies = Instruction.Id.Set.empty; - is_direct_dependency_of = Instruction.Id.Set.empty - } - end + (** [find_last_instruction t instrs] returns instruction [i] + from [instrs] such that [i] appears after + all other instructions from [instrs] according to the order of instructions + in this basic block. Raises if [instrs] is empty. 
*) + val find_last_instruction : t -> Instruction.Id.t list -> Instruction.t + + val get_live_regs_before_terminator : t -> State.live_regs + + val state : t -> State.t + + val reg_map : t -> Substitution.t + + val start : t -> Label.t + + (** [pos t id] returns the original position of [id] instruction within the body of [t]. + Raises if [id] is not in the body. *) + val pos : t -> Instruction.Id.t -> int +end = struct + type t = + { block : Cfg.basic_block; + id_to_instructions : Instruction.t Instruction.Id.Tbl.t; + id_to_body_pos : int Instruction.Id.Tbl.t; + size : int; + state : State.t; + reg_map : Substitution.t + } + + let state t = t.state - type t = Node.t Instruction.Id.Tbl.t + let body t = t.block.body - let add = Instruction.Id.Tbl.add + let terminator t = Instruction.terminator t.block.terminator - let find = Instruction.Id.Tbl.find + let size t = t.size - let replace = Instruction.Id.Tbl.replace + let start (t : t) = t.block.start - let init ~size : t = Instruction.Id.Tbl.create size + let reg_map t = t.reg_map - let get_all_dependencies dependency_graph id = - let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in - node.all_dependencies + let find t id = Instruction.Id.Tbl.find t.id_to_instructions id - let get_all_dependencies_of_arg dependency_graph id ~arg_i = - let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in - match node.reg_nodes.(arg_i).direct_dependency with - | None -> Instruction.Id.Set.empty - | Some direct_dependency -> - get_all_dependencies dependency_graph direct_dependency - |> Instruction.Id.Set.add direct_dependency + let pos t id = Instruction.Id.Tbl.find t.id_to_body_pos id - let from_block (block : Cfg.basic_block) = - let dependency_graph = init ~size:(DLL.length block.body) in - let is_changed_in instruction reg = - Array.exists (Reg.same reg) (Instruction.results instruction) - || Array.exists (Reg.same reg) (Instruction.destroyed instruction) + let create (block : Cfg.basic_block) state = 
+ let size = DLL.length block.body + 1 in + let id_to_instructions = Instruction.Id.Tbl.create size in + let add i = + Instruction.Id.Tbl.add id_to_instructions (Instruction.id i) i in - let latest_change ~(current : Instruction.Id.t) (reg : Reg.t) = - let starting_cell = - match - DLL.find_cell_opt block.body ~f:(fun instruction -> - Basic instruction |> Instruction.id - |> Instruction.Id.equal current) - with - | None -> DLL.last_cell block.body - | Some current_cell -> DLL.prev current_cell - in - let rec find_latest_change cell_option = + DLL.iter block.body ~f:(fun i -> add (Instruction.basic i)); + add (Instruction.terminator block.terminator); + let id_to_body_pos = Instruction.Id.Tbl.create size in + DLL.iteri block.body ~f:(fun pos i -> + Instruction.Id.Tbl.add id_to_body_pos + (Instruction.id (Instruction.basic i)) + pos); + let reg_map = Substitution.create 3 in + { block; size; id_to_instructions; id_to_body_pos; state; reg_map } + + let get_live_regs_before_terminator t = + State.liveness t.state t.block.terminator.id + + let find_last_instruction t instructions = + let instruction_set = Instruction.Id.Set.of_list instructions in + let terminator = terminator t in + if Instruction.Id.Set.mem (Instruction.id terminator) instruction_set + then terminator + else + let body = t.block.body in + let rec find_last cell_option = match cell_option with - | None -> None + | None -> + Misc.fatal_errorf "Vectorizer.find_last_instruction in block %a" + Label.print t.block.start () | Some cell -> - let instruction = Instruction.Basic (DLL.value cell) in - if is_changed_in instruction reg - then Some instruction - else find_latest_change (DLL.prev cell) + let current_instruction = Instruction.basic (DLL.value cell) in + let current_instruction_id = Instruction.id current_instruction in + if Instruction.Id.Set.exists + (Instruction.Id.equal current_instruction_id) + instruction_set + then current_instruction + else find_last (DLL.prev cell) in - find_latest_change 
starting_cell - in - let add_arg_dependency instruction arg_i arg = - let id = Instruction.id instruction in - let dependency = latest_change ~current:id arg in - let node = find dependency_graph id in - let reg_node = node.reg_nodes.(arg_i) in - node.reg_nodes.(arg_i) - <- { reg_node with - direct_dependency = - Option.fold ~none:None - ~some:(Option.some << Instruction.id) - dependency - } - in - let add_arg_dependencies (instruction : Instruction.t) = - Array.iteri - (add_arg_dependency instruction) - (Instruction.arguments instruction) - in - let add_dependencies (instruction : Instruction.t) = - let id = Instruction.id instruction in - add dependency_graph id (Node.init instruction); - add_arg_dependencies instruction; - let arg_indices = - Instruction.arguments instruction |> Array.mapi (fun arg_i _ -> arg_i) + find_last (DLL.last_cell body) +end + +(* CR-someday gyorsh: Dependencies computed below can be used for other + optimizations, not only vectorization. For example, peephole optimizations + within a basic block can reorder or replace instructions that are not + consecutive, as long as the transformation preserves dependencies. *) + +(** + Construct an overapproximation of transitive dependencies between instructions, and + use this information to identify instructions that can run in parallel and can be + reordered. + + Approach: + ========= + + Record a direct dependency from instruction [i] to [j] if one of the following + conditions holds: + + (a) Dependency via registers: [i] may read from a register [r] a value that was + previously written to [r] by instruction [j]. + + (b) Dependency via memory: [i] may read from a memory location that [j] may + write to (RAW memory dependency). + + (c) Ordering via memory: [i] and [j] may access the same memory location, + and one of the accesses is a "write" (this covers WAR and WAW dependencies, + as well as dependencies of reads and writes on allocation).
+ + Vectorizable computation + ======================== + TODO add citation + + The method for constructing vectorizable computations. + - Group: a sequence of scalar instructions that are independent of each other, and have + an equivalent vector instruction. Most groups correspond to one vector instruction, + but sometimes a sequence of vector instructions is needed, for example scalar addition + of a register and a constant. + - Current heuristic for identifying vectorizable instructions requires that memory + accesses of scalar instructions are adjacent and have the same base address and + addressing mode. + - Seed: a group of store instructions. + - Vectorizable computation is a set of groups of scalar instructions that can be replaced + with equivalent vector instructions. + - Find vectorizable computations: starting from a seed group, traverse the dependencies + backwards to construct groups for each of the arguments. Stop at load instructions (and + constant arguments). + - Use (a) during the backward traversal to find scalar instructions needed to form + a group. + - Use both (a) and (b) dependencies to determine whether scalar instructions can run in + parallel and therefore can form a group. + - Use (c) to determine the placement of vector instructions for each group in the + block, relative to other instructions. Order constraints determine when scalar + instructions can be executed. + - Key: conservatively choose the position to the last scalar instruction of the group + to place the vector instructions for the group. + - Conservatively require that all ordering constraints from a scalar instruction in a + vectorizable computation are to instructions that appear earlier in the block, + before the first scalar instruction of the computation. + - Conservatively require that nothing depends (via a,b,c) + on scalar instructions in the computation.
+ + Heuristics + ========== + Identify disjoint memory accesses to improve precision of memory dependency and + ordering overapproximation. Use the following observations: + + (1) A freshly-allocated memory block is disjoint from all previously-allocated blocks. + + A crude version of points-to analysis with allocation site abstraction is sufficient to + track this information. + + (2) Different offsets from the same base address. + + If both memory accesses use the same base address, and offsets from the base address + can statically be shown to refer to non-overlapping address intervals. + + Two memory accesses have the same base address if they use the same register for the + base address, and the register contains the same value, i.e., there are no writes to + this register between the two memory accesses. + + (3) Two valid ocaml blocks are disjoint, except for closure blocks. + + A register points to a valid ocaml block iff [Reg.typ] is [Val]. + + For two different registers that point to valid ocaml blocks, one of the following + conditions holds: + - the registers point to the same block, or + - the registers point to disjoint blocks, or + - the registers point to closure blocks. + + To rule out closure blocks, we use the following observation. + + Closures cannot be modified after initialization. All stores to closure blocks emitted + by the ocaml compiler are initializing stores. Therefore, if a register that points to + a valid ocaml block is used in an address computation of a non-initializing store, then + the block it points to is not a closure block. + + The type system guarantees that there is no pointer arithmetic between valid ocaml + blocks: an address computed as an offset from a register that points to a valid ocaml + block cannot point to a different valid ocaml block, except for closure blocks. 
+ + If two registers (a) point to valid ocaml blocks, (b) one of the blocks is not a + closure block, and (c) a memory access uses one of the registers as base address, + then a memory access that uses the other base address is disjoint if the accesses + refer to non-overlapping address intervals (similarly to (2)). + + Overview of the implementation + ============================== + + - Partition: represents a set of memory blocks that are disjoint from memory blocks + represented by other partitions. A memory block need not be on the ocaml heap. + - Points-to graph: for each partition, track the set of partitions it may point to. + - Initial partition [unknown] represents all other memory blocks, previously allocated on + the ocaml heap or elsewhere, and it may point to itself. + - Fresh allocations: Track that a partition represents exactly one ocaml block. Such a + partition is a result of an allocation instruction, and known to be disjoint from all + other partitions, and initially has no valid pointers to or from other partitions. + - Aliases: For each register, track the set of partitions it may point to. + - Memory accesses: For each instruction, identify the set of partitions that the + instruction may access for read or write. + - For each partition, track the set of instructions that may read from it or write + to it. + - For each instruction [i] that may write to partition [p], and each instruction [j] that + appears before [i] in the basic block and may access the same partition [p] for read + or write, if heuristic conditions (2) and (3) above do not hold, then add a direct + dependency from [i] to [j]. Note that it will add a dependency from Load to Store. + + Computing aliases and points-to graph + ===================================== + + - Initially, all registers may point to [unknown] partition.
+ - If a register is clobbered, its partition is reset to empty, representing the fact + that it does not contain a valid pointer and it is illegal for the program to access + memory that this register may point to. + - If a register is assigned a static constant, its partition is reset to [unknown], + representing the fact that a constant may point to statically allocated memory, but it + is illegal to use it to access any partitions freshly allocated on the OCaml heap. + - If a register is used as RHS of a store (i.e., the value to store), add edges + between the corresponding partitions: the partitions of the LHS of the store (i.e., the + address arguments of the store) may point to the partitions of RHS. + - If register [r] is used as a LHS of a load (i.e., result of the load), then after the + load [r] may point to partition [p'] if [p] is an RHS of this load (i.e., address + arguments of the load) may point to, and [p] may point to [p']. + - For all other assignments, propagate points-to information from arguments to results. + - Edges between partitions can be added but not removed (i.e., no strong update + of the points-to graph). Strong updates of aliases within a basic block are safe. + + Complexity + ======== + - The number of partitions is n+1 where n is the number of allocation instruction in + the block. + - Tracking of memory accesses of each partition is quadratic in [n] in the worst case, + but in practice should be much smaller. + - Single pass on the block's body to construct memory dependencies. 
+ +*) +module Dependencies : sig + (** Dependencies between basic instructions within the same basic block *) + type t + + val from_block : Block.t -> t + + val get_direct_dependency_of_reg : + t -> Instruction.Id.t -> Reg.t -> Instruction.Id.t option + + val get_direct_dependency_of_arg : + t -> Instruction.Id.t -> arg_i:int -> Instruction.Id.t option + + val get_direct_dependencies : t -> Instruction.Id.t -> Instruction.Id.Set.t + + val independent : t -> Instruction.t -> Instruction.t -> bool + + (** [all_independent t l] returns true when all instructions in [l] are pairwise + independent. *) + val all_independent : t -> Instruction.t list -> bool + + (** [all_adjacent t l] raises if [l] are not all memory operations. *) + val all_adjacent : t -> Instruction.t list -> bool + + val for_all_memory_dependencies : + t -> f:(Instruction.Id.t -> Instruction.Id.t -> bool) -> bool + + (* (** [width_in_bits t i] raises if [i] is not a memory operation or width is not known, + * e.g., Arbitrary or Alloc operations. *) + * val width_in_bits : t -> Instruction.t -> int *) + + (* CR gyorsh: find a better way to pass the state around. it's mostly used for + debug printing, so we could just pass the [ppf] around, but not only - the + other two use is for reg_map. It's not yet needed across blocks, so need + not be passed around. Maybe we should split printing util out of state and + pass ppf around, then pass the state to only the places that actually need + it. *) + val state : t -> State.t + + module Memory : sig + module Operation : sig + type t + + val first_memory_arg_index : t -> int + end + end + + val get_memory_operation : t -> Instruction.t -> Memory.Operation.t option + + (* CR gyorsh: output assorted dependency graphs and points-to graph in dot + format, it'll be very useful for debugging. Not clear how to control it on + a per-block basis. 
*) +end = struct + module Reaching_definitions : sig + type t + + val from_block : Block.t -> t + + (** [get t cur_id reg] For register [r], return [id] of instruction that defines [r] + and there is no other instruction that defines [r] or destroys/clobbers [r] on any + path from [id] to the current program point, i.e., the program point immediately + before [cur_id]. An instruction defines [r] means that [r] is a "result" register + that the instruction writes to, not clobbers it. Returns [None] if there is no + definition of [r] in the block, or the definition of [r] is clobbered + prior to the current program point. *) + val get : t -> Instruction.Id.t -> Reg.t -> Instruction.Id.t option + + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + module D = struct + type t = Instruction.Id.t Reg.Map.t + + let get t reg = Reg.Map.find_opt reg t + + let dump ppf t = + let open Format in + Reg.Map.iter + (fun reg id -> + fprintf ppf "%a defined by instruction %a@." Printreg.reg reg + Instruction.Id.print id) + t + end + + type t = D.t Instruction.Id.Tbl.t + + let from_block block = + let t = Block.size block |> Instruction.Id.Tbl.create in + let f map instruction = + (* record current value *) + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + Instruction.Id.Tbl.add t id map; + (* transform *) + let remove map reg_array = + Array.fold_left (fun m r -> Reg.Map.remove r m) map reg_array + in + let add map reg_array id = + Array.fold_left (fun m r -> Reg.Map.add r id m) map reg_array + in + let map = remove map (Instruction.destroyed instruction) in + let map = add map (Instruction.results instruction) id in + map in - let direct_dependencies = - Array.fold_left - (fun dependencies arg_i -> - Option.fold ~none:dependencies - ~some:(fun dependency -> - Instruction.Id.Set.add dependency dependencies) - (find dependency_graph id).reg_nodes.(arg_i).direct_dependency) - Instruction.Id.Set.empty arg_indices + 
let init = + (* ignore definitions outside the block *) + Reg.Map.empty in - let all_dependencies = - Instruction.Id.Set.fold - (fun new_id old_indirect_dependencies -> - let node = Instruction.Id.Tbl.find dependency_graph new_id in - Instruction.Id.Set.union node.direct_dependencies - old_indirect_dependencies) - direct_dependencies direct_dependencies + let map = DLL.fold_left (Block.body block) ~init ~f in + let terminator_id = Block.terminator block |> Instruction.id in + Instruction.Id.Tbl.add t terminator_id map; + t + + let get t id reg = + let d = Instruction.Id.Tbl.find t id in + D.get d reg + + let dump ppf ~(block : Block.t) (t : t) = + let open Format in + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + fprintf ppf "Reaching definitions after instruction %a:@.%a@." + Instruction.Id.print id D.dump + (Instruction.Id.Tbl.find t id)) + end + + (* [Reaching_definitions] wouldn't be needed here if we had SSA + representation. Converting to SSA even at a basic block level would require + some change in emit because instruction selection (a) relies on sharing of + registers between arguments and results to emit shorter encodings of + instructions such as Add and Shift, (b) forces the use of certain hardware + registers, for example Div and Bswap. 
*) + module Reg_defined_at_instruction : sig + type t + + val create : Reg.t -> Instruction.t -> Reaching_definitions.t -> t + + val equal : t -> t -> bool + + val is_val : t -> bool + + val get_offset : t -> t -> Block.t -> Reaching_definitions.t -> int option + + val print : Format.formatter -> t -> unit + end = struct + type t = + { reg : Reg.t; + def : Instruction.Id.t option + } + + let print ppf t = + let pp ppf def = + match def with + | None -> Format.fprintf ppf "(unknown)" + | Some id -> Instruction.Id.print ppf id in - let node = find dependency_graph id in - replace dependency_graph id - { node with direct_dependencies; all_dependencies } - in - let add_all_dependencies () = - DLL.iter block.body ~f:(fun instruction -> - add_dependencies (Basic instruction)); - add_dependencies (Terminator block.terminator) - in - let set_is_dependency_of instruction_id dependency_id = - let dependency = find dependency_graph dependency_id in - replace dependency_graph dependency_id - { dependency with - is_direct_dependency_of = - Instruction.Id.Set.add instruction_id - dependency.is_direct_dependency_of - } - in - let set_is_dependency_of_plural (instruction : Instruction.t) = + Format.fprintf ppf "%a at %a" Printreg.reg t.reg pp t.def + + let init reg id reaching_definitions = + let def = Reaching_definitions.get reaching_definitions id reg in + { reg; def } + + let create reg instruction reaching_definitions = let id = Instruction.id instruction in - let node = find dependency_graph id in - Instruction.Id.Set.iter (set_is_dependency_of id) node.direct_dependencies - in - let set_all_is_dependency_of () = - DLL.iter block.body ~f:(fun instruction -> - set_is_dependency_of_plural (Basic instruction)); - set_is_dependency_of_plural (Terminator block.terminator) - in - add_all_dependencies (); - set_all_is_dependency_of (); - dependency_graph + init reg id reaching_definitions - let dump ppf (t : t) (block : Cfg.basic_block) = - let open Format in - let print_reg_node 
arg_i (reg_node : Node.Reg_node.t) = - let dependency = - Option.fold ~none:"none" - ~some:(sprintf "instruction %d" << Instruction.Id.to_int) - reg_node.direct_dependency + let equal { reg = r1; def = d1 } { reg = r2; def = d2 } = + Reg.same r1 r2 && Option.equal Instruction.Id.equal d1 d2 + + let is_val t = Cmm.is_val t.reg.typ + + let get_offset t1 t2 block reaching_definitions = + (* Heuristic to identify some very simple relations of the form [t2 = t1 + + N]. This heuristic is relatively cheap, because it only follows a + single use-def chain, backwards from [t2] until it finds [t1], and + returns the accumulated offset N such that [t2 = t1 + N]. *) + let add n acc = + (* [acc = None] indicates that the accumulator is not initialized. *) + match acc with None -> Some n | Some n' -> Some (n + n') in - fprintf ppf "argument %d, %a depends on %s\n" arg_i Printreg.reg - reg_node.reg dependency - in - let print_node (instruction : Instruction.t) = - let id = Instruction.id instruction in - let node = find t id in - fprintf ppf "\n%d:\n" - (Instruction.id node.instruction |> Instruction.Id.to_int); - Instruction.print ppf instruction; - fprintf ppf "\ndirect dependencies:\n"; - Instruction.Id.Set.iter - (fprintf ppf "%d " << Instruction.Id.to_int) - node.direct_dependencies; - fprintf ppf "\nall dependencies:\n"; - Instruction.Id.Set.iter - (fprintf ppf "%d " << Instruction.Id.to_int) - node.all_dependencies; - fprintf ppf "\nis direct dependency of:\n"; - Instruction.Id.Set.iter - (fprintf ppf "%d " << Instruction.Id.to_int) - node.is_direct_dependency_of; - fprintf ppf "\narg dependencies:\n"; - Array.iteri print_reg_node node.reg_nodes; - fprintf ppf "\n" - in - fprintf ppf "\ndependency graph:\n"; - DLL.iter block.body ~f:(fun instruction -> print_node (Basic instruction)); - print_node (Terminator block.terminator); - fprintf ppf "\n" -end + let rec loop ~cur ~dst acc = + (* If [cur] does not depend on [dst], or an operation is encountered + that cannot be 
expressed as a constant offset, return None *) + if equal cur dst + then add 0 acc + else + match cur.def with + | None -> + (* [cur] defined outside the block, so it is not defined as an + offset of [dst]. *) + None + | Some id -> ( + let next = + let instruction = Block.find block id in + match Instruction.arguments instruction with + | [| reg |] -> ( + match Instruction.op instruction with + | None -> None + | Some op -> ( + match[@warning "-fragile-match"] op with + | Move | Spill | Reload -> Some (reg, 0) + | Intop_imm (Iadd, n) -> Some (reg, n) + | Intop_imm (Isub, n) -> Some (reg, -n) + | _ -> None)) + | _ -> None + in + match next with + | None -> None + | Some (reg, n) -> + let acc = add n acc in + let cur = init reg id reaching_definitions in + loop ~cur ~dst acc) + in + (* CR-someday gyorsh: add the symmetric loop r with a negative offset? *) + (* The backward traversal starts from [t2]. *) + loop ~cur:t2 ~dst:t1 None + end -module Memory_accesses : sig - module Memory_operation : sig + (* CR-someday gyorsh: [Memory] can be merged with [Reaching_definitions] but + let's do it separately first for simplicity. 
*) + module Memory : sig type t - val instruction : t -> Instruction.t + module Operation : sig + type t - val is_adjacent : t -> t -> bool + val first_memory_arg_index : t -> int + end - val width : t -> int + module Dependencies : sig + type t - val dump : Format.formatter -> t -> unit - end + val get : t -> Instruction.Id.t -> Instruction.Id.Set.t - type t + val for_all : + t -> f:(Instruction.Id.t -> Instruction.Id.t -> bool) -> bool - val stores : t -> Instruction.Id.t list + val dump : Format.formatter -> block:Block.t -> t -> unit + end - val get_memory_operation_exn : t -> Instruction.Id.t -> Memory_operation.t + val from_block : Block.t -> Reaching_definitions.t -> t * Dependencies.t - val from_block : Cfg.basic_block -> t + (* val is_memory_operation : t -> Instruction.t -> bool *) - val can_cross : t -> Instruction.t -> Instruction.t -> bool + val get_memory_operation : t -> Instruction.t -> Operation.t option - val dump : Format.formatter -> t -> unit -end = struct - module Memory_operation = struct - type op = - | Load - | Store + val is_adjacent : + t -> + Instruction.t -> + Instruction.t -> + Block.t -> + Reaching_definitions.t -> + bool - type t = - { op : op; - memory_chunk : Cmm.memory_chunk; - addressing_mode : Arch.addressing_mode; - instruction : Instruction.t; - dependent_allocs : Instruction.Id.Set.t; - unsure_allocs : Instruction.Id.Set.t - } + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + module Partition : sig + include Identifiable.S - let instruction t = t.instruction + (** Abstraction of all previously allocated blocks of memory. 
*) + val unknown : t - let init (instruction : Instruction.t) : t option = - match instruction with - | Basic basic_instruction -> ( - let desc = basic_instruction.desc in - match desc with - | Op op -> ( - match op with - | Load { memory_chunk; addressing_mode; _ } -> - Some - { op = Load; - memory_chunk; - addressing_mode; - instruction; - dependent_allocs = Instruction.Id.Set.empty; - unsure_allocs = Instruction.Id.Set.empty - } - | Store (memory_chunk, addressing_mode, _) -> - Some - { op = Store; - memory_chunk; - addressing_mode; - instruction; - dependent_allocs = Instruction.Id.Set.empty; - unsure_allocs = Instruction.Id.Set.empty - } - | Specific _ -> - None - (* CR-someday tip: may need to rewrite a lot of code to handle loads - and stores inside [Specific] in the future *) - | Move | Reinterpret_cast _ | Static_cast _ | Spill | Reload - | Const_int _ | Const_float32 _ | Const_float _ | Const_symbol _ - | Const_vec128 _ | Stackoffset _ | Intop _ | Intop_imm _ - | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ | Opaque - | Begin_region | End_region | Name_for_debugger _ | Dls_get | Poll - | Alloc _ -> - None) - | Reloadretaddr | Pushtrap _ | Poptrap | Prologue | Stack_check _ -> - None) - | Terminator _ -> None + (** Represents a block of memory that was freshly allocated at the + [allocation_site]. *) + val create : allocation_site:Instruction.Id.t -> t - let memory_arguments (t : t) = - let arguments = Instruction.arguments t.instruction in - match t.op with - | Load -> arguments - | Store -> Array.sub arguments 1 (Array.length arguments - 1) + (* (** [init] is the singleton set of [unknown] partition. 
*) + * val init : Set.t *) + module Set : sig + include module type of Set - let width (t : t) = Cmm.width_in_bytes t.memory_chunk + val unknown : t + end + end = struct + module S = struct + type t = Instruction.Id.t option - let print_memory_chunk ppf (t : t) = - Format.fprintf ppf "%s (length %d)" - (Printcmm.chunk t.memory_chunk) - (Cmm.width_in_bytes t.memory_chunk) + let compare t1 t2 = Option.compare Instruction.Id.compare t1 t2 - let dump ppf (t : t) = - let open Format in - let instruction = t.instruction in - let print_set ppf set = - Instruction.Id.Set.iter - (fun id -> fprintf ppf "%d " (Instruction.Id.to_int id)) - set - in - fprintf ppf - "\n\ - Instruction %d: %a (%a, %a)\n\ - \ dependent allocs: %a\n\ - \ unsure_allocs: %a" - (Instruction.id instruction |> Instruction.Id.to_int) - Instruction.print instruction print_memory_chunk t - (Arch.print_addressing Printreg.reg t.addressing_mode) - (memory_arguments t) print_set t.dependent_allocs print_set - t.unsure_allocs - - let compare_arguments (t1 : t) (t2 : t) = - let arguments_1 = memory_arguments t1 in - let arguments_2 = memory_arguments t2 in - Array.combine arguments_1 arguments_2 - |> Array.fold_left - (fun result ((arg1, arg2) : Reg.t * Reg.t) -> - if result = 0 then Reg.compare arg1 arg2 else result) - 0 - - let compare_addressing_modes_and_arguments (t1 : t) (t2 : t) = - let addressing_mode_comparison = - Arch.compare_addressing_mode_without_displ t1.addressing_mode - t2.addressing_mode - in - if addressing_mode_comparison = 0 - then - let arguments_comparison = compare_arguments t1 t2 in - arguments_comparison - else addressing_mode_comparison + let equal t1 t2 = compare t1 t2 = 0 - let offset_in_bytes (t1 : t) (t2 : t) = - let addressing_mode_and_arguments_comparison = - compare_addressing_modes_and_arguments t1 t2 - in - if addressing_mode_and_arguments_comparison = 0 - then Arch.addressing_offset_in_bytes t1.addressing_mode t2.addressing_mode - else None - - let is_adjacent (t1 : t) (t2 : 
t) = - let res = - if Cmm.equal_memory_chunk t1.memory_chunk t2.memory_chunk - then - let width = Cmm.width_in_bytes t1.memory_chunk in - let offset_option = offset_in_bytes t1 t2 in - match offset_option with - | None -> false - | Some offset -> width = offset - else false - in - res + let hash t = + match t with None -> 0 | Some id -> Instruction.Id.hash id - let index_offset t = match t.op with Load -> 0 | Store -> 1 - end + let print ppf t = + match t with + | None -> Format.fprintf ppf "unknown" + | Some id -> + Format.fprintf ppf "alloc_site=%a" Instruction.Id.print id - type t = - { loads : Instruction.Id.t list; - stores : Instruction.Id.t list; - memory_operations : Memory_operation.t Instruction.Id.Tbl.t - } + let output oc t = Printf.fprintf oc "%s" (Format.asprintf "%a" print t) + end - let stores t = t.stores + include S + include Identifiable.Make (S) - let get_memory_operation_exn t id = - Instruction.Id.Tbl.find t.memory_operations id + let unknown = None - type alloc_tracker = - { loads : Instruction.Id.t list; - stores : Instruction.Id.t list; - fresh_allocs : Instruction.Id.Set.t; - stored_allocs : Instruction.Id.Set.t; - unsure_allocs : Instruction.Id.Set.t - } + let create ~allocation_site = Some allocation_site - let from_block (block : Cfg.basic_block) : t = - (* A heuristic to avoid treating the same "fresh" allocation which address - stored and loaded into a different register as different, has room for - improvement. 
Assumption: if x depends on a fresh allocation, and it is - certain that y does not depend on that fresh allocation, then they point - to disjoint addresses *) - (* At each load or store instruction, it keeps track of all allocs up to - this point in this basic block and puts them in one of 3 categories: - [fresh_allocs]: nothing that depends on the address of the fresh alloc - has been saved as a value; [stored_allocs]: something that depends on the - address of the fresh alloc has been saved as a value, but nothing has - been loaded till this point; [unsure_allocs]: something that depends on - the address of the fresh alloc has been saved as a value, and something - has been loaded till this point. For each memory operation, we will save - its dependent allocs and unsure allocs *) - let dependency_graph = Dependency_graph.from_block block in - let id_to_instructions = - DLL.to_list block.body - |> List.map (fun basic_instruction -> - let instruction = Instruction.Basic basic_instruction in - Instruction.id instruction, instruction) - |> Instruction.Id.Tbl.of_list - in - let memory_operations = Instruction.Id.Tbl.create (DLL.length block.body) in - let ({ loads; stores; _ } : alloc_tracker) = - DLL.fold_left block.body - ~f: - (fun { loads; stores; fresh_allocs; stored_allocs; unsure_allocs } - basic_instruction -> - let instruction = Instruction.Basic basic_instruction in - let id = Instruction.id instruction in - if Instruction.is_alloc instruction - then - { loads; - stores; - fresh_allocs = Instruction.Id.Set.add id fresh_allocs; - stored_allocs; - unsure_allocs - } - else - let memory_operation = Memory_operation.init instruction in - match memory_operation with - | None -> - if Instruction.may_break_alloc_freshness instruction - then - { loads; - stores; - fresh_allocs = Instruction.Id.Set.empty; - stored_allocs = Instruction.Id.Set.empty; - unsure_allocs = - Instruction.Id.Set.union fresh_allocs stored_allocs - |> Instruction.Id.Set.union unsure_allocs + 
module Set = struct + include Set + + let unknown = singleton unknown + end + end + + module Memory_access = Vectorize_utils.Memory_access + + module Operation : sig + type t + + (* Reaching_definitions is used here to disambiguate register names. It + wouldn't be needed here if we had SSA representation. *) + val create : Instruction.t -> Reaching_definitions.t -> t option + + val desc : t -> Memory_access.desc + + val address_args : t -> Reg.t array + + val non_address_args : t -> Reg.t array + + val first_memory_arg_index : t -> int + + val get_instruction_id : t -> Instruction.Id.t + + (** [is_adjacent t1 t2] assumes that [t1] and [t2] have isomorphic operations, + and conservatively returns [false] unless it can prove that [t1] and [t2] + are adjacent (i.e., the accesses are disjoint but the intervals of addresses + have no gap between them). *) + val is_adjacent : t -> t -> Block.t -> Reaching_definitions.t -> bool + + (** [is_disjoint t1 t2] assumes that [t1] and [t2] have isomorphic operations, + and conservatively returns [false] unless it can prove that [t1] and [t2] + access disjoint memory addresses. *) + val is_disjoint : t -> t -> Block.t -> Reaching_definitions.t -> bool + + val dump : Format.formatter -> t -> unit + end = struct + type t = + { memory_access : Memory_access.t; + instruction : Instruction.t; + (* CR-soon gyorsh: the instruction is only for validation and + debugging. We don't actually need to keep the little subarrays of + args around, we can define iterators on the original arguments + array instead, at least for all current uses. Not worth optimizing + these little allocations at this point. [address_args] and + [address_args_defined_at] also have redundant information. 
*) + address_args_defined_at : Reg_defined_at_instruction.t array; + address_args : Reg.t array; + non_address_args : Reg.t array + } + + let desc t = Memory_access.desc t.memory_access + + let non_address_args t = t.non_address_args + + let address_args t = t.address_args + + let first_memory_arg_index t = + Memory_access.first_memory_arg_index t.memory_access + + let get_instruction_id t = Instruction.id t.instruction + + let memory_access (instruction : Instruction.t) : Memory_access.t option = + let width_in_bits c = Vectorize_utils.Width_in_bits.of_memory_chunk c in + let create ?first_memory_arg_index desc = + Some (Memory_access.create ?first_memory_arg_index desc) + in + match Instruction.op instruction with + | None -> + (* conservative *) + create Arbitrary + | Some op -> ( + match op with + | Load { memory_chunk; addressing_mode; mutability; is_atomic } -> + let desc = + Memory_access.Read + { width_in_bits = width_in_bits memory_chunk; + addressing_mode; + is_mutable = + (match mutability with + | Mutable -> true + | Immutable -> false); + is_atomic } - else { loads; stores; fresh_allocs; stored_allocs; unsure_allocs } - | Some memory_operation -> ( - let get_dependent_allocs_of_arg arg_i = - Dependency_graph.get_all_dependencies_of_arg dependency_graph id - ~arg_i - |> Instruction.Id.Set.filter - (Instruction.is_alloc - << Instruction.Id.Tbl.find id_to_instructions) - in - let rec get_dependent_allocs arg_i = - if arg_i < 0 - then Instruction.Id.Set.empty - else - Instruction.Id.Set.union - (get_dependent_allocs_of_arg - (arg_i + Memory_operation.index_offset memory_operation)) - (get_dependent_allocs (arg_i - 1)) - in - let dependent_allocs = - get_dependent_allocs - (Array.length - (Memory_operation.memory_arguments memory_operation) - - 1) - in - Instruction.Id.Tbl.add memory_operations id - { memory_operation with dependent_allocs; unsure_allocs }; - match memory_operation.op with - | Load -> - { loads = id :: loads; - stores; - fresh_allocs; - 
stored_allocs = Instruction.Id.Set.empty; - unsure_allocs = - Instruction.Id.Set.union stored_allocs unsure_allocs + in + create ~first_memory_arg_index:0 desc + | Store (memory_chunk, addressing_mode, is_assign) -> + let desc = + Memory_access.Write + { width_in_bits = width_in_bits memory_chunk; + addressing_mode; + init_or_assign = + (if is_assign + then Memory_access.Init_or_assign.Assignment + else Memory_access.Init_or_assign.Initialization) } - | Store -> - let new_stored_allocs = - Instruction.Id.Set.diff - (get_dependent_allocs_of_arg 0) - unsure_allocs - in - { loads; - stores = id :: stores; - fresh_allocs = - Instruction.Id.Set.diff fresh_allocs new_stored_allocs; - stored_allocs = - Instruction.Id.Set.union stored_allocs new_stored_allocs; - unsure_allocs - })) - ~init: - { loads = []; - stores = []; - fresh_allocs = Instruction.Id.Set.empty; - stored_allocs = Instruction.Id.Set.empty; - unsure_allocs = Instruction.Id.Set.empty - } - in - { loads = List.rev loads; stores = List.rev stores; memory_operations } + in + create desc ~first_memory_arg_index:1 + | Intop_atomic { op; size; addr } -> + let desc = + Memory_access.Read_and_write + { width_in_bits = + Vectorize_utils.Width_in_bits.of_atomic_bitwidth size; + addressing_mode = addr; + is_atomic = true + } + in + let first_memory_arg_index = + match op with Compare_and_swap -> 2 | Fetch_and_add -> 1 + in + create ~first_memory_arg_index desc + | Specific s -> Vectorize_specific.memory_access s + | Begin_region | End_region -> + (* conservative, don't reorder around region begin/end. *) + create Arbitrary + | Name_for_debugger _ | Dls_get | Poll | Opaque | Probe_is_enabled _ + -> + (* conservative, don't reorder around this instruction. *) + (* CR-someday gyorsh: Poll insertion pass is after the vectorizer. + Currently, it inserts instruction at the end of a block, so it + would not have affected vectorizer decisions. 
Consider soundness + of it carefully if the vectorizer is extended beyond basic blocks + (also in arm64 backend). Note that some poll instructions can + also be inserted before the vectorizer by the user or during + selection. *) + (* CR-soon gyorsh: Update [Name_for_debugger.regs] if relevant + instructions are vectorized. Currently, the debug info would be + out of sync. *) + create Arbitrary + | Spill | Reload -> + Misc.fatal_error + "Unexpected instruction Spill or Reload during vectorize" + | Move | Reinterpret_cast _ | Static_cast _ | Const_int _ + | Const_float32 _ | Const_float _ | Const_symbol _ | Const_vec128 _ + | Stackoffset _ | Intop _ | Intop_imm _ | Floatop _ | Csel _ | Alloc _ + -> + None) - let can_cross (t : t) (instruction_1 : Instruction.t) - (instruction_2 : Instruction.t) = - let get_memory_operation instruction = - Instruction.Id.Tbl.find_opt t.memory_operations - (Instruction.id instruction) - in - match - get_memory_operation instruction_1, get_memory_operation instruction_2 - with - | None, _ | _, None -> - (* Make sure that they are not both "dangerous", ie. 
thinigs like allocs - or specific stores *) - Instruction.can_cross_loads_or_stores instruction_1 - || Instruction.can_cross_loads_or_stores instruction_2 - | Some memory_operation_1, Some memory_operation_2 -> ( - match memory_operation_1.op, memory_operation_2.op with - | Load, Load -> true - | Load, Store | Store, Load | Store, Store -> - if Memory_operation.compare_addressing_modes_and_arguments - memory_operation_1 memory_operation_2 - = 0 - then - let check_direct_separation left_memory_operation - right_memory_operation = - match - Memory_operation.offset_in_bytes left_memory_operation - right_memory_operation - with - | None -> false - | Some offset -> - offset - >= (left_memory_operation.Memory_operation.memory_chunk - |> Cmm.width_in_bytes) + let create (instruction : Instruction.t) reaching_definitions : t option = + match memory_access instruction with + | None -> None + | Some memory_access -> + let arguments = Instruction.arguments instruction in + let address_args, non_address_args = + match Memory_access.first_memory_arg_index memory_access with + | 0 -> + (* avoid copy *) + arguments, [||] + | i -> + ( Array.sub arguments i (Array.length arguments - i), + Array.sub arguments 0 i ) in - check_direct_separation memory_operation_1 memory_operation_2 - || check_direct_separation memory_operation_2 memory_operation_1 - else - (* Assumption: If memory operation 1 definitely depends on an - allocation and memory operation 2 definitely does not depend on it, - then they are disjoint *) - Instruction.Id.Set.is_empty - (Instruction.Id.Set.diff memory_operation_1.dependent_allocs - (Instruction.Id.Set.union memory_operation_2.dependent_allocs - memory_operation_2.unsure_allocs)) - || Instruction.Id.Set.is_empty - (Instruction.Id.Set.diff memory_operation_2.dependent_allocs - (Instruction.Id.Set.union memory_operation_1.dependent_allocs - memory_operation_1.unsure_allocs))) - - let dump ppf ({ loads; stores; memory_operations } : t) = - let open Format in - 
let print_list list = - List.iter - (fun id -> - let address = Instruction.Id.Tbl.find memory_operations id in - Memory_operation.dump ppf address) - list - in - fprintf ppf "\nmemory accesses (loads):\n"; - print_list loads; - fprintf ppf "\nmemory accesses (stores):\n"; - print_list stores; - fprintf ppf "\n" -end - -module Seed : sig - type t - - val from_block : Cfg.basic_block -> t list + let address_args_defined_at = + Array.map + (fun reg -> + Reg_defined_at_instruction.create reg instruction + reaching_definitions) + address_args + in + Some + { memory_access; + instruction; + address_args; + address_args_defined_at; + non_address_args + } - val dump : Format.formatter -> t list -> unit -end = struct - type t = Memory_accesses.Memory_operation.t list - - let can_cross memory_accesses instruction_1 instruction_2 = - let reg_array_to_set = Reg.Set.of_list << Array.to_list in - let argument_set = reg_array_to_set << Instruction.arguments - and affected_set instruction = - Reg.Set.union - (Instruction.results instruction |> reg_array_to_set) - (Instruction.destroyed instruction |> reg_array_to_set) - in - let arguments_1 = argument_set instruction_1 - and affected_1 = affected_set instruction_1 - and arguments_2 = argument_set instruction_2 - and affected_2 = affected_set instruction_2 in - if Reg.Set.disjoint affected_1 affected_2 - && Reg.Set.disjoint arguments_1 affected_2 - && Reg.Set.disjoint affected_1 arguments_2 - then Memory_accesses.can_cross memory_accesses instruction_1 instruction_2 - else false - - let from_block (block : Cfg.basic_block) : t list = - (* For each store instruction, it tries to form a seed with the closest - stores after it, it will go down the DLL of instructions and tries to - move the store instructions across the non-store instructions until all - the store instructions are together *) - let memory_accesses = Memory_accesses.from_block block in - let stores = Memory_accesses.stores memory_accesses in - List.filter_map - (fun 
store_id -> - let starting_cell = + let to_bits bytes = bytes * 8 + + let get_addressing_mode t = + match Memory_access.desc t.memory_access with + | Read { addressing_mode; _ } + | Write { addressing_mode; _ } + | Read_and_write { addressing_mode; _ } -> + Some addressing_mode + | Arbitrary | Alloc -> None + + let get_width_in_bits t = + match Memory_access.desc t.memory_access with + | Read { width_in_bits; _ } + | Write { width_in_bits; _ } + | Read_and_write { width_in_bits; _ } -> + Some width_in_bits + | Arbitrary | Alloc -> None + + let get_width_in_bits_exn t = + t |> get_width_in_bits |> Option.get + |> Vectorize_utils.Width_in_bits.to_int + + let offset_in_bytes ~arg_offset_in_bytes (t1 : t) (t2 : t) = + match get_addressing_mode t1, get_addressing_mode t2 with + | Some addressing_mode_1, Some addressing_mode_2 -> + Arch.addressing_offset_in_bytes addressing_mode_1 addressing_mode_2 + t1.address_args_defined_at t2.address_args_defined_at + ~arg_offset_in_bytes + | (Some _ | None), _ -> None + + let actual_arg_offset_in_bytes block reaching_definitions r1 r2 = + (* Heuristic for handling array accesses where the scale register is not + the same, but a simple equation. *) + Reg_defined_at_instruction.get_offset r1 r2 block reaching_definitions + + (* CR-someday gyorsh: this can be extended in the future to handle + "scatter-gather" accesses. *) + let is_adjacent (t1 : t) (t2 : t) block reaching_definitions = + let arg_offset_in_bytes = + actual_arg_offset_in_bytes block reaching_definitions + in + assert (Instruction.have_isomorphic_op t1.instruction t2.instruction); + match offset_in_bytes t1 t2 ~arg_offset_in_bytes with + | None -> false + | Some offset_in_bytes -> + to_bits offset_in_bytes = get_width_in_bits_exn t1 + + (** [is_before t1 t2] returns true if we can prove that t1 and t2 are disjoint + intervals within the same block, and t1 is before t2. 
*) + let is_before ~arg_offset_in_bytes t1 t2 block = + let res = + match offset_in_bytes ~arg_offset_in_bytes t1 t2 with + | None -> false + | Some offset_in_bytes -> + State.dump_debug (Block.state block) + "offset_in_bytes = %d, get_width_in_bits_exn t1 = %d \n" + (to_bits offset_in_bytes) (get_width_in_bits_exn t1); + to_bits offset_in_bytes >= get_width_in_bits_exn t1 + in + State.dump_debug (Block.state block) "is_before (%a) (%a) = %b \n" + Instruction.print t1.instruction Instruction.print t2.instruction res; + res + + (* [is_adjacent] implies [is_disjoint + ~arg_offset_in_bytes:actual_arg_offset_in_bytes] *) + let is_disjoint ~arg_offset_in_bytes t1 t2 block = + let res = + is_before ~arg_offset_in_bytes t1 t2 block + || is_before ~arg_offset_in_bytes t2 t1 block + in + State.dump_debug (Block.state block) "is_disjoint (%a) (%a) = %b \n" + Instruction.print t1.instruction Instruction.print t2.instruction res; + res + + (* CR gyorsh: The middle end has more accurate information about + expressions that evaluate to closures. We can propagate it to the + backend in the future and have more precise Reg.typ for example, + instead of the current heuristic. *) + let maybe_closure_block t = + (* Closure blocks are not mutated after initialization. *) + match Memory_access.desc t.memory_access with + | Read { is_mutable = true; _ } -> + (* CR gyorsh: is it sound to return [false] here? Are reads from + closure emitted as immutable? Can immutability of closure blocks be + lost through optimizations such as CSE in the middle end? 
*) + false + | Read { is_mutable = false; _ } -> true + | Read_and_write _ -> false + | Write { init_or_assign = Assignment; _ } -> false + | Write { init_or_assign = Initialization; _ } -> true + | Arbitrary -> true + | Alloc -> true + + let points_to_ocaml_block t block = + let val_args = + t.address_args_defined_at |> Array.to_seq + |> Seq.filter Reg_defined_at_instruction.is_val + |> List.of_seq + in + State.dump_debug (Block.state block) + "points_to_ocaml_block: %a val_args = %a\n" Instruction.print + t.instruction + (Format.pp_print_list Reg_defined_at_instruction.print) + val_args; + match val_args with [base] -> Some base | [] | _ -> None + + let points_to_ocaml_block_not_closure_block t block = + if maybe_closure_block t then None else points_to_ocaml_block t block + + let is_disjoint t1 t2 block reaching_definitions = + let equiv () = + (* heuristic to detect that either (a) r1 and r2 point to the same + ocaml block, or (b) r1 and r2 point to disjoint ocaml blocks, but + not to partially overlapping. 
*) match - DLL.find_cell_opt block.body ~f:(fun instruction -> - Basic instruction |> Instruction.id - |> Instruction.Id.equal store_id) + ( points_to_ocaml_block_not_closure_block t1 block, + points_to_ocaml_block_not_closure_block t2 block ) with - | Some current_cell -> DLL.next current_cell - | None -> assert false + | Some base1, Some base2 -> + State.dump_debug (Block.state block) + "Found equiv: base 1 = %a, base2 = %a\n" + Reg_defined_at_instruction.print base1 + Reg_defined_at_instruction.print base2; + Some (base1, base2) + | (Some _ | None), _ -> + State.dump_debug (Block.state block) + "Not found equiv for (%a) and (%a)\n" Instruction.print_id + t1.instruction Instruction.print_id t2.instruction; + None in - let starting_memory_operation = - Memory_accesses.get_memory_operation_exn memory_accesses store_id + let actual_arg_offset_in_bytes r1 r2 = + actual_arg_offset_in_bytes block reaching_definitions r1 r2 in - let items_in_vector = - vector_width_in_bytes - / Memory_accesses.Memory_operation.width starting_memory_operation + let arg_offset_in_bytes r1 r2 = + State.dump_debug (Block.state block) "arg_offset_in_bytes (%a) (%a)\n" + Reg_defined_at_instruction.print r1 Reg_defined_at_instruction.print + r2; + match actual_arg_offset_in_bytes r1 r2 with + | Some _ as res -> res + | None -> ( + match equiv () with + | None -> None + | Some (base1, base2) -> + if Reg_defined_at_instruction.equal base1 r1 + && Reg_defined_at_instruction.equal base2 r2 + || Reg_defined_at_instruction.equal base1 r2 + && Reg_defined_at_instruction.equal base2 r1 + then ( + (* pretend that the registers r1 and r2 are the same. 
*) + State.dump_debug (Block.state block) + "arg_offset_in_bytes (%a) (%a) = Some 0\n" + Reg_defined_at_instruction.print r1 + Reg_defined_at_instruction.print r2; + Some 0) + else None) in - let can_cross_chunk seed instruction = - List.fold_left - (fun can memory_operation -> - can - && can_cross memory_accesses - (Memory_accesses.Memory_operation.instruction - memory_operation) - instruction) - true seed - in - let rec find_seed n seed cell_option = - if n = 0 - then Some seed + is_disjoint ~arg_offset_in_bytes t1 t2 block + + let dump_desc ppf (t : t) = + let open Format in + let print_bool msg ppf b = + if b then fprintf ppf " (%s=true)" msg else () + in + let print_addr ppf addr = + Arch.print_addressing Printreg.reg addr ppf t.address_args + in + let pr (init_or_assign : Memory_access.Init_or_assign.t) = + match init_or_assign with + | Initialization -> "init" + | Assignment -> "assign" + in + match desc t with + | Alloc -> fprintf ppf "Alloc" + | Arbitrary -> fprintf ppf "Arbitrary" + | Read { width_in_bits; addressing_mode; is_mutable; is_atomic } -> + fprintf ppf "Read%a [%a]%a%a" Vectorize_utils.Width_in_bits.print + width_in_bits print_addr addressing_mode (print_bool "is_mutable") + is_mutable (print_bool "is_atomic") is_atomic + | Write { width_in_bits; addressing_mode; init_or_assign } -> + fprintf ppf "Write%a [%a] %s" Vectorize_utils.Width_in_bits.print + width_in_bits print_addr addressing_mode (pr init_or_assign) + | Read_and_write { width_in_bits; addressing_mode; is_atomic } -> + fprintf ppf "Read_and_write%a [%a]%a" + Vectorize_utils.Width_in_bits.print width_in_bits print_addr + addressing_mode (print_bool "is_atomic") is_atomic + + let dump ppf t = + let dump_index = + let ind = first_memory_arg_index t in + if Int.equal ind 0 + then "" + else ", first_memory_arg_index=" ^ string_of_int ind + in + Format.fprintf ppf "%a%s\n" dump_desc t dump_index + end + + module Partitions : sig + (** Graph where nodes represent disjoint memory areas 
(partitions) and an edge from + partition A to B means that A may point to B (i.e., a memory location in A may + contain the address of a memory location in B). *) + type t + + val init : t + + val add_node : t -> Partition.t -> t + + val add_edges : t -> src:Partition.Set.t -> dst:Partition.Set.t -> t + + val get_successors : t -> Partition.Set.t -> Partition.Set.t + + val all : t -> Partition.Set.t + + (* val fold : f:(Partition.t -> 'a -> 'a) -> init:'a -> t -> 'a *) + end = struct + type t = Partition.Set.t Partition.Map.t + + let init = + (* [unknown] partition may point to itself *) + Partition.Map.singleton Partition.unknown Partition.Set.unknown + + let add_node t p = Partition.Map.add p Partition.Set.empty t + + let add_edges t ~src ~dst = + Partition.Set.fold + (fun src_p acc -> + let old = Partition.Map.find src_p acc in + Partition.Map.add src_p (Partition.Set.union dst old) acc) + src t + + let get_successors t set = + let init = Partition.Set.empty in + Partition.Set.fold + (fun src acc -> + let dst = Partition.Map.find src t in + Partition.Set.union dst acc) + set init + + let all t = + t |> Partition.Map.to_seq |> Seq.map fst |> Partition.Set.of_seq + + (* let fold ~f ~init t = Partition.Map.fold (fun p _ acc -> f p acc) t + init *) + end + + module Accesses : sig + type t + + val empty : t + + val add : t -> Partition.t -> Operation.t -> t + + val add_all : t -> Partition.Set.t -> Operation.t -> t + + val fold : + f:(Partition.t -> Operation.t list -> 'a -> 'a) -> init:'a -> t -> 'a + end = struct + (** The order of operations in the list is the reverse of the order they appear in the + block (relative to each other). 
*) + type t = Operation.t list Partition.Map.t + + let empty = Partition.Map.empty + + let get t p = + match Partition.Map.find_opt p t with None -> [] | Some s -> s + + let add t p op = Partition.Map.add p (op :: get t p) t + + let add_all t set op = + Partition.Set.fold (fun p acc -> add acc p op) set t + + let fold ~f ~init t = Partition.Map.fold f t init + end + + module Aliases : sig + type t + + val empty : t + + val replace_regs : Reg.t array -> Partition.Set.t -> t -> t + + val remove_regs : Reg.t array -> t -> t + + val get_regs : Reg.t array -> t -> Partition.Set.t + end = struct + (** for each p, a set of partitions that p may point to, including p itself + explicitly. *) + type t = Partition.Set.t Reg.Map.t + + let empty = Reg.Map.empty + + let find t reg = + match Reg.Map.find_opt reg t with + | None -> + (* reg is implicitly associated with [Partition.unknown] *) + Partition.Set.unknown + | Some old_partitions -> old_partitions + + let get_regs regs t = + Array.fold_left + (fun set reg -> + let p = find t reg in + Partition.Set.union set p) + Partition.Set.empty regs + + let replace reg partitions t = Reg.Map.add reg partitions t + + let replace_regs regs partitions t = + Array.fold_left (fun acc reg -> replace reg partitions acc) t regs + + let remove_regs regs t = replace_regs regs Partition.Set.empty t + end + + module Points_to : sig + type t + + val from_block : Block.t -> Reaching_definitions.t -> t + + (* val transform : t -> Instruction.t -> t *) + (* val partitions : t -> Partitions.t *) + val operations : t -> Operation.t Instruction.Id.Map.t + + val accesses : t -> Accesses.t + end = struct + (* CR gyorsh: not sure if the extra layer of abstraction (Aliases and + Partitions modules) is useful. *) + type t = + { partitions : Partitions.t; + (** For each partition, the set of partitions it may point. *) + aliases : Aliases.t; + (** For each register, the set of partitions it may point to. 
*) + accesses : Accesses.t; + (** For each partition, the list of instructions that may access it. *) + operations : Operation.t Instruction.Id.Map.t + (** Mapping from instruction id to the corresponding memory operations. Instructions + that do no access memory are not in the map. *) + } + + let init = + { partitions = Partitions.init; + aliases = Aliases.empty; + accesses = Accesses.empty; + operations = Instruction.Id.Map.empty + } + + (* let partitions t = t.partitions *) + + let accesses t = t.accesses + + let operations t = t.operations + + let update_aliases t instruction result_may_point_to_partitions = + let aliases = + (* Kill: set the points-to of clobbered registers to empty because + using these registers is illegal. Gen: set the points-to of + results. *) + t.aliases + |> Aliases.remove_regs (Instruction.destroyed instruction) + |> Aliases.replace_regs + (Instruction.results instruction) + result_may_point_to_partitions + in + { t with aliases } + + let update t ~may_access_partitions ~may_point_to_partitions ~is_atomic + instruction op = + let may_access_partitions = + if is_atomic + then + (* Should not be reordered with respect to any other + instructions. 
*) + Partitions.all t.partitions + else may_access_partitions + in + let accesses = Accesses.add_all t.accesses may_access_partitions op in + let result_may_point_to_partitions = + Partition.Set.union may_point_to_partitions + (Partitions.get_successors t.partitions may_access_partitions) + in + let t = { t with accesses } in + update_aliases t instruction result_may_point_to_partitions + + let transform_write t ~is_atomic ~may_access_any_partition instruction op + = + let addr_args = Operation.address_args op in + let may_access_partitions = + if may_access_any_partition + then Partitions.all t.partitions + else Aliases.get_regs addr_args t.aliases + in + let value_to_store = Operation.non_address_args op in + let may_point_to_partitions = + Aliases.get_regs value_to_store t.aliases + in + let partitions = + Partitions.add_edges t.partitions ~src:may_access_partitions + ~dst:may_point_to_partitions + in + let t = { t with partitions } in + update t ~may_access_partitions ~may_point_to_partitions ~is_atomic + instruction op + + let transform t instruction ~reaching_definitions = + match Operation.create instruction reaching_definitions with + | None -> + (* propagates from args to res *) + let args_may_point_to_partitions = + Aliases.get_regs (Instruction.arguments instruction) t.aliases + in + update_aliases t instruction args_may_point_to_partitions + | Some op -> ( + let id = Instruction.id instruction in + let t = + { t with operations = Instruction.Id.Map.add id op t.operations } + in + match Operation.desc op with + | Alloc -> + let fresh_partition = Partition.create ~allocation_site:id in + let t = + { t with + partitions = Partitions.add_node t.partitions fresh_partition; + accesses = Accesses.add t.accesses fresh_partition op + } + in + update_aliases t instruction + (Partition.Set.singleton fresh_partition) + | Read { is_atomic; _ } -> + let non_address_args = Operation.non_address_args op in + let may_point_to_partitions = + Aliases.get_regs 
non_address_args t.aliases + in + let addr_args = Operation.address_args op in + let may_access_partitions = Aliases.get_regs addr_args t.aliases in + update t ~may_access_partitions ~may_point_to_partitions ~is_atomic + instruction op + | Write _ -> + assert (Array.length (Instruction.results instruction) = 0); + transform_write t ~is_atomic:false ~may_access_any_partition:false + instruction op + | Read_and_write { is_atomic; _ } -> + transform_write t ~is_atomic ~may_access_any_partition:false + instruction op + | Arbitrary -> + transform_write t ~is_atomic:true ~may_access_any_partition:true + instruction op) + + let from_block block reaching_definitions = + DLL.fold_left (Block.body block) ~init ~f:(fun acc i -> + transform acc (Instruction.basic i) ~reaching_definitions) + end + + (* CR-someday gyorsh: Merge with the dependency graph for regs. Both can be + computed alongside reaching definitions and points_to. *) + module Dependencies : sig + type t + + val from_block : Accesses.t -> Block.t -> Reaching_definitions.t -> t + + val get : t -> Instruction.Id.t -> Instruction.Id.Set.t + + val for_all : + t -> f:(Instruction.Id.t -> Instruction.Id.t -> bool) -> bool + + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + (** [i] is mapped to [j] if [i] may directly depend on [j] via memory, i.e., [i] may + read and [j] may write the same memory location. 
*) + type t = Instruction.Id.Set.t Instruction.Id.Map.t + + let get t id = + let default = Instruction.Id.Set.empty in + Instruction.Id.Map.find_opt id t |> Option.value ~default + + let for_all t ~f = + Instruction.Id.Map.for_all + (fun id dsts -> Instruction.Id.Set.for_all (f id) dsts) + t + + type dependency_kind = + | No_direct_dependency + | Data_dependency + | Order_constraint + + let dep_kind_to_string d = + match d with + | No_direct_dependency -> "No direct dependency" + | Data_dependency -> "Data dependency" + | Order_constraint -> "Order constraint" + + let get_dependency_kind ~src ~dst block reaching_definitions = + (* [get_dependency_kind ~src ~dst] conservatively answers the question: + does [src] directly depend on [dst]? *) + let if_not_disjoint m1 m2 dep_kind = + if Operation.is_disjoint m1 m2 block reaching_definitions + then No_direct_dependency + else ( + State.dump_debug (Block.state block) "%s: %a->%a\n" + (dep_kind_to_string dep_kind) + Instruction.Id.print + (Operation.get_instruction_id m1) + Instruction.Id.print + (Operation.get_instruction_id m2); + dep_kind) + in + (* CR-someday gyorsh: this big match can be simplified/split by checking + the following simple conditions: (a) if one of the operations is + atomic or one of the operations does not have an addressing_mode, + then no disjointness check needed. (b) if [src] may read and [dst] + may write, then this is a [Data_dependencies], otherwise it is an + [Order_constraint]. *) + match Operation.desc src, Operation.desc dst with + | Alloc, _ -> + (* Currently, Alloc always starts a new partition, so it is the first + operation in the list of accesses of its partition. 
*) + Misc.fatal_error "Unexpected Alloc" + | (Read _ | Write _ | Read_and_write _ | Arbitrary), Alloc -> + Order_constraint + | Read { is_atomic = true; _ }, (Write _ | Read_and_write _ | Arbitrary) + -> + (* Treat atomic instructions as memory barriers, don't reorder + anything around an atomic instruction, and conservatively assume + that an atomic read may access anywhere in memory. *) + Data_dependency + | Read { is_atomic = true; _ }, Read _ + | Read { is_atomic = false; _ }, Read { is_atomic = true; _ } + | (Write _ | Read_and_write _ | Arbitrary), Read { is_atomic = true; _ } + -> + Order_constraint + | Read { is_atomic = false; _ }, Read { is_atomic = false; _ } -> + No_direct_dependency + | Write _, Write _ | Write _, Read { is_atomic = false; _ } -> + if_not_disjoint src dst Order_constraint + | Read { is_atomic = false; _ }, Write _ -> + if_not_disjoint src dst Data_dependency + | Arbitrary, Read { is_atomic = false; _ } -> Order_constraint + | ( Arbitrary, + (Write _ | Read_and_write { is_atomic = false; _ } | Arbitrary) ) + | (Read _ | Read_and_write _), Arbitrary -> + Data_dependency + | Arbitrary, Read_and_write { is_atomic = true; _ } -> Data_dependency + | Write _, Arbitrary -> Order_constraint + | ( Read_and_write { is_atomic = false; _ }, + Read_and_write { is_atomic = false; _ } ) + | Read_and_write { is_atomic = false; _ }, Write _ + | Read { is_atomic = false; _ }, Read_and_write { is_atomic = false; _ } + -> + if_not_disjoint src dst Data_dependency + | Read_and_write { is_atomic = false; _ }, Read { is_atomic = false; _ } + -> + if_not_disjoint src dst Order_constraint + | ( ( Read { is_atomic = false; _ } + | Read_and_write { is_atomic = false; _ } ), + Read_and_write { is_atomic = true; _ } ) -> + Data_dependency + | Write _, Read_and_write { is_atomic = true; _ } -> Order_constraint + | Read_and_write { is_atomic = true; _ }, Read _ -> Order_constraint + | Read_and_write { is_atomic = true; _ }, Write _ -> Data_dependency + | 
Read_and_write { is_atomic = true; _ }, Read_and_write _ -> + Data_dependency + | Write _, Read_and_write { is_atomic = false; _ } -> + if_not_disjoint src dst Order_constraint + + let from_block accesses block reaching_definitions = + (* [create] is quadratic in the number of memory operations in the + block. *) + let add t ~src ~dst = + let src_id = Operation.get_instruction_id src in + let dst_id = Operation.get_instruction_id dst in + let f set = + match set with + | None -> Some (Instruction.Id.Set.singleton dst_id) + | Some set -> Some (Instruction.Id.Set.add dst_id set) + in + Instruction.Id.Map.update src_id f t + in + let add_dep t ~src ~dst = + match get_dependency_kind ~src ~dst block reaching_definitions with + | No_direct_dependency -> t + | Data_dependency | Order_constraint -> + (* CR-someday gyorsh: we don't currently use the distinction between + data dependencies and order constraints, but it can be useful for + future optimizations. *) + add t ~src ~dst + in + let rec add_deps t ~src operations = + match operations with + | [] -> t + | hd :: tl -> + let t = add_dep t ~src ~dst:hd in + add_deps t ~src tl + in + let rec add_operations t operations = + match operations with + | [] -> t + | hd :: tl -> + let t = add_deps t ~src:hd tl in + add_operations t tl + in + Accesses.fold accesses ~init:Instruction.Id.Map.empty + ~f:(fun _p operations acc -> add_operations acc operations) + + let dump ppf ~block t = + (* print dependencies in the order they appear in the block. 
*) + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let src = Instruction.id instruction in + let dsts = get t src in + Instruction.Id.Set.iter + (fun dst -> + Format.fprintf ppf "memory dep: %a->%a\n" Instruction.Id.print + src Instruction.Id.print dst) + dsts) + end + + type t = Operation.t Instruction.Id.Map.t + + let from_block block reaching_definitions = + let pts = Points_to.from_block block reaching_definitions in + let operations = Points_to.operations pts in + let deps = + Dependencies.from_block (Points_to.accesses pts) block + reaching_definitions + in + operations, deps + + let get_memory_operation t instruction = + Instruction.Id.Map.find_opt (Instruction.id instruction) t + + let get_memory_operation_exn t instruction = + Instruction.Id.Map.find (Instruction.id instruction) t + + let is_adjacent (t : t) i1 i2 block reaching_definitions : bool = + Operation.is_adjacent + (get_memory_operation_exn t i1) + (get_memory_operation_exn t i2) + block reaching_definitions + + let dump ppf ~block t = + (* print dependencies in the order they appear in the block. 
*) + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + match Instruction.Id.Map.find_opt id t with + | None -> () + | Some op -> + Format.fprintf ppf "memory op for %a: %a\n" Instruction.Id.print id + Operation.dump op) + end + + module Dependency_graph : sig + type t + + val from_block : + Block.t -> Reaching_definitions.t -> Memory.Dependencies.t -> t + + val independent : t -> Instruction.Id.t -> Instruction.Id.t -> bool + + val get_direct_dependency_of_arg : + t -> Instruction.Id.t -> arg_i:int -> Instruction.Id.t option + + val get_direct_dependencies : t -> Instruction.Id.t -> Instruction.Id.Set.t + + val dump : Format.formatter -> block:Block.t -> t -> unit + end = struct + module Node = struct + type t = + { direct_dependencies_of_args : Instruction.Id.t option array; + (** instruction that defines the argument *) + direct_dependencies : Instruction.Id.Set.t; + (** direct dependencies of all register arguments of this instruction (does + not include memory dependencies). *) + all_dependencies : Instruction.Id.Set.t + (** transitive reflexive dependencies of this instruction, covers register and + memory dependencies (but not order constraints). 
*) + } + + let init instruction reaching_definitions : t = + let arguments = Instruction.arguments instruction in + let id = Instruction.id instruction in + let direct_dependencies_of_args = + Array.map (Reaching_definitions.get reaching_definitions id) arguments + in + let direct_dependencies = + Array.fold_left + (fun acc dep -> + match dep with + | None -> acc + | Some dep -> Instruction.Id.Set.add dep acc) + Instruction.Id.Set.empty direct_dependencies_of_args + in + { direct_dependencies_of_args; + direct_dependencies; + (* initialized later: *) + all_dependencies = Instruction.Id.Set.empty + } + end + + type t = Node.t Instruction.Id.Tbl.t + + let get_direct_dependency_of_arg dependency_graph id ~arg_i = + let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in + node.direct_dependencies_of_args.(arg_i) + + let get_all_dependencies dependency_graph id = + let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in + node.all_dependencies + + let get_direct_dependencies dependency_graph id = + let (node : Node.t) = Instruction.Id.Tbl.find dependency_graph id in + node.direct_dependencies + + let independent t i1 i2 = + let all_deps_of_i1 = get_all_dependencies t i1 in + let all_deps_of_i2 = get_all_dependencies t i2 in + not + (Instruction.Id.Set.mem i1 all_deps_of_i2 + || Instruction.Id.Set.mem i2 all_deps_of_i1) + + (* CR-soon gyorsh: we don't need to keep [Points_to] and table of + [Reaching_definitions.t] per instruction after we computed direct + dependencies between instructions. For now it's easier to keep them for + debugging and probably not too expensive as there is a lot of sharing in + the representation of the values in these tables, because they are + constructed incrementally. 
*) + let from_block (block : Block.t) reaching_definitions mem_deps = + let body = Block.body block in + let terminator = Block.terminator block in + let t = Block.size block |> Instruction.Id.Tbl.create in + let add_dependencies (instruction : Instruction.t) = + (* Add direct dependencies *) + let node = Node.init instruction reaching_definitions in + (* Add transitive dependencies *) + let id = Instruction.id instruction in + let all_dependencies = + let init = + (* data dependencies via registers and memory, and order + constraints. *) + Instruction.Id.Set.union node.direct_dependencies + (Memory.Dependencies.get mem_deps id) + in + Instruction.Id.Set.fold + (fun new_id acc -> + let (new_node : Node.t) = Instruction.Id.Tbl.find t new_id in + Instruction.Id.Set.union new_node.all_dependencies acc) + init init + in + let node = { node with all_dependencies } in + Instruction.Id.Tbl.add t id node + in + DLL.iter body ~f:(fun i -> add_dependencies (Instruction.basic i)); + (* CR gyorsh: not sure we need dependencies before terminator. 
*) + add_dependencies terminator; + t + + let dump ppf ~(block : Block.t) (t : t) = + let open Format in + let print_node (instruction : Instruction.t) = + let args = Instruction.arguments instruction in + let print_reg arg_i dep = + let pp ppf o = + match o with + | None -> fprintf ppf "none" + | Some id -> fprintf ppf "instruction %a" Instruction.Id.print id + in + let reg = args.(arg_i) in + fprintf ppf "argument %d, %a depends on %a\n" arg_i Printreg.reg reg + pp dep + in + let id = Instruction.id instruction in + let node = Instruction.Id.Tbl.find t id in + fprintf ppf "\n%a\n" Instruction.print instruction; + fprintf ppf "\ndirect dependencies:\n"; + Instruction.Id.Set.iter + (fprintf ppf "%a " Instruction.Id.print) + node.direct_dependencies; + fprintf ppf "\nall dependencies:\n"; + Instruction.Id.Set.iter + (fprintf ppf "%a " Instruction.Id.print) + node.all_dependencies; + fprintf ppf "\nis direct dependency of:\n"; + fprintf ppf "\narg dependencies:\n"; + Array.iteri print_reg node.direct_dependencies_of_args; + fprintf ppf "\n" + in + fprintf ppf "\ndependency graph:\n"; + DLL.iter (Block.body block) ~f:(fun instruction -> + print_node (Instruction.basic instruction)); + print_node (Block.terminator block); + fprintf ppf "\n" + end + + (* CR-soon gyorsh: make [t] lazy, and only construct when needed, for + performance reasons. For now, for testing purposes, it's better to always + construct to increase coverage. *) + type t = + { reaching_definitions : Reaching_definitions.t; + dependency_graph : Dependency_graph.t; + memory_operations : Memory.t; + mem_deps : Memory.Dependencies.t; + block : Block.t + } + + let state t = Block.state t.block + + let independent t instruction_1 instruction_2 = + (* This covers register and memory dependencies and order constraints, + because of the way we constructed the graph. 
*) + let id1 = Instruction.id instruction_1 in + let id2 = Instruction.id instruction_2 in + Dependency_graph.independent t.dependency_graph id1 id2 + + let all_independent t instructions = + (* All pairs of instructions in the group are independent. *) + let rec check t instructions = + match instructions with + | [] -> true + | hd :: tl -> + let b = List.for_all (independent t hd) tl in + if b + then ( + State.dump_debug (state t) "Group.all_independent: %a\n" + Instruction.print_id hd; + check t tl) + else false + in + check t instructions + + let all_adjacent t instructions = + (* CR-soon gyorsh: shuffles are not supported yet, but this is the place to + identify them, for example: i<-[a];j<-[a+8];[b]<-j;[b+8]<-i *) + let rec check_adjacent hd1 tl1 = + match tl1 with + | [] -> true + | hd2 :: tl2 -> + if Memory.is_adjacent t.memory_operations hd1 hd2 t.block + t.reaching_definitions + then check_adjacent hd2 tl2 + else false + in + check_adjacent (List.hd instructions) (List.tl instructions) + + let for_all_memory_dependencies t ~f = + Memory.Dependencies.for_all ~f t.mem_deps + + (* let is_memory_operation t i = Memory.is_memory_operation + t.memory_operations i *) + + (* let width_in_bits t i = Memory.width_in_bits t.memory_operation i *) + + let get_direct_dependency_of_arg t = + Dependency_graph.get_direct_dependency_of_arg t.dependency_graph + + let get_direct_dependencies t id = + Dependency_graph.get_direct_dependencies t.dependency_graph id + + let get_direct_dependency_of_reg t id reg = + Reaching_definitions.get t.reaching_definitions id reg + + let get_memory_operation t instructon = + Memory.get_memory_operation t.memory_operations instructon + + let from_block block = + let state = Block.state block in + let reaching_definitions = Reaching_definitions.from_block block in + State.dump_debug state "%a@." 
+ (Reaching_definitions.dump ~block) + reaching_definitions; + let memory_operations, mem_deps = + Memory.from_block block reaching_definitions + in + State.dump_debug state "%a@." (Memory.dump ~block) memory_operations; + State.dump_debug state "%a@." (Memory.Dependencies.dump ~block) mem_deps; + let dependency_graph = + Dependency_graph.from_block block reaching_definitions mem_deps + in + State.dump_debug state "%a@." + (Dependency_graph.dump ~block) + dependency_graph; + { reaching_definitions; + dependency_graph; + memory_operations; + mem_deps; + block + } +end + +module Computation : sig + type t + + module Group : sig + type t + + val scalar_instructions : t -> Instruction.t list + + val vector_instructions : t -> Vectorize_utils.Vectorized_instruction.t list + + val iter_vectorizable_args : t -> f:(arg_i:int -> unit) -> unit + end + + module Seed : sig + type t + + val from_block : Block.t -> Dependencies.t -> t list + + val dump : Format.formatter -> t list -> unit + end + + val from_seed : Block.t -> Dependencies.t -> Seed.t -> t option + + val dump : Format.formatter -> block:Block.t -> t -> unit + + val dump_one_line_stat : Format.formatter -> t -> unit + + val dump_all : Format.formatter -> block:Block.t -> t list -> unit + + (** [contains t i] returns true iff instruction [i] belongs to some + group in [t]. [i] need not be the key instruction of the group. *) + val contains : t -> Instruction.t -> bool + + (** [find_group t key] returns the group of [key] instruction, or + None if [key] instruction is not the key of any group in [t]. *) + val find_group : t -> key:Instruction.t -> Group.t option + + (** Selects disjoint computations from the input list of computations + and returns their union. *) + val select_and_join : t list -> Block.t -> Dependencies.t -> t option + + val cost : t -> int + + val num_groups : t -> int +end = struct + module Group : sig + (** Represents scalar instructions and the corresponding + vector instructions. 
*) + type t + + (** guaranteed to return a list with at least 2 instructions. *) + val scalar_instructions : t -> Instruction.t list + + (** guaranteed to return a non-empty list. *) + val vector_instructions : t -> Vectorize_utils.Vectorized_instruction.t list + + (** maps over the indexes of arguments that need to be considered when vectorizing + dependencies. Currently skips over arguments that are used for memory address + calculation. [init] ensures that the memory address arguments have the same values + for all instructions in the group. The result list is in reverse order. *) + val map_vectorizable_args : t -> f:(arg_i:int -> 'a) -> 'a list + + val iter_vectorizable_args : t -> f:(arg_i:int -> unit) -> unit + + val for_all_non_vectorizable_args : t -> f:(arg_i:int -> bool) -> bool + + (** [init width_in_bits instructions] checks that [instructions] + are supported isomorphic scalar instructions that are + inter-independent and if they have memory accesses, + the accesses must be adjacent. *) + val init : + width_in_bits:Vectorize_utils.Width_in_bits.t -> + Instruction.t list -> + Dependencies.t -> + t option + + val equal : t -> t -> bool + + val dump : Format.formatter -> t -> unit + end = struct + type t = + { vector_instructions : Vectorize_utils.Vectorized_instruction.t list; + instructions : Instruction.t list; + non_address_arg_count : int; + arg_count : int + } + + let scalar_instructions t = t.instructions + + let vector_instructions t = t.vector_instructions + + let equal t1 t2 = + List.equal Instruction.equal_id t1.instructions t2.instructions + + let get_arg_count i = Instruction.arguments i |> Array.length + + let get_res_count i = Instruction.results i |> Array.length + + let map_vectorizable_args t ~f = + (* Currently, the code assumes that the (variable number of) address args + are always at the end of the array of arguments of an instruction. 
The + non-address args therefore start from 0, conveniently, and we don't + need to know if there are any address args or not, so we don't need to + know if it's a memory operation or not. *) + List.init t.non_address_arg_count (fun arg_i -> f ~arg_i) + + let iter_vectorizable_args t ~f = + (* see [map_vectorizable_args] *) + for arg_i = 0 to t.non_address_arg_count - 1 do + f ~arg_i + done + + let for_all_non_vectorizable_args t ~f = + (* see [map_vectorizable_args] *) + let rec loop arg_i = + if arg_i = t.arg_count + then true + else if f ~arg_i + then loop (arg_i + 1) + else false + in + loop t.non_address_arg_count + + let same_stack_offset instructions = + match instructions with + | [] -> true + | hd :: tl -> + let stack_offset = Instruction.stack_offset hd in + List.for_all + (fun i -> Int.equal stack_offset (Instruction.stack_offset i)) + tl + + let have_isomorphic_op instructions = + match instructions with + | [] -> true + | hd :: tl -> + let is_isomorphic i = + Instruction.have_isomorphic_op hd i + && Int.equal (get_arg_count hd) (get_arg_count i) + && Int.equal (get_res_count hd) (get_res_count i) + in + List.for_all is_isomorphic tl + + let independent instructions deps = + let res = Dependencies.all_independent deps instructions in + State.dump_debug (Dependencies.state deps) "Group.independent: res=%b\n" + res; + res + + (** Returns true if all memory accesses performed by the [instructions] are + vectorizable. Current implementation assumes that all instructions have + isomorphic operations. It returns true if the operations do not access + memory, or if all memory accesses are adjacent; otherwise returns false. In + the future, this can be extended to support shuffles. 
*) + let can_vectorize_memory_accesses mem_op instructions deps = + match mem_op with + | None -> true + | Some _ -> + let res = Dependencies.all_adjacent deps instructions in + State.dump_debug (Dependencies.state deps) + "Group.all_adjacent: res=%b\n" res; + res + + let init ~width_in_bits instructions deps = + assert (List.length instructions > 1); + assert ( + Vectorize_utils.Width_in_bits.to_int width_in_bits + * List.length instructions + = Simd_selection.vector_width_in_bits); + Format.( + State.dump_debug (Dependencies.state deps) "Group.init\n%a\n" + (pp_print_list ~pp_sep:pp_print_newline Instruction.print_id) + instructions); + match instructions with + | [] -> assert false + | instruction :: _ -> ( + let arg_count = get_arg_count instruction in + let res_count = get_res_count instruction in + let mem_op = Dependencies.get_memory_operation deps instruction in + if not + (same_stack_offset instructions + && have_isomorphic_op instructions + && independent instructions deps + && can_vectorize_memory_accesses mem_op instructions deps) + then None + else + let cfg_ops = + List.map (fun i -> i |> Instruction.op |> Option.get) instructions + in + let vector_instructions = + Simd_selection.vectorize_operation width_in_bits ~arg_count + ~res_count cfg_ops + in + match vector_instructions with + | None -> None + | Some vector_instructions -> + let non_address_arg_count = + match mem_op with + | None -> arg_count + | Some mem_op -> + Dependencies.Memory.Operation.first_memory_arg_index mem_op + in + assert (List.length vector_instructions > 0); + Some + { vector_instructions; + instructions; + non_address_arg_count; + arg_count + }) + + (* Load: At the moment, we do not vectorize dependencies of load + instructions, and don't support vectorizing load instructions that have + interesting vectorizable address dependencies, such as scatter-gather. 
*) + (* Store: It must be the seed group, because store instruction has no + "result", so it can't be a dependency of another group in the tree. We + have already checked that seed consists of adjacent memory accesses that + are independent. The only remaining dependency to check is the new value + stored. It's always the argument at index 0. *) + + let dump ppf t = + let open Format in + let spp ppf l = + pp_print_list ~pp_sep:pp_print_newline Instruction.print ppf l + in + let vpp ppf l = + pp_print_list ~pp_sep:pp_print_newline + Vectorize_utils.Vectorized_instruction.print ppf l + in + fprintf ppf "Group:\nScalar:\n%a\nVector:\n%a\n" spp t.instructions vpp + t.vector_instructions + end + + module Seed : sig + (** A seed is a group of inter-independent store instructions that access adjacent + memory addresses. *) + type t + + val lane_width_in_bits : t -> Vectorize_utils.Width_in_bits.t + + val group : t -> Group.t + + val from_block : Block.t -> Dependencies.t -> t list + + val dump : Format.formatter -> t list -> unit + + val exists_address_dependency : + t -> f:(Instruction.Id.t -> Reg.t -> bool) -> bool + end = struct + type t = + { group : Group.t; + width_in_bits : Vectorize_utils.Width_in_bits.t + } + + let init ~width_in_bits instructions deps = + match Group.init ~width_in_bits instructions deps with + | None -> None + | Some group -> Some { group; width_in_bits } + + (* [take ~n list] returns the first [n] items of [list], or None if list has + fewer than [n] items. *) + let take ~n ~width_in_bits l = + let rec loop n l acc = + if n <= 0 + then Some (List.rev acc) + else + match l with + | [] -> None + | (w, hd) :: tl -> + if Vectorize_utils.Width_in_bits.equal w width_in_bits + then loop (n - 1) tl (hd :: acc) + else None + in + loop n l [] + + let lane_width_in_bits t = t.width_in_bits + + let group t = t.group + + (* [is_store i] if [i] is a Store, return the width in bits, otherwise + return None. 
*) + let is_store instruction = + match Instruction.op instruction with + | None -> None + | Some op -> ( + match op with + | Store (chunk, _, _) -> Some chunk + | Alloc _ | Load _ | Move | Reinterpret_cast _ | Static_cast _ | Spill + | Reload | Const_int _ | Const_float32 _ | Const_float _ + | Const_symbol _ | Const_vec128 _ | Stackoffset _ | Intop _ + | Intop_imm _ | Intop_atomic _ | Floatop _ | Csel _ | Probe_is_enabled _ + | Opaque | Begin_region | End_region | Specific _ | Name_for_debugger _ + | Dls_get | Poll -> + None) + + let from_block (block : Block.t) deps : t list = + (* For each store instruction, tries to form a seed with the closest + stores after it, skipping other instructions. This is a heuristic that + considers only [Store] instructions, not other intructions that write + to memory. The goal is to quickly filter out blocks without candidate + seed, and only use [deps] when needed. *) + (* CR-someday gyorsh: try to reorder independent stores to the same base + address to create adjacent accesses that can be vectorized, for + example: [b+8] <- x; [b]<-y *) + (* CR-someday gyorsh: find seeds with any store, not necessarily the + closest stores. This requires proving that stores that are skipped are + independent, but at the moment we can only do it if they are to the + same address which limits the effectiveness of looking further ahead. 
+ It would only add cases of the form: [b] <- x0; [b+16] <-x2 [b+8] <-x1; + [b+24] <- x3 *) + let body = Block.body block in + let all_stores = + DLL.fold_right body ~init:[] ~f:(fun i acc -> + let i = Instruction.basic i in + match is_store i with + | Some chunk -> + (Vectorize_utils.Width_in_bits.of_memory_chunk chunk, i) :: acc + | None -> acc) + in + Format.( + let pp_pair ppf (x, y) = + fprintf ppf "(%a, %a)" Vectorize_utils.Width_in_bits.print x + Instruction.print y + in + State.dump_debug (Block.state block) + "Seeds.from_block: all_stores=\n(%a)\n" + (pp_print_list ~pp_sep:pp_print_newline pp_pair) + all_stores); + let rec loop stores acc = + match stores with + | [] -> List.rev acc + | (width_in_bits, _store) :: tl -> ( + let n = + Simd_selection.vector_width_in_bits + / Vectorize_utils.Width_in_bits.to_int width_in_bits + in + if n <= 1 + then + (* nothing to vectorize, the store's access is at least + vector-length. *) + loop tl acc else - match cell_option with - | None -> None - | Some cell -> - let instruction = Instruction.Basic (DLL.value cell) in - if Instruction.is_store instruction - then - let new_store = - Instruction.id instruction - |> Memory_accesses.get_memory_operation_exn memory_accesses - in - if Memory_accesses.Memory_operation.is_adjacent (List.hd seed) - new_store - then find_seed (n - 1) (new_store :: seed) (DLL.next cell) - else None - else if can_cross_chunk seed instruction - then find_seed n seed (DLL.next cell) - else None - in - find_seed (items_in_vector - 1) - [starting_memory_operation] - starting_cell - |> Option.map List.rev) - stores - - let dump ppf (seeds : t list) = + match take ~n ~width_in_bits stores with + | None -> + (* not enough instructions in [stores] to make a group of [n] + instructions. 
*) + loop tl acc + | Some instructions -> ( + Format.( + State.dump_debug (Block.state block) + "Seeds.from_block: instructions=\n(%a)\n" + (pp_print_list Instruction.print_id) + instructions); + match init ~width_in_bits instructions deps with + | None -> loop tl acc + | Some t -> loop tl (t :: acc))) + in + loop all_stores [] |> List.rev + + let exists_address_dependency (t : t) ~f = + (* Seed construction guarantees that all instructions in the [seed] have + the same address arguments and isomorphic operations. *) + let instruction = Group.scalar_instructions t.group |> List.hd in + let args = Instruction.arguments instruction in + let len = Array.length args in + let id = Instruction.id instruction in + let rec exists i = + (* The first argument of a store is the new value, the rest are address + arguments. *) + if i < 1 then false else f id args.(i) || exists (i - 1) + in + exists (len - 1) + + let dump ppf (seeds : t list) = + let open Format in + fprintf ppf "\nSeeds:\n"; + List.iter (fun t -> fprintf ppf "(%a)\n" Group.dump t.group) seeds + end + + type t = + { groups : Group.t Instruction.Id.Map.t; + (* [all_instructions] is all the scalar instructions in the computations. + It is an optimization to cache this value here. It is used for ruling + out computations that are invalid or not implementable, and to estimate + cost/benefit of vectorized computations. *) + all_scalar_instructions : Instruction.Id.Set.t; + new_positions : int Instruction.Id.Map.t + } + + let num_groups t = Instruction.Id.Map.cardinal t.groups + + let num_vector_instructions t = + Instruction.Id.Map.fold + (fun _k g acc -> acc + List.length (Group.vector_instructions g)) + t.groups 0 + + let num_scalar_instructions t = + Instruction.Id.Set.cardinal t.all_scalar_instructions + + (** [cost t] returns an integer [n] describing the cost of + vectorized computation [t] instead of the original code: + negative [n] means vectorized computation is better than + the original code. 
The goal is to find [t] that minimizes cost(t). + + Currently, [cost] uses a naive measure of number of instructions, + i.e., the difference between the number of vector instructions + and the number of scalar instructions. *) + let cost t = num_vector_instructions t - num_scalar_instructions t + + let is_cost_effective t = cost t < 0 + + let dump_one_line_stat ppf t = + Format.fprintf ppf + "%d groups, %d scalar instructions, %d vector instructions, cost = %d" + (num_groups t) + (num_scalar_instructions t) + (num_vector_instructions t) + (cost t) + + let dump ppf ~(block : Block.t) t = + let open Format in + let print_group id group_option = + match group_option with + | None -> () + | Some (group : Group.t) -> + fprintf ppf "\nGroup key: %a\n%a\n" Instruction.Id.print id Group.dump + group + in + Format.fprintf ppf "Vectorized computation has %a:\n" dump_one_line_stat t; + DLL.iter (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + let id = Instruction.id instruction in + Instruction.Id.Map.find_opt id t.groups |> print_group id) + + let dump_all ppf ~(block : Block.t) (trees : t list) = let open Format in - let print_seed seed = - List.iter - (fun (address : Memory_accesses.Memory_operation.t) -> - Memory_accesses.Memory_operation.dump ppf address) - seed + let print_trees ppf trees = + List.iter (fun tree -> fprintf ppf "(%a)\n" (dump ~block) tree) trees + in + fprintf ppf "Vectorized computations:\n(%a)\n" print_trees trees + (* The key is the id of the instruction where the group will be inserted, + which is the last instruction in the group for now, we can change that + later *) + + let find_group t ~key = + let key_id = Instruction.id key in + Instruction.Id.Map.find_opt key_id t.groups + + let contains_id t id = Instruction.Id.Set.mem id t.all_scalar_instructions + + let contains t instruction = + let id = Instruction.id instruction in + contains_id t id + + let is_dependency_of_the_rest_of_body t block deps = 
+ (* is there an instruction outside the computation that directly depends on + a register defined in the computation? *) + let res = + DLL.exists (Block.body block) ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + if contains t instruction + then false + else + let id = Instruction.id instruction in + let direct_deps = Dependencies.get_direct_dependencies deps id in + let res = + not + (Instruction.Id.Set.disjoint direct_deps + t.all_scalar_instructions) + in + State.dump_debug (Block.state block) + "Computation.is_dependency_of_the_rest_of_body = %b %a\n" res + Instruction.print instruction; + res) + in + State.dump_debug (Block.state block) + "Computation.is_dependency_of_the_rest_of_body result = %b\n" res; + res + + (** [depends_on_computation t deps id reg] does the value of [reg] at [id] + depend on any instruction in [t]? *) + let depends_on_computation t deps id reg = + match Dependencies.get_direct_dependency_of_reg deps id reg with + | None -> false + | Some reaching_definition_id -> + Instruction.Id.Set.mem reaching_definition_id t.all_scalar_instructions + + let is_dependency_of_outside_body t block deps = + (* live registers before terminator represent dependencies outside of + body. *) + let terminator_id = Instruction.id (Block.terminator block) in + let live_before_terminator = Block.get_live_regs_before_terminator block in + let res = + (* live register before terminator are not defined by the computation. 
*) + Reg.Set.exists + (depends_on_computation t deps terminator_id) + live_before_terminator + in + State.dump_debug (Block.state block) + "Computation.is_valid: is_dependency_of_outside_body = %b\n" res; + res + + let seed_address_does_not_depend_on_tree t block deps seed = + let res = + (* the only way seed address can depend on the tree is if there is a + constant assignment *) + not + (Seed.exists_address_dependency seed ~f:(depends_on_computation t deps)) + in + State.dump_debug (Block.state block) + "Computation.is_valid seed_address_does_not_depend_on_tree = %b\n" res; + res + + let is_respected t block src dst = + let old_pos id = Block.pos block id in + let new_pos id = + match Instruction.Id.Map.find_opt id t.new_positions with + | None -> old_pos id + | Some pos -> pos + in + let appears get_pos src ~after:dst = get_pos src > get_pos dst in + (* [src] depends on [dst] implies that [src] appears after [dst] in the + block, i.e., [old_pos_src] > [old_pos_dst]. Check that [new_pos_src] > + [new_pos_dst]. *) + if not (appears old_pos src ~after:dst) + then + Misc.fatal_errorf + "Unexpected old position: %a (old pos %d) depends on %a (old pos %d)" + Instruction.Id.print src (old_pos src) Instruction.Id.print dst + (old_pos dst); + let res = appears new_pos src ~after:dst in + State.dump_debug (Block.state block) + "Computation.respects_memory_dependencies = %b: %a (old pos %d, new pos \ + %d) depends on %a (old pos %d, new pos %d)\n" + res Instruction.Id.print src (old_pos src) (new_pos src) + Instruction.Id.print dst (old_pos dst) (new_pos dst); + res + + let respects_memory_dependencies t block deps = + (* conservative: for each memory dependency (data or order), check that the + new order of instructions satisfies the dependency. This is needed to + ensure that memory deps between different groups are respected, and + memory deps between instructions in and out of the vectorized computation + are respected. 
The construction of the groups themselves guarantees that + there are no deps of any kind (reg, mem, or order) between instructions + in the same group. *) + let res = + Dependencies.for_all_memory_dependencies ~f:(is_respected t block) deps + in + State.dump_debug (Block.state block) + "Computation.respects_memory_dependencies result = %b\n" res; + res + + let respects_register_order_constraints t deps = + (* Check that read or write of register [r] is not reordered past another + write to [r] (similarly to respecting order constraints between memory + operations in [respects_memory_dependencies]). This check would not be + needed if we had basic-block-level SSA. *) + let is_valid_definition instruction ~arg_i ~new_pos = + let id = Instruction.id instruction in + let args = Instruction.arguments instruction in + let reg = args.(arg_i) in + let old_def = Dependencies.get_direct_dependency_of_reg deps id reg in + let new_def = + Dependencies.get_direct_dependency_of_reg deps new_pos reg + in + match old_def, new_def with + | None, None -> + (* The register is defined before the block, and not redefined within + the block. *) + true + | None, Some _ -> + (* The instruction uses register defined before the block, but the + register is redefined before the new position. 
*) + false + | Some _, None -> + Misc.fatal_errorf + "Use of clobbered register %a at %a, previously defined at %a" + Printreg.reg reg Instruction.Id.print new_pos Instruction.print + instruction + | Some old_def, Some new_def -> + (* cannot move past another definition point *) + Instruction.Id.equal old_def new_def in - let print_seeds seeds = - List.iter - (fun seed -> - fprintf ppf "("; - print_seed seed; - fprintf ppf "\n)\n") - seeds + Instruction.Id.Map.for_all + (fun key group -> + let scalar_instructions = Group.scalar_instructions group in + Group.for_all_non_vectorizable_args group ~f:(fun ~arg_i -> + List.for_all + (is_valid_definition ~arg_i ~new_pos:key) + scalar_instructions)) + t.groups + + let respects_register_dependencies t block deps = + (* Each computation is guaranteed by construction to respect register + dependencies, but after merging two computation, they may be violated if + one of the computations depends on another and the placement of vector + instructions does not respect the dependency. Conservatively reject such + a computation for now. This check should run at the same time as + [is_dependency_of_the_rest_of_body]. Better placement of vectorized + instructions can help in some cases, but not all and not yet + implemented. 
*) + let is_valid_dep i = + let id = Instruction.id i in + let set = Dependencies.get_direct_dependencies deps id in + Instruction.Id.Set.for_all + (fun dep_id -> + if contains_id t dep_id then is_respected t block id dep_id else true) + set in - fprintf ppf "\nseeds:\n"; - print_seeds seeds; - fprintf ppf "\n" + Instruction.Id.Map.for_all + (fun _key group -> + let scalar_instructions = Group.scalar_instructions group in + List.for_all is_valid_dep scalar_instructions) + t.groups + + (* CR gyorsh: [is_dependency_of_outside_body] condition can be weakened if we + propagate register substitution to instructions that depend on them outside + the tree (in the same block and other blocks), but may require additional + instructions to extract scalar values from vector registers. Same weakening + can be applied to [is_dependency_of_the_rest_of_body]. We check + [is_dependency_of_the_rest_of_body] later, after [select_and_merge], + because it allows us to vectorize computations that share some nodes. *) + let is_valid t block deps = + respects_memory_dependencies t block deps + && respects_register_order_constraints t deps + && not (is_dependency_of_outside_body t block deps) + + (** The key is the last instruction id, for now. This is the place + where the vectorized intructions will be inserted. *) + let get_key block instruction_ids = + let last_instruction = Block.find_last_instruction block instruction_ids in + Instruction.id last_instruction + + (** Returns the dependencies of arguments at position [arg_i] + of each instruction in [instruction_ids]. Returns None if + one of the instruction's dependencies is None for [arg_i]. 
*) + let get_deps deps ~arg_i instruction_ids = + Misc.Stdlib.List.map_option + (Dependencies.get_direct_dependency_of_arg deps ~arg_i) + instruction_ids + + let all_instructions map = + Instruction.Id.Map.fold + (fun _key (group : Group.t) acc -> + let instructions = Group.scalar_instructions group in + let seq = List.to_seq instructions |> Seq.map Instruction.id in + Instruction.Id.Set.add_seq seq acc) + map Instruction.Id.Set.empty + + let new_positions map block = + (* CR-someday gyorsh: improve instruction scheduling for vectorized + instructions to allow more code to be vectorized. Order constraints can + be used to choose the position of vectorized instructions within a block, + instead of the predefined position used now for [Group.key]. *) + let old_pos id = Block.pos block id in + Instruction.Id.Map.fold + (fun key group acc -> + let key_pos = old_pos key in + List.fold_left + (fun acc i -> + let id = Instruction.id i in + Instruction.Id.Map.add id key_pos acc) + acc + (Group.scalar_instructions group)) + map Instruction.Id.Map.empty + + let empty = + { groups = Instruction.Id.Map.empty; + all_scalar_instructions = Instruction.Id.Set.empty; + new_positions = Instruction.Id.Map.empty + } + + (* CR gyorsh: if same instruction belongs to two groups, is it handled + correctly? no, only same key is handled correctly. *) + (* CR gyorsh: handle same instruction multiple times in the same group + correctly, don't allow duplicating. Currently, this can only lead to a + valid tree if the duplicated instruction is [Const_*] *) + let rec build group map ~block ~deps ~width_in_bits = + (* Recursively builds the vectorized computation and returns the key of the + root, otherwise None. It consists of groups do not depend on instructions + outside the computation except for loads. 
*) + match group with + | None -> None + | Some (group : Group.t) -> ( + let instruction_ids = + Group.scalar_instructions group |> List.map Instruction.id + in + let key = get_key block instruction_ids in + (* Is there another group with the same key already in the tree? If the + key instruction of the group is already in another group, and the other + group is different from this group, we won't vectorize this for + simplicity's sake. *) + match Instruction.Id.Map.find_opt key map with + | Some (old_group : Group.t) -> + if Group.equal group old_group then Some map else None + | None -> + (* add to the map *) + let map = Instruction.Id.Map.add key group map in + (* try to create groups for all dependencies *) + let dep_groups = + Group.map_vectorizable_args group ~f:(fun ~arg_i -> + (* [arg_i] ranges over indexes of arguments that need to be + considered when vectorizing dependencies. Currently skips over + arguments that are used for memory address calculation. + [Group.init] ensures that the memory address arguments have the + same values for all instructions in the group. *) + (* CR-someday gyorsh: refer directly to [Reg.t] instead of + positional [arg_i]. Currently, the code assumes that address + args are always at the end. *) + match get_deps deps ~arg_i instruction_ids with + | None -> + (* At least one of the arguments has a dependency outside the + block. Currently, not supported. *) + None + | Some dep_ids -> + State.dump_debug (Block.state block) + "Computation.from_seed build deps arg_i=%d\n" arg_i; + let instructions = List.map (Block.find block) dep_ids in + Group.init ~width_in_bits instructions deps) + in + let missing_deps = List.exists Option.is_none dep_groups in + if missing_deps + then None + else + (* recurse to vectorize dependencies. 
*) + List.fold_left + (fun acc g -> Option.bind acc (build g ~block ~deps ~width_in_bits)) + (Some map) dep_groups) + + let from_seed (block : Block.t) deps seed = + let width_in_bits = Seed.lane_width_in_bits seed in + let root = Seed.group seed in + State.dump_debug (Block.state block) "Computation.from_seed root=\n%a\n" + Group.dump root; + let map = Instruction.Id.Map.empty in + match build (Some root) map ~block ~deps ~width_in_bits with + | None -> None + | Some map -> + let t = + { groups = map; + all_scalar_instructions = all_instructions map; + new_positions = new_positions map block + } + in + State.dump_debug (Block.state block) + "Computation.from_seed build finished\n%a\n" (dump ~block) t; + assert (seed_address_does_not_depend_on_tree t block deps seed); + if is_valid t block deps then Some t else None + + let join t1 t2 = + { groups = + Instruction.Id.Map.union + (fun key g1 g2 -> + if not (Group.equal g1 g2) + then + Misc.fatal_errorf + "Computation.join: illegal groups for key=%a group1=%a \ + group2=%a" + Instruction.Id.print key Group.dump g1 Group.dump g2; + Some g1) + t1.groups t2.groups; + all_scalar_instructions = + Instruction.Id.Set.union t1.all_scalar_instructions + t2.all_scalar_instructions; + new_positions = + Instruction.Id.Map.union + (fun key pos1 pos2 -> + if not (Int.equal pos1 pos2) + then + Misc.fatal_errorf + "Computation.join: illegal new_positions for key=%a pos1=%d \ + pos2=%d" + Instruction.Id.print key pos1 pos2; + Some pos1) + t1.new_positions t2.new_positions + } + + (** [compatible t t'] returns true if for every group [g] in [t], + and [g'] in [t'], [g] and [g'] are equal or have disjoint sets + of scalar instructions. 
*) + let compatible t t' = + if Instruction.Id.Set.disjoint t.all_scalar_instructions + t'.all_scalar_instructions + then true + else + let sub t1 t2 = + Instruction.Id.Map.for_all + (fun key g1 -> + match Instruction.Id.Map.find_opt key t2.groups with + | Some g2 -> + (* equal groups: if the key is in t2, then the corresponding + groups are equal. *) + (* CR gyorsh: don't need to repeat this check in the symmetric + case, but it's easier to understand the code this way. *) + Group.equal g1 g2 + | None -> + (* disjoint groups: if the key is not in t2, then all insts are + not in t2. *) + List.for_all + (fun i -> + not + (Instruction.Id.Set.mem (Instruction.id i) + t2.all_scalar_instructions)) + (Group.scalar_instructions g1)) + t1.groups + in + sub t t' && sub t' t + + let select_and_join trees block deps = + match trees with + | [] -> None + | trees -> + (* sort by cost, ascending *) + let compare_cost t1 t2 = Int.compare (cost t1) (cost t2) in + let trees = List.sort compare_cost trees in + let rec loop trees acc = + match trees with + | [] -> acc + | hd :: tl -> + if compatible hd acc + then + let new_acc = join hd acc in + if compare_cost new_acc acc < 0 + then loop tl new_acc + else + (* CR gyorsh: this case is not reachable with the current cost function. *) + (* skip [hd], try to add the rest of the tail *) + loop tl acc + else (* skip [hd], add the rest of the tail *) + loop tl acc + in + let res = loop trees empty in + (* CR gyorsh: does join respect memory order? 
*) + assert (is_valid res block deps); + State.dump_debug (Block.state block) "Computation.select_and_join %a\n" + (dump ~block) res; + if is_dependency_of_the_rest_of_body res block deps + || (not (respects_register_dependencies res block deps)) + || not (is_cost_effective res) + then None + else Some res end -let dump ppf cfg_with_layout ~msg = - let open Format in - let cfg = Cfg_with_layout.cfg cfg_with_layout in - fprintf ppf "\nvectorization extra information for %s\n" msg; - fprintf ppf "%s\n" (Cfg.fun_name cfg); - let block_count = Label.Tbl.length cfg.blocks in - fprintf ppf "blocks.length=%d\n" block_count; - let body_instruction_count = - Cfg.fold_body_instructions cfg ~f:(fun sum _ -> sum + 1) ~init:0 +let augment_reg_map reg_map group = + (* Make sure that [reg_map] contains all scalar registers of the [group] that + will be replaced by vector registers. It is not enough to map only scalar + registers that appear in [key_instruction] because another group's key + instruction may refer to different scalar registers in the same packed + register (i.e., the order of scalar instructions need not be the same as + block order, and the key is currently the last instruction in the block + order. + + For example, group [10;7] and group [13;14]: *) + + (* (id:5) V/63 := val [b:V/62 + 8] + * (id:6) V/64 := val [a:V/61 + 8] + * (id:7) Paddint:I/65 := V/64 + V/63 + -1 + * (id:8) V/66 := val [b:V/62] + * (id:9) V/67 := val [a:V/61] + * (id:10) Paddint:I/68 := V/67 + V/66 + -1 + * (id:13) val[V/69] := Paddint:I/68 (init) + * (id:14) val[V/69 + 8] := Paddint:I/65 (init) *) + + (* The key of [10;7] has result register [I/68] but the corresponding argument + register in the key of [13;14] is [I/65]. 
*) + let scalar_instructions = Computation.Group.scalar_instructions group in + let arg = List.map Instruction.arguments scalar_instructions in + let res = List.map Instruction.results scalar_instructions in + let augment index reg_arrays = + let pack = List.map (fun reg_array -> reg_array.(index)) reg_arrays in + match pack with + | [] -> () + | hd :: tl -> ( + match Substitution.get_reg_opt reg_map hd with + | None -> Substitution.fresh_reg_for_pack reg_map pack Vec128 + | Some old_reg_for_hd -> + (* other registers in the pack must be mapped in the same way as + [hd]. *) + List.iter + (fun reg -> + match Substitution.get_reg_opt reg_map reg with + | None -> + Misc.fatal_errorf + "augment_reg_map: %a is mapped to %a but %a is not mapped" + Printreg.reg hd Printreg.reg old_reg_for_hd Printreg.reg reg + | Some old_reg -> + if not (Reg.same old_reg_for_hd old_reg) + then + Misc.fatal_errorf + "augment_reg_map: %a is mapped to %a but %a is mapped to %a" + Printreg.reg hd Printreg.reg old_reg_for_hd Printreg.reg reg + Printreg.reg old_reg) + tl) in - fprintf ppf "body instruction count=%d\n" body_instruction_count; - fprintf ppf "terminator instruction count=%d\n" block_count; - fprintf ppf "body and terminator instruction count=%d\n" - (body_instruction_count + block_count); - fprintf ppf "@." + (* only some of the args are vectorizable, but all results are vectorizable. *) + (* CR gyorsh: get rid of positional interface, use registers directly. 
*) + Computation.Group.iter_vectorizable_args group ~f:(fun ~arg_i -> + augment arg_i arg); + Array.iteri (fun i _r -> augment i res) (List.hd res) + +let add_vector_instructions_for_group reg_map state group ~before:cell + old_instruction = + let vector_instructions = Computation.Group.vector_instructions group in + let key_instruction = Instruction.basic old_instruction in + let new_regs : Reg.t Numbers.Int.Tbl.t = Numbers.Int.Tbl.create 2 in + let get_new_reg n = + match Numbers.Int.Tbl.find_opt new_regs n with + | Some reg -> reg + | None -> + let new_reg = Reg.create Vec128 in + Numbers.Int.Tbl.add new_regs n new_reg; + new_reg + in + let create_instruction + (simd_instruction : Vectorize_utils.Vectorized_instruction.t) = + let get_register + (simd_reg : Vectorize_utils.Vectorized_instruction.register) = + match simd_reg with + | New n -> get_new_reg n + | Argument n -> + let original_reg = (Instruction.arguments key_instruction).(n) in + Substitution.get_reg_exn reg_map original_reg + | Result n -> + let original_reg = (Instruction.results key_instruction).(n) in + Substitution.get_reg_exn reg_map original_reg + | Original n -> + let original_reg = (Instruction.arguments key_instruction).(n) in + original_reg + in + let desc = Cfg.Op simd_instruction.operation in + let arg = Array.map get_register simd_instruction.arguments in + let res = Array.map get_register simd_instruction.results in + let id = State.next_available_instruction state in + Instruction.copy old_instruction ~desc ~arg ~res ~id + in + augment_reg_map reg_map group; + (* actually insert the vector instructions into the body of the block *) + List.iter + (fun simd_instruction -> + create_instruction simd_instruction |> DLL.insert_before cell) + vector_instructions + +let vectorize (block : Block.t) tree = + let reg_map = Block.reg_map block in + let state = Block.state block in + (* Add vector instructions. 
*) + let rec add_vector_instructions cell_option = + match cell_option with + | None -> () + | Some cell -> + (let old_instruction = DLL.value cell in + let instruction = Instruction.basic old_instruction in + match Computation.find_group tree ~key:instruction with + | None -> () + | Some group -> + add_vector_instructions_for_group reg_map state group ~before:cell + old_instruction); + DLL.next cell |> add_vector_instructions + in + let body = Block.body block in + DLL.hd_cell body |> add_vector_instructions; + (* Remove all instructions that were replaced by vector instructions. *) + DLL.filter_left body ~f:(fun instruction -> + let instruction = Instruction.basic instruction in + not (Computation.contains tree instruction)) + +exception Cannot_reorder of Instruction.t * Instruction.t + +let can_reorder tree body deps = + (* Checks nodes can be grouped together. Modifies a copy of the block's body + to move the scalar instructions together to where they would be replaced by + the corresponding vector instructions. Does not actually emit vectorized + instructions. There is a faster/simpler/safer way to emit vectorized code. + + Used for validation only. Intended to work even after the vectorization + heuristics evolve. Can be expensive. *) + let reorder_instruction body instruction position = + match position with + | None -> assert false + | Some position -> ( + (* find the cell that holds the scalar [instruction] *) + let same_position i = + let p = DLL.value position in + Instruction.equal_id i p + in + if same_position instruction + then + (* the [instruction] is already at [position], no need to reorder. *) + DLL.prev position + else + let cell = + DLL.find_cell_opt body ~f:(Instruction.equal_id instruction) + |> Option.get + in + (* Traverse from [cell] to [position], making sure the [instruction] can + be moved across every [other_instruction] along the way. 
*) + let rec can_cross cur = + match DLL.next cur with + | None -> assert false + | Some next_cell -> + let other_instruction = DLL.value next_cell in + if not (Dependencies.independent deps instruction other_instruction) + then raise (Cannot_reorder (instruction, other_instruction)) + else if same_position other_instruction + then true + else can_cross next_cell + in + match can_cross cell with + | false -> None + | true -> + (* Insert the scalar [instruction] after [position] and return the new + cell. This duplicates the scalar instruction, including the key. *) + DLL.insert_after position instruction; + (* delete old scalar instructions *) + DLL.delete_curr cell; + Some position) + in + let reorder_group position : Instruction.t DLL.cell option = + let key = DLL.value position in + let group = Computation.find_group tree ~key in + match group with + | None -> DLL.prev position + | Some (group : Computation.Group.t) -> + let instructions = Computation.Group.scalar_instructions group in + (* traverse [instructions] backwards, moving each to [position], and + returns the cell before the group. *) + List.fold_right (reorder_instruction body) instructions (Some position) + in + let rec reorder cell_option = + match cell_option with + | None -> () + | Some cell -> reorder_group cell |> reorder + in + (* traverse the block backwards *) + DLL.last_cell body |> reorder + +let validate tree block deps = + (* CR-soon gyorsh: if placement changes, the check will be too strict. *) + let state = Block.state block in + let body = Block.body block in + (* copy body *) + let body = DLL.map ~f:Instruction.basic body in + (try can_reorder tree body deps + with Cannot_reorder (i1, i2) -> + let pp ppf body = + DLL.iter body ~f:(fun i -> Format.fprintf ppf "%a\n" Instruction.print i) + in + Misc.fatal_errorf "Cannot reorder %a %a in Block:\n%a\n" + Instruction.print_id i1 Instruction.print_id i2 pp body); + State.dump_debug state "Validated. 
Reordered block:\n"; + DLL.iter body ~f:(fun i -> State.dump_debug state "%a\n" Instruction.print i) + +let count block computation = + let counter = + Profile.Counters.create () + |> Profile.Counters.set "tried_to_vectorize_blocks" 1 + |> Profile.Counters.set "block_size" (Block.size block) + in + if Block.size block > !Flambda_backend_flags.vectorize_max_block_size + then counter |> Profile.Counters.set "block_too_big" 1 + else + match computation with + | None -> counter + | Some c -> + counter + |> Profile.Counters.set "vectorized_block" 1 + |> Profile.Counters.set "cost" (Computation.cost c) + |> Profile.Counters.set "num_groups" (Computation.num_groups c) + +let maybe_vectorize block = + let state = Block.state block in + let instruction_count = Block.size block in + let label = Block.start block in + State.dump state "\nBlock %a:\n" Label.print label; + if instruction_count > !Flambda_backend_flags.vectorize_max_block_size + then ( + State.dump state + "Skipping block %a with %d instructions (> %d = \ + max_block_size_to_vectorize).\n" + Label.print label instruction_count + !Flambda_backend_flags.vectorize_max_block_size; + None) + else + let deps = Dependencies.from_block block in + let seeds = Computation.Seed.from_block block deps in + State.dump_debug state "%a@." Computation.Seed.dump seeds; + let computations = + List.filter_map (Computation.from_seed block deps) seeds + in + State.dump_debug state "%a@." 
(Computation.dump_all ~block) computations; + match Computation.select_and_join computations block deps with + | None -> None + | Some computation -> + let scoped_name = + State.fun_dbg state |> Debuginfo.get_dbg |> Debuginfo.Dbg.to_list + |> List.map (fun dbg -> + Debuginfo.( + Scoped_location.string_of_scopes ~include_zero_alloc:false + dbg.dinfo_scopes)) + |> String.concat "," + in + State.dump state "**** Vectorize selected computation: %a (%s)\n" + Computation.dump_one_line_stat computation scoped_name; + State.dump_debug state "%a\n" (Computation.dump ~block) computation; + if State.extra_debug then validate computation block deps; + let dump_block msg block = + let size = DLL.length (Block.body block) in + State.dump state "Block %a in %s: %s body instruction count=%d\n" + Label.print (Block.start block) (State.fun_name state) msg size; + DLL.iter (Block.body block) ~f:(fun i -> + State.dump_debug state "%a\n" Instruction.print + (Instruction.basic i)) + in + dump_block "before vectorize" block; + (* This is the only function that changes the [block]. *) + vectorize block computation; + dump_block "after vectorize" block; + Some computation let cfg ppf_dump cl = - if !Flambda_backend_flags.dump_vectorize - then Format.fprintf ppf_dump "*** Vectorization@."; + let state = State.create ppf_dump cl in + State.dump state "*** Vectorize@."; let cfg = Cfg_with_layout.cfg cl in + (* Iterate in layout order instead of default block order to make debugging + easier. 
*) let layout = Cfg_with_layout.layout cl in DLL.iter layout ~f:(fun label -> - let block = Cfg.get_block_exn cfg label in - let instruction_count = DLL.length block.body in - Format.fprintf ppf_dump "\nBlock %a (%d basic instructions):\n" - Label.format label instruction_count; - if instruction_count > 1000 - then - Format.fprintf ppf_dump - "more than 1000 instructions in basic block, cannot vectorize\n" - else - let dependency_graph = Dependency_graph.from_block block in - if !Flambda_backend_flags.dump_vectorize - then Dependency_graph.dump ppf_dump dependency_graph block; - let memory_accesses = Memory_accesses.from_block block in - if !Flambda_backend_flags.dump_vectorize - then Memory_accesses.dump ppf_dump memory_accesses; - let seeds = Seed.from_block block in - if !Flambda_backend_flags.dump_vectorize then Seed.dump ppf_dump seeds); - if !Flambda_backend_flags.dump_vectorize then dump ppf_dump ~msg:"" cl; + let block = Block.create (Cfg.get_block_exn cfg label) state in + (Profile.record_with_counters ~counter_f:(count block) ~accumulate:true + "vectorize_block" maybe_vectorize block + : Computation.t option) + |> ignore); cl diff --git a/backend/cmm.ml b/backend/cmm.ml index 78e5dbd8930..f055944d92f 100644 --- a/backend/cmm.ml +++ b/backend/cmm.ml @@ -388,14 +388,6 @@ type phrase = Cfunction of fundecl | Cdata of data_item list -let width_in_bytes (memory_chunk : memory_chunk) : int = - match memory_chunk with - | Byte_unsigned | Byte_signed -> 1 - | Sixteen_unsigned | Sixteen_signed -> 2 - | Thirtytwo_unsigned | Thirtytwo_signed | Single _ -> 4 - | Word_int | Word_val | Double -> 8 - | Onetwentyeight_unaligned | Onetwentyeight_aligned -> 16 - let ccatch (i, ids, e1, e2, dbg, kind, is_cold) = Ccatch(Nonrecursive, [i, ids, e2, dbg, is_cold], e1, kind) @@ -746,3 +738,8 @@ let equal_integer_comparison left right = false let caml_flambda2_invalid = "caml_flambda2_invalid" + +let is_val (m: machtype_component) = + match m with + | Val -> true + | Addr | Int | 
Float | Vec128 | Float32 -> false diff --git a/backend/cmm.mli b/backend/cmm.mli index aec2331ceae..cfb84c1ba65 100644 --- a/backend/cmm.mli +++ b/backend/cmm.mli @@ -391,8 +391,6 @@ type phrase = Cfunction of fundecl | Cdata of data_item list -val width_in_bytes : memory_chunk -> int - val ccatch : Lambda.static_label * (Backend_var.With_provenance.t * machtype) list * expression * expression * Debuginfo.t * kind_for_unboxing @@ -436,3 +434,4 @@ val equal_memory_chunk : memory_chunk -> memory_chunk -> bool val equal_integer_comparison : integer_comparison -> integer_comparison -> bool val caml_flambda2_invalid : string +val is_val : machtype_component -> bool diff --git a/backend/dune b/backend/dune index 138cb1f09e9..a066b4b4b72 100644 --- a/backend/dune +++ b/backend/dune @@ -15,7 +15,9 @@ (rule (targets arch.ml arch.mli cfg_selection.ml CSE.ml proc.ml regalloc_stack_operands.ml reload.ml selection.ml selection_utils.ml simd.ml simd_selection.ml simd_reload.ml simd_proc.ml - stack_check.ml) + stack_check.ml + vectorize_specific.ml + ) (mode fallback) (deps (glob_files amd64/*.ml) (glob_files amd64/*.mli) diff --git a/backend/operation.mli b/backend/operation.mli index 1afbedd9e37..fe086e7d225 100644 --- a/backend/operation.mli +++ b/backend/operation.mli @@ -30,7 +30,6 @@ (* CR-soon xclerc for xclerc: consider whether `Simple_operation` and `Operation` should be merged into a single module. 
*) - type t = | Move | Spill diff --git a/backend/vectorize_utils.ml b/backend/vectorize_utils.ml new file mode 100644 index 00000000000..f119306bbe8 --- /dev/null +++ b/backend/vectorize_utils.ml @@ -0,0 +1,97 @@ +open Arch + +module Width_in_bits = struct + type t = + | W8 + | W16 + | W32 + | W64 + | W128 + + let of_memory_chunk (c : Cmm.memory_chunk) = + match c with + | Byte_unsigned | Byte_signed -> W8 + | Sixteen_unsigned | Sixteen_signed -> W16 + | Thirtytwo_unsigned | Thirtytwo_signed | Single _ -> W32 + | Word_int | Word_val | Double -> W64 + | Onetwentyeight_unaligned | Onetwentyeight_aligned -> W128 + + let of_atomic_bitwidth (b : Cmm.atomic_bitwidth) = + match b with Thirtytwo -> W32 | Sixtyfour -> W64 | Word -> W64 + + let to_int t = + match t with W128 -> 128 | W64 -> 64 | W32 -> 32 | W16 -> 16 | W8 -> 8 + + let equal t1 t2 = + match t1, t2 with + | W128, W128 -> true + | W64, W64 -> true + | W32, W32 -> true + | W16, W16 -> true + | W8, W8 -> true + | (W128 | W64 | W32 | W16 | W8), _ -> false + + let print ppf t = Format.fprintf ppf "%d" (to_int t) +end + +module Memory_access = struct + module Init_or_assign = struct + type t = + | Initialization + | Assignment + end + + type desc = + | Alloc + | Arbitrary + | Read of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_mutable : bool; + is_atomic : bool + } + | Write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + init_or_assign : Init_or_assign.t + } + | Read_and_write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_atomic : bool + } + + type t = + { desc : desc; + first_memory_arg_index : int + } + + let create ?(first_memory_arg_index = 0) desc = + { desc; first_memory_arg_index } + + let desc t = t.desc + + let first_memory_arg_index t = t.first_memory_arg_index +end + +module Vectorized_instruction = struct + type register = + | New of int + | Argument of int + | Result of int + | Original of int + + 
type t = + { operation : Operation.t; + arguments : register array; + results : register array + } + + let print ppf t = Format.fprintf ppf "%a " Cfg.dump_basic (Cfg.Op t.operation) + + let make_default ~arg_count ~res_count operation : t = + { operation; + arguments = Array.init arg_count (fun i -> Argument i); + results = Array.init res_count (fun i -> Result i) + } +end diff --git a/backend/vectorize_utils.mli b/backend/vectorize_utils.mli new file mode 100644 index 00000000000..43e6961f35a --- /dev/null +++ b/backend/vectorize_utils.mli @@ -0,0 +1,83 @@ +open Arch + +module Width_in_bits : sig + type t = + | W8 + | W16 + | W32 + | W64 + | W128 + + val of_memory_chunk : Cmm.memory_chunk -> t + + val of_atomic_bitwidth : Cmm.atomic_bitwidth -> t + + val to_int : t -> int + + val print : Format.formatter -> t -> unit + + val equal : t -> t -> bool +end + +module Memory_access : sig + module Init_or_assign : sig + type t = + | Initialization + | Assignment + end + + type desc = + | Alloc + | Arbitrary + | Read of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_mutable : bool; + is_atomic : bool + } + | Write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + init_or_assign : Init_or_assign.t + } + | Read_and_write of + { width_in_bits : Width_in_bits.t; + addressing_mode : addressing_mode; + is_atomic : bool + } + + type t + + val create : ?first_memory_arg_index:int -> desc -> t + + val desc : t -> desc + + val first_memory_arg_index : t -> int +end + +module Vectorized_instruction : sig + (** Registers used in vectorized instructions of one scalar instruction + group. 
*) + type register = + | New of int + (** The n-th new temporary register used in the vectorized instructions *) + | Argument of int + (** Vector version of the n-th argument's register of the scalar + instruction *) + | Result of int + (** Vector version of the n-th result's register of the scalar instruction *) + | Original of int + (** Keep the original instruction in the n-th argument/result (depending on whether + it is used in the argument or result of the vectorized instructions) of the + scalar instruction*) + + type t = + { operation : Operation.t; + arguments : register array; + results : register array + } + + val print : Format.formatter -> t -> unit + + val make_default : arg_count:int -> res_count:int -> Operation.t -> t +end diff --git a/driver/flambda_backend_args.ml b/driver/flambda_backend_args.ml index ef92c551d2e..44ade683f79 100644 --- a/driver/flambda_backend_args.ml +++ b/driver/flambda_backend_args.ml @@ -60,6 +60,11 @@ let mk_vectorize f = let mk_no_vectorize f = "-no-vectorize", Arg.Unit f, " Disable vectorizer (EXPERIMENTAL)" +let mk_vectorize_max_block_size f = + "-vectorize-max-block-size", Arg.Int f, + Printf.sprintf " Only CFG block with at most n IR instructions will be vectorized \ + (default %d)" Flambda_backend_flags.default_vectorize_max_block_size + let mk_dvectorize f = "-dvectorize", Arg.Unit f, " (undocumented)" ;; @@ -710,6 +715,7 @@ module type Flambda_backend_options = sig val vectorize : unit -> unit val no_vectorize : unit -> unit + val vectorize_max_block_size : int -> unit val dvectorize : unit -> unit val cfg_selection : unit -> unit @@ -847,6 +853,7 @@ struct mk_vectorize F.vectorize; mk_no_vectorize F.no_vectorize; + mk_vectorize_max_block_size F.vectorize_max_block_size; mk_dvectorize F.dvectorize; mk_cfg_selection F.cfg_selection; @@ -1015,6 +1022,8 @@ module Flambda_backend_options_impl = struct let vectorize = set' Flambda_backend_flags.vectorize let no_vectorize = clear' Flambda_backend_flags.vectorize + let 
vectorize_max_block_size n = + Flambda_backend_flags.vectorize_max_block_size := n let dvectorize = set' Flambda_backend_flags.dump_vectorize let cfg_selection = set' Flambda_backend_flags.cfg_selection @@ -1356,6 +1365,7 @@ module Extra_params = struct | "regalloc-param" -> add_string Flambda_backend_flags.regalloc_params | "regalloc-validate" -> set' Flambda_backend_flags.regalloc_validate | "vectorize" -> set' Flambda_backend_flags.vectorize + | "vectorize-max-block-size" -> set_int' Flambda_backend_flags.vectorize_max_block_size | "cfg-selection" -> set' Flambda_backend_flags.cfg_selection | "cfg-peephole-optimize" -> set' Flambda_backend_flags.cfg_peephole_optimize | "cfg-cse-optimize" -> set' Flambda_backend_flags.cfg_cse_optimize diff --git a/driver/flambda_backend_args.mli b/driver/flambda_backend_args.mli index 0cf27be21ed..6454287a392 100644 --- a/driver/flambda_backend_args.mli +++ b/driver/flambda_backend_args.mli @@ -36,6 +36,7 @@ module type Flambda_backend_options = sig val vectorize : unit -> unit val no_vectorize : unit -> unit + val vectorize_max_block_size : int -> unit val dvectorize : unit -> unit val cfg_selection : unit -> unit diff --git a/driver/flambda_backend_flags.ml b/driver/flambda_backend_flags.ml index 91ad2b4cdf9..5c371cc9d4b 100644 --- a/driver/flambda_backend_flags.ml +++ b/driver/flambda_backend_flags.ml @@ -24,6 +24,10 @@ let regalloc_validate = ref true (* -[no-]regalloc-validate *) let vectorize = ref false (* -[no-]vectorize *) let dump_vectorize = ref false (* -dvectorize *) +let default_vectorize_max_block_size = 100 +let vectorize_max_block_size = + ref default_vectorize_max_block_size (* -vectorize-max-block-size *) + let cfg_selection = ref false (* -[no-]cfg-selection *) let cfg_peephole_optimize = ref true (* -[no-]cfg-peephole-optimize *) diff --git a/driver/flambda_backend_flags.mli b/driver/flambda_backend_flags.mli index 486751cf7b3..a2b082dd77b 100644 --- a/driver/flambda_backend_flags.mli +++ 
b/driver/flambda_backend_flags.mli @@ -24,6 +24,8 @@ val regalloc_validate : bool ref val vectorize : bool ref val dump_vectorize : bool ref +val default_vectorize_max_block_size : int +val vectorize_max_block_size : int ref val cfg_selection : bool ref diff --git a/dune b/dune index 11fd6826617..b72e46d606b 100644 --- a/dune +++ b/dune @@ -527,6 +527,8 @@ linear_utils simplify_terminator vectorize + vectorize_utils + vectorize_specific ;; backend/regalloc regalloc_gi regalloc_gi_state diff --git a/flambda-backend/tests/backend/vectorizer/.ocamlformat b/flambda-backend/tests/backend/vectorizer/.ocamlformat new file mode 100644 index 00000000000..0262494e37d --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/.ocamlformat @@ -0,0 +1,18 @@ +# Please make a pull request to change this file. +disable=false +# There is an .ocamlformat-enable file in this directory. +# Keep the remainder of this file in sync with other .ocamlformat files in this repo. +assignment-operator=begin-line +cases-exp-indent=2 +doc-comments=before +dock-collection-brackets=false +exp-grouping=preserve +if-then-else=keyword-first +module-item-spacing=sparse +parens-tuple=multi-line-only +sequence-blank-line=compact +space-around-lists=false +space-around-variants=false +type-decl=sparse +wrap-comments=true +version=0.24.1 diff --git a/flambda-backend/tests/backend/vectorizer/dune b/flambda-backend/tests/backend/vectorizer/dune index c43d921d279..b20daea1c66 100644 --- a/flambda-backend/tests/backend/vectorizer/dune +++ b/flambda-backend/tests/backend/vectorizer/dune @@ -1,303 +1,9 @@ -; Test1 +(include dune.inc) -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_runner.exe) - (deps test1.ml) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - test1_runner.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - 
STACK_SLOTS_THRESHOLD:3072 - -no-vectorize))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_runner_simd.exe) - (deps test1_simd.ml stubs.c) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - test1_runner_simd.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -no-vectorize - -extension - simd))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_vectorized.ml) - (deps test1.ml) - (action - (run cp test1.ml test1_vectorized.ml))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets test1_runner_vectorized.exe test1_vectorized.cmx.dump) - (deps test1_vectorized.ml) - (action - (run - %{bin:ocamlopt.opt} - test1_vectorized.ml - -o - test1_runner_vectorized.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -dvectorize - -vectorize))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (deps filter.sh test1_vectorized.cmx.dump) - (action - (with-outputs-to - grep_test1_vectorizer_cmx.out - (run ./filter.sh test1_vectorized.cmx.dump)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - test1.out - (run ./test1_runner.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - test1_simd.out - (run ./test1_runner_simd.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - test1_vectorized.out - (run ./test1_runner_vectorized.exe)))) - 
-(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (= %{architecture} "amd64") - (<> %{system} macosx))) - (action - (progn - (diff grep_test1_vectorizer_cmx.out grep_vectorizer_cmx.expected) - (diff test1.out test1.expected) - (diff test1_simd.out test1.expected) - (diff test1_vectorized.out test1.expected)))) - -; Examples - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_runner.exe) - (deps examples.ml) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - examples_runner.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -no-vectorize))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_runner_unrolled.exe) - (deps examples_unrolled.ml stubs.c) - (action - (run - %{bin:ocamlopt.opt} - %{deps} - -o - examples_runner_unrolled.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -no-vectorize))) - -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_vectorized.ml) - (deps examples.ml) - (action - (run cp examples.ml examples_vectorized.ml))) +(rule (with-stdout-to dune.inc.gen (run ./gen/gen_dune.exe))) (rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (targets examples_runner_vectorized.exe examples_vectorized.cmx.dump) - (deps examples_vectorized.ml) - (action - (run - %{bin:ocamlopt.opt} - examples_vectorized.ml - -o - examples_runner_vectorized.exe - -S - -O3 - -regalloc - cfg - -regalloc-param - IRC_SPILLING_HEURISTICS:flat-uses - -regalloc-param - SPLIT_LIVE_RANGES:on - -regalloc-param - 
STACK_SLOTS_THRESHOLD:3072 - -dump-into-file - -dcfg - -dvectorize - -vectorize))) + (alias runtest) + (action (diff dune.inc dune.inc.gen))) -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (deps filter.sh examples_vectorized.cmx.dump) - (action - (with-outputs-to - grep_examples_vectorizer_cmx.out - (run ./filter.sh examples_vectorized.cmx.dump)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - examples.out - (run ./examples_runner.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - examples_unrolled.out - (run ./examples_runner_unrolled.exe)))) - -(rule - (enabled_if - (and - (= %{context_name} "main") - (<> %{system} macosx))) - (action - (with-outputs-to - examples_vectorized.out - (run ./examples_runner_vectorized.exe)))) -(rule - (alias runtest) - (enabled_if - (and - (= %{context_name} "main") - (= %{architecture} "amd64") - (<> %{system} macosx))) - (action - (progn - (diff grep_examples_vectorizer_cmx.out grep_vectorizer_cmx.expected) - (diff examples.out examples.expected) - (diff examples_unrolled.out examples.expected) - (diff examples_vectorized.out examples.expected)))) diff --git a/flambda-backend/tests/backend/vectorizer/dune.inc b/flambda-backend/tests/backend/vectorizer/dune.inc new file mode 100644 index 00000000000..64efbe74dc1 --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/dune.inc @@ -0,0 +1,75 @@ + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (targets test1_runner.exe test1.cmx.dump) + (deps test1.mli test1.ml) + (action (run %{bin:ocamlopt.opt} %{deps} -S -O3 -g -dump-into-file -dcfg -dvectorize -dsel -dlinear -dlive -regalloc cfg -extension simd -no-vectorize -o test1_runner.exe))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (with-outputs-to + test1.output + (run ./test1_runner.exe)))) + 
+(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (diff test1.expected test1.output))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (copy test1.ml test1_vectorized.ml))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (copy test1.mli test1_vectorized.mli))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (targets test1_vectorized_runner.exe test1_vectorized.cmx.dump) + (deps test1_vectorized.mli test1_vectorized.ml) + (action (run %{bin:ocamlopt.opt} %{deps} -S -O3 -g -dump-into-file -dcfg -dvectorize -dsel -dlinear -dlive -regalloc cfg -extension simd -vectorize -o test1_vectorized_runner.exe))) + +(rule + (enabled_if (and (= %{context_name} "main") (= %{architecture} "amd64")) ) + (target test1_vectorized.cmx.dump.output) + (deps ./filter.sh test1_vectorized.cmx.dump) + (action + (with-outputs-to + %{target} + (run %{deps})))) + +(rule + (alias runtest) + (enabled_if (and (= %{context_name} "main") (= %{architecture} "amd64")) ) + (action + (diff test1_vectorized.cmx.dump.expected test1_vectorized.cmx.dump.output))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (with-outputs-to + test1_vectorized.output + (run ./test1_vectorized_runner.exe)))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (copy test1.expected test1_vectorized.expected))) + +(rule + (alias runtest) + (enabled_if (= %{context_name} "main")) + (action + (diff test1_vectorized.expected test1_vectorized.output))) diff --git a/flambda-backend/tests/backend/vectorizer/examples.ml b/flambda-backend/tests/backend/vectorizer/examples.ml deleted file mode 100644 index 213b7fa12d2..00000000000 --- a/flambda-backend/tests/backend/vectorizer/examples.ml +++ /dev/null @@ -1,9 +0,0 @@ -let big_n = 1024 -let a = Array.make (big_n * 2) 0 -let b = Array.make (big_n * 2) 0 -let c = Array.make (big_n * 2) 0 - -let example1 () = - 
for i = 0 to 255 do - Array.set a i (Array.get b i + Array.get c i) - done diff --git a/flambda-backend/tests/backend/vectorizer/examples_unrolled.ml b/flambda-backend/tests/backend/vectorizer/examples_unrolled.ml deleted file mode 100644 index e2c6d3a3aa8..00000000000 --- a/flambda-backend/tests/backend/vectorizer/examples_unrolled.ml +++ /dev/null @@ -1,10 +0,0 @@ -let big_n = 1024 -let a = Array.make (big_n * 2) 0 -let b = Array.make (big_n * 2) 0 -let c = Array.make (big_n * 2) 0 - -let example1 () = - for i = 0 to 127 do - Array.set a (i * 2) (Array.get b (i * 2) + Array.get c (i * 2)); - Array.set a (i * 2 + 1) (Array.get b (i * 2 + 1) + Array.get c (i * 2 + 1)) - done diff --git a/flambda-backend/tests/backend/vectorizer/filter.sh b/flambda-backend/tests/backend/vectorizer/filter.sh index 5b400eb2b9c..8ca64e193c1 100755 --- a/flambda-backend/tests/backend/vectorizer/filter.sh +++ b/flambda-backend/tests/backend/vectorizer/filter.sh @@ -1,3 +1,3 @@ #!/bin/bash -grep "*** Vectorization" $1 --line-regexp --max-count=1 +grep "\*\*\*\* Vectorize selected computation:" $1 diff --git a/flambda-backend/tests/backend/vectorizer/gen/dune b/flambda-backend/tests/backend/vectorizer/gen/dune new file mode 100644 index 00000000000..aaa783a5e0c --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/gen/dune @@ -0,0 +1,2 @@ +(executable + (name gen_dune)) diff --git a/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml b/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml new file mode 100644 index 00000000000..54784707a27 --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/gen/gen_dune.ml @@ -0,0 +1,168 @@ +let enabled_if_main = {|(enabled_if (= %{context_name} "main"))|} + +let enabled_if_main_amd64 = + {|(enabled_if (and (= %{context_name} "main") (= %{architecture} "amd64")) )|} + +let flags = + "-S -O3 -g -dump-into-file -dcfg -dvectorize -dsel -dlinear -dlive -regalloc \ + cfg -extension simd" + +let runner name = name ^ "_runner.exe" + +let 
output name = name ^ ".output" + +let expected name = name ^ ".expected" + +let impl name = name ^ ".ml" + +let intf name = name ^ ".mli" + +let cmx_dump name = name ^ ".cmx.dump" + +let vectorized name = name ^ "_vectorized" + +let buf = Buffer.create 1000 + +let rule ~subst template = + Buffer.add_substitute buf subst template; + Buffer.output_buffer Out_channel.stdout buf; + Buffer.clear buf + +let compile ~enabled_if ~extra_flags name = + let subst = function + | "enabled_if" -> enabled_if + | "flags" -> flags + | "extra_flags" -> extra_flags + | "runner" -> runner name + | "deps" -> String.concat " " [intf name; impl name] + | "cmx_dump" -> cmx_dump name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (targets ${runner} ${cmx_dump}) + (deps ${deps}) + (action (run %{bin:ocamlopt.opt} %{deps} ${flags} ${extra_flags} -o ${runner}))) +|} + +let run ~enabled_if name = + let subst = function + | "enabled_if" -> enabled_if + | "runner" -> runner name + | "output" -> output name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (action + (with-outputs-to + ${output} + (run ./${runner})))) +|} + +let diff_output ~enabled_if name = + let subst = function + | "enabled_if" -> enabled_if + | "output" -> output name + | "expected" -> expected name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (action + (diff ${expected} ${output}))) +|} + +let copy_file ~enabled_if name new_name = + let subst = function + | "enabled_if" -> enabled_if + | "source" -> name + | "target" -> new_name + | _ -> assert false + in + rule ~subst + {| +(rule + (alias runtest) + ${enabled_if} + (action + (copy ${source} ${target}))) +|} + +let filter_dump ~enabled_if name = + let subst = function + | "enabled_if" -> enabled_if + | "dump" -> name |> cmx_dump + | "filtered" -> name |> cmx_dump |> output + | _ -> assert false + in + rule ~subst + {| +(rule + ${enabled_if} + (target 
${filtered}) + (deps ./filter.sh ${dump}) + (action + (with-outputs-to + %{target} + (run %{deps})))) +|} + +let copy_source_to_vectorize name = + copy_file ~enabled_if:enabled_if_main (name |> impl) + (name |> vectorized |> impl); + copy_file ~enabled_if:enabled_if_main (name |> intf) + (name |> vectorized |> intf) + +let compile_no_vectorizer name = + compile ~enabled_if:enabled_if_main ~extra_flags:"-no-vectorize" name + +let compile_with_vectorizer name = + compile ~enabled_if:enabled_if_main ~extra_flags:"-vectorize" + (vectorized name) + +let filter_vectorizer_dump ~enabled_if name = + filter_dump ~enabled_if (name |> vectorized) + +let diff_vectorizer_dump ~enabled_if name = + diff_output ~enabled_if (name |> vectorized |> cmx_dump) + +let run_no_vectorizer name = run ~enabled_if:enabled_if_main name + +let run_vectorized name = run ~enabled_if:enabled_if_main (name |> vectorized) + +let diff_output_no_vectorizer name = + diff_output ~enabled_if:enabled_if_main name + +let diff_output_vectorized name = + diff_output ~enabled_if:enabled_if_main (name |> vectorized) + +let copy_expected_output name = + copy_file ~enabled_if:enabled_if_main (name |> expected) + (name |> vectorized |> expected) + +let print_test name = + (* check expected test output is up to date *) + compile_no_vectorizer name; + run_no_vectorizer name; + diff_output_no_vectorizer name; + (* vectorizer *) + copy_source_to_vectorize name; + compile_with_vectorizer name; + filter_vectorizer_dump name ~enabled_if:enabled_if_main_amd64; + diff_vectorizer_dump name ~enabled_if:enabled_if_main_amd64; + run_vectorized name; + copy_expected_output name; + diff_output_vectorized name; + () + +let () = print_test "test1" diff --git a/flambda-backend/tests/backend/vectorizer/grep_vectorizer_cmx.expected b/flambda-backend/tests/backend/vectorizer/grep_vectorizer_cmx.expected deleted file mode 100644 index aaa82db1e7e..00000000000 --- a/flambda-backend/tests/backend/vectorizer/grep_vectorizer_cmx.expected 
+++ /dev/null @@ -1 +0,0 @@ -*** Vectorization diff --git a/flambda-backend/tests/backend/vectorizer/test1.expected b/flambda-backend/tests/backend/vectorizer/test1.expected index 0efd67f9502..4b1e5a753fe 100644 --- a/flambda-backend/tests/backend/vectorizer/test1.expected +++ b/flambda-backend/tests/backend/vectorizer/test1.expected @@ -1 +1,11 @@ -2 4 +add_pairs_immutable_record { d0 = 88 ; d1 = 110 } +add_fours_immutable_record { d0 = 34 ; d1 = 97; d2 = 88 ; d3 = 146 } +add_int_tuples (52, 102) +add_t2_to_t4 { d0 = 88 ; d1 = 110; d2 = 88 ; d3 = 110 } +add_t2_to_t4_reordered { d0 = 88 ; d1 = 110; d2 = 110 ; d3 = 88 } +copy_t2_to_t4_immutable_record { d0 = 8 ; d1 = 96; d2 = 8 ; d3 = 96 } +same_value_in_both_fields_immutable_record { d0 = 104 ; d1 = 104 } +copy_pairs_mutable_record { f0 = 42 ; f1 = 27 } +copy_pairs_mutable_record_return { f0 = 42 ; f1 = 27 } +copy_fours_mutable_record { f0 = 1 ; f1 = 2; f2 = 3 ; f3 = -4 } +add_fours_mutable_record { f0 = 43 ; f1 = 29; f2 = 426 ; f3 = 235 } diff --git a/flambda-backend/tests/backend/vectorizer/test1.ml b/flambda-backend/tests/backend/vectorizer/test1.ml index e6d41a69f52..0f7a77fdd1e 100644 --- a/flambda-backend/tests/backend/vectorizer/test1.ml +++ b/flambda-backend/tests/backend/vectorizer/test1.ml @@ -1,9 +1,143 @@ +[@@@ocaml.warnerror "+a-40-41-42"] -let add_pairs (a0, a1 : int64 * int64) (b0, b1 : int64 * int64) = - (Int64.add a0 b0, Int64.add a1 b1) -;; +(* Record with immutable int fields *) + +type t2 = + { d0 : int; + d1 : int + } + +let[@inline never] [@local never] add_pairs_immutable_record (a : t2) (b : t2) : + t2 = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1 } + +type t4 = + { d0 : int; + d1 : int; + d2 : int; + d3 : int + } + +let[@inline never] [@local never] add_fours_immutable_record (a : t4) (b : t4) : + t4 = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1; d2 = a.d2 + b.d2; d3 = a.d3 + b.d3 } + +(* Tuples *) + +let[@inline never] [@local never] add_int_tuples ((a0, a1) : int * int) + ((b0, b1) : int * int) = + 
a0 + b0, a1 + b1 + +let[@inline never] [@local never] add_t2_to_t4 (a : t2) (b : t2) : t4 = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1; d2 = a.d0 + b.d0; d3 = a.d1 + b.d1 } + +(* CR gyorsh: can't vectorize, requires a shuffle because the order of the first + vector access is not the same as the second. *) +let[@inline never] [@local never] add_t2_to_t4_reordered (a : t2) (b : t2) : t4 + = + { d0 = a.d0 + b.d0; d1 = a.d1 + b.d1; d3 = a.d0 + b.d0; d2 = a.d1 + b.d1 } + +let[@inline never] [@local never] copy_t2_to_t4_immutable_record (a : t2) : t4 = + { d0 = a.d0; d1 = a.d1; d2 = a.d0; d3 = a.d1 } + +(* CR gyorsh: can't vectorize same load. *) +let[@inline never] [@local never] same_value_in_both_fields_immutable_record + (a : t2) : t2 = + let x = a.d0 in + let y = a.d1 in + let z = x + y in + { d0 = z; d1 = z } + +type s2 = + { mutable f0 : int; + mutable f1 : int + } + +let[@inline never] [@local never] copy_pairs_mutable_record (a : s2) (b : s2) : + unit = + b.f0 <- a.f0; + b.f1 <- a.f1; + () + +(* CR gyorsh: dependency outside computation should only look at reg not mem *) +let[@inline never] [@local never] copy_pairs_mutable_record_return (a : s2) + (b : s2) : s2 = + b.f0 <- a.f0; + b.f1 <- a.f1; + b + +type s4 = + { mutable f0 : int; + mutable f1 : int; + mutable f2 : int; + mutable f3 : int + } + +let[@inline never] [@local never] copy_fours_mutable_record (a : s4) (b : s4) : + unit = + a.f0 <- b.f0; + a.f1 <- b.f1; + a.f2 <- b.f2; + a.f3 <- b.f3; + () + +let[@inline never] [@local never] add_fours_mutable_record (a : s4) (b : s4) : + unit = + a.f0 <- b.f0 + a.f0; + a.f1 <- b.f1 + a.f1; + a.f2 <- b.f2 + a.f2; + a.f3 <- b.f3 + a.f3; + () + +(* CR-someday gyorsh: use expect test *) + +let print_t2 ppf (t2 : t2) = + Format.fprintf ppf "{ d0 = %d ; d1 = %d }" t2.d0 t2.d1 + +let print_t4 ppf (t4 : t4) = + Format.fprintf ppf "{ d0 = %d ; d1 = %d; d2 = %d ; d3 = %d }" t4.d0 t4.d1 + t4.d2 t4.d3 + +let print_s2 ppf (s2 : s2) = + Format.fprintf ppf "{ f0 = %d ; f1 = %d }" 
s2.f0 s2.f1 + +let print_s4 ppf (s4 : s4) = + Format.fprintf ppf "{ f0 = %d ; f1 = %d; f2 = %d ; f3 = %d }" s4.f0 s4.f1 + s4.f2 s4.f3 + +let print_pair ppf (x, y) = Format.fprintf ppf "(%d, %d)" x y let () = - let sum0, sum1 = add_pairs (0L, 1L) (2L, 3L) in - Printf.printf "%Lx %Lx\n" sum0 sum1 -;; + Format.printf "add_pairs_immutable_record %a\n" print_t2 + (add_pairs_immutable_record { d0 = 8; d1 = 96 } { d0 = 80; d1 = 14 }); + Format.printf "add_fours_immutable_record %a\n" print_t4 + (add_fours_immutable_record + { d0 = 9; d1 = 12; d2 = 16; d3 = 98 } + { d0 = 25; d1 = 85; d2 = 72; d3 = 48 }); + Format.printf "add_int_tuples %a\n" print_pair + (add_int_tuples (48, 31) (4, 71)); + Format.printf "add_t2_to_t4 %a\n" print_t4 + (add_t2_to_t4 { d0 = 8; d1 = 96 } { d0 = 80; d1 = 14 }); + Format.printf "add_t2_to_t4_reordered %a\n" print_t4 + (add_t2_to_t4_reordered { d0 = 8; d1 = 96 } { d0 = 80; d1 = 14 }); + Format.printf "copy_t2_to_t4_immutable_record %a\n" print_t4 + (copy_t2_to_t4_immutable_record { d0 = 8; d1 = 96 }); + Format.printf "same_value_in_both_fields_immutable_record %a\n" print_t2 + (same_value_in_both_fields_immutable_record { d0 = 8; d1 = 96 }); + Format.printf "copy_pairs_mutable_record %a\n" print_s2 + (let s2 = { f0 = 42; f1 = 27 } in + let s2' = { f0 = 1; f1 = -1 } in + copy_pairs_mutable_record s2 s2'; + s2); + Format.printf "copy_pairs_mutable_record_return %a\n" print_s2 + (copy_pairs_mutable_record_return { f0 = 42; f1 = 27 } { f0 = 0; f1 = -100 }); + Format.printf "copy_fours_mutable_record %a\n" print_s4 + (let s4 = { f0 = 42; f1 = 27; f2 = 423; f3 = 239 } in + let s4' = { f0 = 1; f1 = 2; f2 = 3; f3 = -4 } in + copy_fours_mutable_record s4 s4'; + s4); + Format.printf "add_fours_mutable_record %a\n" print_s4 + (let s4 = { f0 = 42; f1 = 27; f2 = 423; f3 = 239 } in + let s4' = { f0 = 1; f1 = 2; f2 = 3; f3 = -4 } in + add_fours_mutable_record s4 s4'; + s4) diff --git a/flambda-backend/tests/backend/vectorizer/test1.mli 
b/flambda-backend/tests/backend/vectorizer/test1.mli new file mode 100644 index 00000000000..5b909d90a8c --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/test1.mli @@ -0,0 +1 @@ +(* blank, make sure all the functions are called from top-level *) diff --git a/flambda-backend/tests/backend/vectorizer/test1_simd.ml b/flambda-backend/tests/backend/vectorizer/test1_simd.ml deleted file mode 100644 index 3ffad872fd4..00000000000 --- a/flambda-backend/tests/backend/vectorizer/test1_simd.ml +++ /dev/null @@ -1,60 +0,0 @@ -module Int64_u = struct - type t = int64# - - external to_int64 : t -> (int64[@local_opt]) = "%box_int64" [@@warning "-187"] - - external of_int64 : (int64[@local_opt]) -> t = "%unbox_int64" [@@warning "-187"] -end - -module Int64x2 = struct - type t = int64x2 - external add : t -> t -> t = "ocaml_simd_unreachable" "caml_sse2_int64x2_add" - [@@noalloc] [@@unboxed] [@@builtin] - - external low_of - : int64# - -> (t[@unboxed]) - = "ocaml_simd_unreachable" "caml_int64x2_low_of_int64" - [@@noalloc] [@@builtin] - - external low_to - : (t[@unboxed]) - -> int64# - = "ocaml_simd_unreachable" "caml_int64x2_low_to_int64" - [@@noalloc] [@@builtin] - - external high_64_to_low_64 - : onto:t - -> from:t - -> t - = "ocaml_simd_unreachable" "caml_sse_vec128_high_64_to_low_64" - [@@noalloc] [@@unboxed] [@@builtin] - - external low_64_to_high_64 - : onto:t - -> from:t - -> t - = "ocaml_simd_unreachable" "caml_sse_vec128_low_64_to_high_64" - [@@noalloc] [@@unboxed] [@@builtin] - - let[@inline always] set a b = - let a = low_of a in - let b = low_of b in - low_64_to_high_64 ~onto:a ~from:b - ;; - - let of_int64s a b = set (Int64_u.of_int64 a) (Int64_u.of_int64 b) - let low_int64 t = Int64_u.to_int64(low_to t) - let high_int64 t = Int64_u.to_int64(low_to (high_64_to_low_64 ~onto:t ~from:t)) -;; -end - -let add_pairs (a0, a1 : int64 * int64) (b0, b1 : int64 * int64) = - let sum_vector = (Int64x2.add (Int64x2.of_int64s a0 a1) (Int64x2.of_int64s b0 b1)) in - 
(Int64x2.low_int64 sum_vector, Int64x2.high_int64 sum_vector) -;; - -let () = - let sum0, sum1 = add_pairs (0L, 1L) (2L, 3L) in - Printf.printf "%Lx %Lx\n" sum0 sum1 -;; diff --git a/flambda-backend/tests/backend/vectorizer/test1_vectorized.cmx.dump.expected b/flambda-backend/tests/backend/vectorizer/test1_vectorized.cmx.dump.expected new file mode 100644 index 00000000000..bfbcfa3b662 --- /dev/null +++ b/flambda-backend/tests/backend/vectorizer/test1_vectorized.cmx.dump.expected @@ -0,0 +1,9 @@ +**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test1_vectorized.add_pairs_immutable_record) +**** Vectorize selected computation: 8 groups, 16 scalar instructions, 14 vector instructions, cost = -2 (Test1_vectorized.add_fours_immutable_record) +**** Vectorize selected computation: 4 groups, 8 scalar instructions, 7 vector instructions, cost = -1 (Test1_vectorized.add_int_tuples) +**** Vectorize selected computation: 5 groups, 10 scalar instructions, 8 vector instructions, cost = -2 (Test1_vectorized.add_t2_to_t4) +**** Vectorize selected computation: 3 groups, 6 scalar instructions, 3 vector instructions, cost = -3 (Test1_vectorized.copy_t2_to_t4_immutable_record) +**** Vectorize selected computation: 2 groups, 4 scalar instructions, 2 vector instructions, cost = -2 (Test1_vectorized.copy_pairs_mutable_record) +**** Vectorize selected computation: 2 groups, 4 scalar instructions, 2 vector instructions, cost = -2 (Test1_vectorized.copy_pairs_mutable_record_return) +**** Vectorize selected computation: 4 groups, 8 scalar instructions, 4 vector instructions, cost = -4 (Test1_vectorized.copy_fours_mutable_record) +**** Vectorize selected computation: 8 groups, 16 scalar instructions, 14 vector instructions, cost = -2 (Test1_vectorized.add_fours_mutable_record) diff --git a/lambda/matching.ml b/lambda/matching.ml index 44beae72709..c794d044f51 100644 --- a/lambda/matching.ml +++ b/lambda/matching.ml @@ -104,18 +104,16 @@ 
exception Error of Location.t * error let dbg = false -let jkind_layout_default_to_value_and_check_not_void loc jkind = - let rec contains_void : Jkind.Layout.Const.t -> bool = function - | Any -> false +let sort_check_not_void loc sort = + let rec contains_void : Jkind.Sort.Const.t -> bool = function | Base Void -> true | Base (Value | Float64 | Float32 | Word | Bits8 | Bits16 | Bits32 | Bits64 | Vec128) -> false | Product [] -> - Misc.fatal_error "nil in jkind_layout_default_to_value_and_check_not_void" - | Product ts -> List.exists contains_void ts + Misc.fatal_error "nil in sort_check_not_void" + | Product ss -> List.exists contains_void ss in - let layout = Jkind.get_layout_defaulting_to_value jkind in - if contains_void layout then + if contains_void sort then raise (Error (loc, Void_layout)) ;; @@ -231,7 +229,7 @@ module Half_simple : sig type nonrec clause = pattern Non_empty_row.t clause val of_clause : - arg:lambda -> arg_sort:Jkind.sort -> General.clause -> clause + arg:lambda -> arg_sort:Jkind.Sort.Const.t -> General.clause -> clause end = struct include Patterns.Half_simple @@ -308,7 +306,7 @@ module Simple : sig val explode_or_pat : arg:lambda -> - arg_sort:Jkind.sort -> + arg_sort:Jkind.Sort.Const.t -> Half_simple.pattern -> mk_action:(vars:Ident.t list -> lambda) -> patbound_action_vars:Ident.t list -> @@ -1011,7 +1009,7 @@ end type 'row pattern_matching = { mutable cases : 'row list; - args : (lambda * let_kind * Jkind.sort * layout) list; + args : (lambda * let_kind * Jkind.Sort.Const.t * layout) list; (** args are not just Ident.t in at least the following cases: - when matching the arguments of a constructor, direct field projections are used (make_field_args) @@ -1822,7 +1820,7 @@ let make_line_matching get_expr_args head def = function } type 'a division = { - args : (lambda * let_kind * Jkind.sort * layout) list; + args : (lambda * let_kind * Jkind.Sort.Const.t * layout) list; cells : ('a * cell) list } @@ -1911,9 +1909,7 @@ let 
get_pat_args_constr p rem = match p with | { pat_desc = Tpat_construct (_, {cstr_args}, args, _) } -> List.iter2 - (fun { ca_jkind } arg -> - jkind_layout_default_to_value_and_check_not_void - arg.pat_loc ca_jkind) + (fun { ca_sort } arg -> sort_check_not_void arg.pat_loc ca_sort) cstr_args args; (* CR layouts v5: This sanity check will have to go (or be replaced with a void-specific check) when we have other non-value sorts *) @@ -1929,12 +1925,11 @@ let get_expr_args_constr ~scopes head (arg, _mut, sort, layout) rem = let loc = head_loc ~scopes head in (* CR layouts v5: This sanity check should be removed or changed to specifically check for void when we add other non-value sorts. *) - List.iter (fun { ca_jkind } -> - jkind_layout_default_to_value_and_check_not_void head.pat_loc ca_jkind) + List.iter (fun { ca_sort } -> sort_check_not_void head.pat_loc ca_sort) cstr.cstr_args; let ubr = Translmode.transl_unique_barrier (head.pat_unique_barrier) in let sem = add_barrier_to_read ubr Reads_agree in - let make_field_access binding_kind jkind ~field ~pos = + let make_field_access binding_kind sort ~field ~pos = let prim = match cstr.cstr_shape with | Constructor_uniform_value -> Pfield (pos, Pointer, sem) @@ -1956,7 +1951,6 @@ let get_expr_args_constr ~scopes head (arg, _mut, sort, layout) rem = let shape = Lambda.transl_mixed_product_shape shape in Pmixedfield (pos, read, shape, sem) in - let sort = Jkind.sort_of_jkind jkind in let layout = Typeopt.layout_of_sort head.pat_loc sort in (Lprim (prim, [ arg ], loc), binding_kind, sort, layout) in @@ -1967,15 +1961,15 @@ let get_expr_args_constr ~scopes head (arg, _mut, sort, layout) rem = match cstr.cstr_repr with | Variant_boxed _ -> List.mapi - (fun i { ca_jkind } -> - make_field_access str ca_jkind ~field:i ~pos:i) + (fun i { ca_sort } -> + make_field_access str ca_sort ~field:i ~pos:i) cstr.cstr_args @ rem | Variant_unboxed -> (arg, str, sort, layout) :: rem | Variant_extensible -> List.mapi - (fun i { ca_jkind } -> 
- make_field_access str ca_jkind ~field:i ~pos:(i+1)) + (fun i { ca_sort } -> + make_field_access str ca_sort ~field:i ~pos:(i+1)) cstr.cstr_args @ rem @@ -2001,7 +1995,7 @@ let get_expr_args_variant_nonconst ~scopes head (arg, _mut, _sort, _layout) let ubr = Translmode.transl_unique_barrier (head.pat_unique_barrier) in let field_prim = nonconstant_variant_field ubr 1 in let str = add_barrier_to_let_kind ubr Alias in - (Lprim (field_prim, [ arg ], loc), str, Jkind.Sort.for_variant_arg, + (Lprim (field_prim, [ arg ], loc), str, Jkind.Sort.Const.for_variant_arg, layout_variant_arg) :: rem @@ -2217,7 +2211,7 @@ let inline_lazy_force arg pos loc = let get_expr_args_lazy ~scopes head (arg, _mut, _sort, _layout) rem = let loc = head_loc ~scopes head in - (inline_lazy_force arg Rc_normal loc, Strict, Jkind.Sort.for_lazy_body, + (inline_lazy_force arg Rc_normal loc, Strict, Jkind.Sort.Const.for_lazy_body, layout_lazy_contents) :: rem let divide_lazy ~scopes head ctx pm = @@ -2252,7 +2246,7 @@ let get_expr_args_tuple ~scopes head (arg, _mut, _sort, _layout) rem = rem else (Lprim (Pfield (pos, Pointer, sem), [ arg ], loc), str, - Jkind.Sort.for_tuple_element, layout_tuple_element) + Jkind.Sort.Const.for_tuple_element, layout_tuple_element) :: make_args (pos + 1) in make_args 0 @@ -2262,6 +2256,7 @@ let get_expr_args_unboxed_tuple ~scopes shape head (arg, _mut, _sort, _layout) let loc = head_loc ~scopes head in let shape = List.map (fun (_, sort) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in sort, (* CR layouts v7.1: consider whether more accurate [Lambda.layout]s here would make a difference for later optimizations. 
*) @@ -2296,7 +2291,7 @@ let record_matching_line num_fields lbl_pat_list = List.iter (fun (_, lbl, pat) -> (* CR layouts v5: This void sanity check can be removed when we add proper void support (or whenever we remove `lbl_pos_void`) *) - jkind_layout_default_to_value_and_check_not_void pat.pat_loc lbl.lbl_jkind; + sort_check_not_void pat.pat_loc lbl.lbl_sort; patv.(lbl.lbl_pos) <- pat) lbl_pat_list; Array.to_list patv @@ -2330,11 +2325,9 @@ let get_expr_args_record ~scopes head (arg, _mut, sort, layout) rem = rem else let lbl = all_labels.(pos) in - jkind_layout_default_to_value_and_check_not_void - head.pat_loc lbl.lbl_jkind; + sort_check_not_void head.pat_loc lbl.lbl_sort; let ptr = Typeopt.maybe_pointer_type head.pat_env lbl.lbl_arg in - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - let lbl_layout = Typeopt.layout_of_sort lbl.lbl_loc lbl_sort in + let lbl_layout = Typeopt.layout_of_sort lbl.lbl_loc lbl.lbl_sort in let sem = if Types.is_mutable lbl.lbl_mut then Reads_vary else Reads_agree in @@ -2345,21 +2338,21 @@ let get_expr_args_record ~scopes head (arg, _mut, sort, layout) rem = | Record_boxed _ | Record_inlined (_, Constructor_uniform_value, Variant_boxed _) -> Lprim (Pfield (lbl.lbl_pos, ptr, sem), [ arg ], loc), - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_unboxed | Record_inlined (_, _, Variant_unboxed) -> arg, sort, layout | Record_float -> (* TODO: could optimise to Alloc_local sometimes *) Lprim (Pfloatfield (lbl.lbl_pos, sem, alloc_heap), [ arg ], loc), (* Here we are projecting a boxed float from a float record. *) - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_ufloat -> Lprim (Pufloatfield (lbl.lbl_pos, sem), [ arg ], loc), (* Here we are projecting an unboxed float from a float record. 
*) - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_inlined (_, Constructor_uniform_value, Variant_extensible) -> Lprim (Pfield (lbl.lbl_pos + 1, ptr, sem), [ arg ], loc), - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout | Record_inlined (_, Constructor_mixed _, Variant_extensible) -> (* CR layouts v5.9: support this *) fatal_error @@ -2386,7 +2379,7 @@ let get_expr_args_record ~scopes head (arg, _mut, sort, layout) rem = { value_prefix_len; flat_suffix } in Lprim (Pmixedfield (lbl.lbl_pos, read, shape, sem), [ arg ], loc), - lbl_sort, lbl_layout + lbl.lbl_sort, lbl_layout in let str = if Types.is_mutable lbl.lbl_mut then StrictOpt else Alias in let str = add_barrier_to_let_kind ubr str in @@ -2408,7 +2401,7 @@ let get_expr_args_record_unboxed_product ~scopes head in let lbl_layouts = Array.map (fun lbl -> - Typeopt.layout_of_sort lbl.lbl_loc (Jkind.sort_of_jkind lbl.lbl_jkind) + Typeopt.layout_of_sort lbl.lbl_loc lbl.lbl_sort ) all_labels |> Array.to_list in @@ -2417,8 +2410,7 @@ let get_expr_args_record_unboxed_product ~scopes head rem else let lbl = all_labels.(pos) in - jkind_layout_default_to_value_and_check_not_void - head.pat_loc lbl.lbl_jkind; + sort_check_not_void head.pat_loc lbl.lbl_sort; let access = if Array.length all_labels = 1 then arg (* erase singleton unboxed records before lambda *) else @@ -2432,9 +2424,8 @@ let get_expr_args_record_unboxed_product ~scopes head else Alias in - let sort = Jkind.sort_of_jkind lbl.lbl_jkind in - let layout = Typeopt.layout_of_sort lbl.lbl_loc sort in - (access, str, sort, layout) :: make_args (pos + 1) + let layout = Typeopt.layout_of_sort lbl.lbl_loc lbl.lbl_sort in + (access, str, lbl.lbl_sort, layout) :: make_args (pos + 1) in make_args 0 @@ -2475,6 +2466,7 @@ let get_expr_args_array ~scopes kind head (arg, _mut, _sort, _layout) rem = | Array (am, arg_sort, len) -> am, arg_sort, len | _ -> assert false in + let arg_sort = Jkind.Sort.default_for_transl_and_get arg_sort in let loc = head_loc ~scopes 
head in let rec make_args pos = if pos >= len then @@ -3880,6 +3872,7 @@ and do_compile_matching ~scopes value_kind repr partial ctx pmh = (combine_constructor value_kind ploc arg ph.pat_env ph.pat_unique_barrier cstr partial) ctx pm | Array (_, elt_sort, _) -> + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in let kind = Typeopt.array_pattern_kind pomega elt_sort in compile_test (compile_match ~scopes value_kind repr partial) @@ -4094,7 +4087,7 @@ let for_trywith ~scopes ~return_layout loc param pat_act_list = It is important to *not* include location information in the reraise (hence the [_noloc]) to avoid seeing this silent reraise in exception backtraces. *) - compile_matching ~scopes ~arg_sort:Jkind.Sort.for_predef_value + compile_matching ~scopes ~arg_sort:Jkind.Sort.Const.for_predef_value ~arg_layout:layout_block ~return_layout loc ~failer:(Reraise_noloc param) None param pat_act_list Partial @@ -4211,12 +4204,12 @@ let assign_pat ~scopes body_layout opt nraise catch_ids loc pat pat_sort lam = opt := true; List.fold_left2 (fun acc (_, pat) lam -> - collect Jkind.Sort.for_tuple_element acc pat lam) + collect Jkind.Sort.Const.for_tuple_element acc pat lam) acc patl lams | Tpat_tuple patl, Lconst (Const_block (_, scl)) -> opt := true; let collect_const acc (_, pat) sc = - collect Jkind.Sort.for_tuple_element acc pat (Lconst sc) + collect Jkind.Sort.Const.for_tuple_element acc pat (Lconst sc) in List.fold_left2 collect_const acc patl scl | _ -> @@ -4293,7 +4286,7 @@ let for_tupled_function ~scopes ~return_layout loc paraml pats_act_list partial (* The arguments of a tupled function are always values since they must be tuple elements *) let args = - List.map (fun id -> (Lvar id, Strict, Jkind.Sort.for_tuple_element, + List.map (fun id -> (Lvar id, Strict, Jkind.Sort.Const.for_tuple_element, layout_tuple_element)) paraml in @@ -4393,12 +4386,12 @@ let do_for_multiple_match ~scopes ~return_layout loc paraml mode pat_act_list pa let sloc = 
Scoped_location.of_location ~scopes loc in Lprim (Pmakeblock (0, Immutable, None, mode), param_lambda, sloc) in - let arg_sort = Jkind.Sort.for_tuple in + let arg_sort = Jkind.Sort.Const.for_tuple in let handler = let partial = check_partial pat_act_list partial in let rows = map_on_rows (fun p -> (p, [])) pat_act_list in toplevel_handler ~scopes ~return_layout loc ~failer:Raise_match_failure - partial [ (arg, Strict, Jkind.Sort.for_tuple, layout_block) ] rows in + partial [ (arg, Strict, Jkind.Sort.Const.for_tuple, layout_block) ] rows in handler (fun partial pm1 -> let pm1_half = { pm1 with diff --git a/lambda/matching.mli b/lambda/matching.mli index cd87113064c..c1694124f5f 100644 --- a/lambda/matching.mli +++ b/lambda/matching.mli @@ -22,7 +22,7 @@ open Debuginfo.Scoped_location (* Entry points to match compiler *) val for_function: scopes:scopes -> - arg_sort:Jkind.sort -> arg_layout:layout -> return_layout:layout -> + arg_sort:Jkind.Sort.Const.t -> arg_layout:layout -> return_layout:layout -> Location.t -> int ref option -> lambda -> (pattern * lambda) list -> partial -> lambda @@ -31,12 +31,12 @@ val for_trywith: lambda -> (pattern * lambda) list -> lambda val for_let: - scopes:scopes -> arg_sort:Jkind.sort -> return_layout:layout -> + scopes:scopes -> arg_sort:Jkind.Sort.Const.t -> return_layout:layout -> Location.t -> lambda -> pattern -> lambda -> lambda val for_multiple_match: scopes:scopes -> return_layout:layout -> Location.t -> - (lambda * Jkind.sort * layout) list -> locality_mode -> + (lambda * Jkind.Sort.Const.t * layout) list -> locality_mode -> (pattern * lambda) list -> partial -> lambda @@ -57,7 +57,7 @@ val for_tupled_function: *) val for_optional_arg_default: scopes:scopes -> Location.t -> pattern -> param:Ident.t -> - default_arg:lambda -> default_arg_sort:Jkind.sort -> + default_arg:lambda -> default_arg_sort:Jkind.Sort.Const.t -> return_layout:layout -> lambda -> lambda exception Cannot_flatten diff --git 
a/lambda/transl_array_comprehension.ml b/lambda/transl_array_comprehension.ml index 440ae5779c0..4f6183380aa 100644 --- a/lambda/transl_array_comprehension.ml +++ b/lambda/transl_array_comprehension.ml @@ -444,7 +444,7 @@ let iterator ~transl_exp ~scopes ~loc : | Texp_comp_range { ident; pattern = _; start; stop; direction } -> let bound name value = Let_binding.make (Immutable Strict) layout_int name - (transl_exp ~scopes Jkind.Sort.for_predef_value value) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value value) in let start = bound "start" start in let stop = bound "stop" stop in @@ -462,7 +462,7 @@ let iterator ~transl_exp ~scopes ~loc : | Texp_comp_in { pattern; sequence = iter_arr_exp } -> let iter_arr = Let_binding.make (Immutable Strict) layout_any_value "iter_arr" - (transl_exp ~scopes Jkind.Sort.for_predef_value iter_arr_exp) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value iter_arr_exp) in let iter_arr_kind = (* CR layouts v4: [~elt_sort:None] here is not ideal and @@ -493,7 +493,7 @@ let iterator ~transl_exp ~scopes ~loc : for_dir = Upto; for_body = Matching.for_let ~scopes - ~arg_sort:Jkind.Sort.for_array_comprehension_element + ~arg_sort:Jkind.Sort.Const.for_array_comprehension_element ~return_layout:layout_int pattern.pat_loc (Lprim ( Parrayrefu @@ -550,7 +550,7 @@ let clause ~transl_exp ~scopes ~loc = function | Texp_comp_when cond -> fun body -> Lifthenelse - ( transl_exp ~scopes Jkind.Sort.for_predef_value cond, + ( transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, body, lambda_unit, layout_unit ) @@ -874,8 +874,8 @@ let comprehension ~transl_exp ~scopes ~loc ~(array_kind : Lambda.array_kind) (* CR layouts v4: Ensure that the [transl_exp] here can cope with non-values. 
*) ~body: - (transl_exp ~scopes Jkind.Sort.for_array_comprehension_element - comp_body)), + (transl_exp ~scopes + Jkind.Sort.Const.for_array_comprehension_element comp_body)), (* If it was dynamically grown, cut it down to size *) match array_sizing with | Fixed_size -> array.var diff --git a/lambda/transl_array_comprehension.mli b/lambda/transl_array_comprehension.mli index 2caeddc2f1d..690183c9fe6 100644 --- a/lambda/transl_array_comprehension.mli +++ b/lambda/transl_array_comprehension.mli @@ -22,7 +22,7 @@ open Debuginfo.Scoped_location so is parameterized by [Translcore.transl_exp], its [scopes] argument, and the [loc]ation. *) val comprehension : - transl_exp:(scopes:scopes -> Jkind.sort -> expression -> lambda) -> + transl_exp:(scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda) -> scopes:scopes -> loc:scoped_location -> array_kind:array_kind -> diff --git a/lambda/transl_list_comprehension.ml b/lambda/transl_list_comprehension.ml index 02d85b9d28c..8694c387c19 100644 --- a/lambda/transl_list_comprehension.ml +++ b/lambda/transl_list_comprehension.ml @@ -169,7 +169,7 @@ let iterator ~transl_exp ~scopes = function correct (i.e., left-to-right) order *) let transl_bound var bound = Let_binding.make (Immutable Strict) layout_int var - (transl_exp ~scopes Jkind.Sort.for_predef_value bound) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value bound) in let start = transl_bound "start" start in let stop = transl_bound "stop" stop in @@ -185,7 +185,7 @@ let iterator ~transl_exp ~scopes = function | Texp_comp_in { pattern; sequence } -> let iter_list = Let_binding.make (Immutable Strict) layout_any_value "iter_list" - (transl_exp ~scopes Jkind.Sort.for_predef_value sequence) + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value sequence) in (* Create a fresh variable to use as the function argument *) let element = Ident.create_local "element" in @@ -194,10 +194,10 @@ let iterator ~transl_exp ~scopes = function element; element_kind = Typeopt.layout 
pattern.pat_env pattern.pat_loc - Jkind.Sort.for_list_element pattern.pat_type; + Jkind.Sort.Const.for_list_element pattern.pat_type; add_bindings = (* CR layouts: to change when we allow non-values in sequences *) - Matching.for_let ~scopes ~arg_sort:Jkind.Sort.for_list_element + Matching.for_let ~scopes ~arg_sort:Jkind.Sort.Const.for_list_element ~return_layout:layout_any_value pattern.pat_loc (Lvar element) pattern } @@ -287,7 +287,7 @@ let rec translate_clauses ~transl_exp ~scopes ~loc ~comprehension_body Let_binding.let_all arg_lets bindings | Texp_comp_when cond -> Lifthenelse - ( transl_exp ~scopes Jkind.Sort.for_predef_value cond, + ( transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, body ~accumulator, accumulator, layout_any_value (* [list]s have the standard representation *) )) @@ -298,7 +298,7 @@ let comprehension ~transl_exp ~scopes ~loc { comp_body; comp_clauses } = translate_clauses ~transl_exp ~scopes ~loc ~comprehension_body:(fun ~accumulator -> rev_list_snoc_local ~loc ~init:accumulator - ~last:(transl_exp ~scopes Jkind.Sort.for_list_element comp_body)) + ~last:(transl_exp ~scopes Jkind.Sort.Const.for_list_element comp_body)) ~accumulator:rev_list_nil comp_clauses in Lambda_utils.apply ~loc ~mode:alloc_heap diff --git a/lambda/transl_list_comprehension.mli b/lambda/transl_list_comprehension.mli index 65ec6208328..fcaeaaa2dca 100644 --- a/lambda/transl_list_comprehension.mli +++ b/lambda/transl_list_comprehension.mli @@ -15,7 +15,7 @@ open Debuginfo.Scoped_location so is parameterized by [Translcore.transl_exp], its [scopes] argument, and the [loc]ation. 
*) val comprehension : - transl_exp:(scopes:scopes -> Jkind.sort -> expression -> lambda) -> + transl_exp:(scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda) -> scopes:scopes -> loc:scoped_location -> comprehension -> diff --git a/lambda/translclass.ml b/lambda/translclass.ml index 49022547fb3..5c0df6fcf7a 100644 --- a/lambda/translclass.ml +++ b/lambda/translclass.ml @@ -105,7 +105,7 @@ let transl_meth_list lst = let set_inst_var ~scopes obj id expr = Lprim(Psetfield_computed (Typeopt.maybe_pointer expr, Assignment modify_heap), - [Lvar obj; Lvar id; transl_exp ~scopes Jkind.Sort.for_instance_var expr], + [Lvar obj; Lvar id; transl_exp ~scopes Jkind.Sort.Const.for_instance_var expr], Loc_unknown) let transl_val tbl create name = @@ -212,7 +212,7 @@ let rec build_object_init ~scopes cl_table obj params inh_init obj_init cl = (inh_init, let build params rem = let param = name_pattern "param" pat in - let arg_sort = Jkind.Sort.for_class_arg in + let arg_sort = Jkind.Sort.Const.for_class_arg in let arg_layout = Typeopt.layout pat.pat_env pat.pat_loc arg_sort pat.pat_type in @@ -366,7 +366,7 @@ let rec build_class_init ~scopes cla cstr super inh_init cl_init msubst top cl = | Tcf_method (name, _, Tcfk_concrete (_, exp)) -> let scopes = enter_method_definition ~scopes name.txt in let met_code = - msubst true (transl_scoped_exp ~scopes Jkind.Sort.for_method exp) + msubst true (transl_scoped_exp ~scopes Jkind.Sort.Const.for_method exp) in let met_code = if !Clflags.native_code && List.length met_code = 1 then @@ -383,7 +383,7 @@ let rec build_class_init ~scopes cla cstr super inh_init cl_init msubst top cl = Lsequence(mkappl (oo_prim "add_initializer", Lvar cla :: msubst false (transl_exp ~scopes - Jkind.Sort.for_initializer exp), + Jkind.Sort.Const.for_initializer exp), layout_unit), cl_init), methods, values) @@ -502,7 +502,7 @@ let rec transl_class_rebind ~scopes obj_init cl vf = transl_class_rebind ~scopes obj_init cl vf in let build params rem = let param = 
name_pattern "param" pat in - let arg_sort = Jkind.Sort.for_class_arg in + let arg_sort = Jkind.Sort.Const.for_class_arg in let arg_layout = Typeopt.layout pat.pat_env pat.pat_loc arg_sort pat.pat_type in diff --git a/lambda/translcore.ml b/lambda/translcore.ml index 384e054c89f..146fe666902 100644 --- a/lambda/translcore.ml +++ b/lambda/translcore.ml @@ -51,13 +51,13 @@ let use_dup_for_constant_mutable_arrays_bigger_than = 4 appropriately. *) let sort_must_not_be_void loc ty sort = - if Jkind.Sort.is_void_defaulting sort then raise (Error (loc, Void_sort ty)) + if Jkind.Sort.Const.(equal void sort) then raise (Error (loc, Void_sort ty)) let layout_exp sort e = layout e.exp_env e.exp_loc sort e.exp_type let layout_pat sort p = layout p.pat_env p.pat_loc sort p.pat_type let check_record_field_sort loc sort = - match Jkind.Sort.default_to_value_and_get sort with + match (sort : Jkind.Sort.Const.t) with | Base (Value | Float64 | Float32 | Bits8 | Bits16 | Bits32 | Bits64 | Vec128 | Word) -> () | Base Void -> raise (Error (loc, Illegal_void_record_field)) @@ -268,7 +268,7 @@ let assert_failed loc ~scopes exp = type fusable_function = { params : function_param list ; body : function_body - ; return_sort : Jkind.sort + ; return_sort : Jkind.Sort.Const.t ; return_mode : locality_mode ; region : bool } @@ -310,10 +310,11 @@ let fuse_method_arity (parent : fusable_function) : fusable_function = Mode.Alloc.disallow_right Mode.Alloc.legacy } } in + let return_sort = Jkind.Sort.default_for_transl_and_get method_.ret_sort in { params = self_param :: method_.params; body = method_.body; return_mode = transl_alloc_mode_l method_.ret_mode; - return_sort = method_.ret_sort; + return_sort; region = true; } | _ -> parent @@ -419,6 +420,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (event_before ~scopes body (transl_exp ~scopes sort body)) | Texp_function { params; body; ret_sort; ret_mode; alloc_mode; zero_alloc } -> + let ret_sort = Jkind.Sort.default_for_transl_and_get 
ret_sort in transl_function ~in_new_scope ~scopes e params body ~alloc_mode ~ret_mode ~ret_sort ~region:true ~zero_alloc | Texp_apply({ exp_desc = Texp_ident(path, _, {val_kind = Val_prim p}, @@ -433,8 +435,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | ((_, arg_repr) :: prim_repr), ((_, Arg (x, _)) :: oargs) -> let arg_exps, extra_args = cut_args prim_repr oargs in let arg_sort = - Jkind.Sort.of_const - (Translprim.sort_of_native_repr arg_repr ~poly_sort:psort) + Translprim.sort_of_native_repr arg_repr ~poly_sort:psort in (x, arg_sort) :: arg_exps, extra_args | _, ((_, Omitted _) :: _) -> assert false @@ -490,9 +491,10 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (transl_apply ~scopes ~tailcall ~inlined ~specialised ~assume_zero_alloc ~result_layout - ~position ~mode (transl_exp ~scopes Jkind.Sort.for_function funct) + ~position ~mode (transl_exp ~scopes Jkind.Sort.Const.for_function funct) oargs (of_location ~scopes e.exp_loc)) | Texp_match(arg, arg_sort, pat_expr_list, partial) -> + let arg_sort = Jkind.Sort.default_for_transl_and_get arg_sort in transl_match ~scopes ~arg_sort ~return_sort:sort e arg pat_expr_list partial | Texp_try(body, pat_expr_list) -> @@ -505,7 +507,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | Texp_tuple (el, alloc_mode) -> let ll, shape = transl_value_list_with_shape ~scopes - (List.map (fun (_, a) -> (a, Jkind.Sort.for_tuple_element)) el) + (List.map (fun (_, a) -> (a, Jkind.Sort.Const.for_tuple_element)) el) in begin try Lconst(Const_block(0, List.map extract_constant ll)) @@ -516,6 +518,10 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (of_location ~scopes e.exp_loc)) end | Texp_unboxed_tuple el -> + let el = + List.map (fun (l, e, s) -> + (l, e, Jkind.Sort.default_for_transl_and_get s)) el + in let shape = List.map (fun (_, e, s) -> layout_exp s e) el in let ll = List.map (fun (_, e, s) -> transl_exp ~scopes s e) el in Lprim(Pmake_unboxed_product shape, @@ -523,10 +529,7 @@ and transl_exp0 ~in_new_scope ~scopes 
sort e = of_location ~scopes e.exp_loc) | Texp_construct(_, cstr, args, alloc_mode) -> let args_with_sorts = - List.map2 (fun { ca_jkind } e -> - let sort = Jkind.sort_of_jkind ca_jkind in - e, sort) - cstr.cstr_args args + List.map2 (fun { ca_sort } e -> e, ca_sort) cstr.cstr_args args in let ll = List.map (fun (e, sort) -> transl_exp ~scopes sort e) args_with_sorts @@ -618,7 +621,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = begin match arg with None -> Lconst(const_int tag) | Some (arg, alloc_mode) -> - let lam = transl_exp ~scopes Jkind.Sort.for_poly_variant arg in + let lam = transl_exp ~scopes Jkind.Sort.Const.for_poly_variant arg in try Lconst(Const_block(0, [const_int tag; extract_constant lam])) @@ -637,13 +640,12 @@ and transl_exp0 ~in_new_scope ~scopes sort e = transl_record_unboxed_product ~scopes e.exp_loc e.exp_env fields representation extended_expression | Texp_field(arg, id, lbl, float, ubr) -> - let targ = transl_exp ~scopes Jkind.Sort.for_record arg in + let targ = transl_exp ~scopes Jkind.Sort.Const.for_record arg in let sem = if Types.is_mutable lbl.lbl_mut then Reads_vary else Reads_agree in let sem = add_barrier_to_read (transl_unique_barrier ubr) sem in - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - check_record_field_sort id.loc lbl_sort; + check_record_field_sort id.loc lbl.lbl_sort; begin match lbl.lbl_repres with Record_boxed _ | Record_inlined (_, Constructor_uniform_value, Variant_boxed _) -> @@ -701,10 +703,9 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | Texp_unboxed_field(arg, arg_sort, _id, lbl, _) -> begin match lbl.lbl_repres with | Record_unboxed_product -> - let lbl_layout l = - layout e.exp_env l.lbl_loc (Jkind.sort_of_jkind l.lbl_jkind) l.lbl_arg - in + let lbl_layout l = layout e.exp_env l.lbl_loc l.lbl_sort l.lbl_arg in let layouts = Array.to_list (Array.map lbl_layout lbl.lbl_all) in + let arg_sort = Jkind.Sort.default_for_transl_and_get arg_sort in let targ = transl_exp ~scopes arg_sort arg in if 
Array.length lbl.lbl_all == 1 then (* erase singleton unboxed records before lambda *) @@ -718,8 +719,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = representability on construction, [sort_of_jkind] will be unsafe here. Probably we should add a sort to `Texp_setfield` in the typed tree, then. *) - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - check_record_field_sort id.loc lbl_sort; + check_record_field_sort id.loc lbl.lbl_sort; let mode = Assignment (transl_modify_mode arg_mode) in @@ -756,11 +756,12 @@ and transl_exp0 ~in_new_scope ~scopes sort e = Psetmixedfield(lbl.lbl_pos, write, shape, mode) end in - Lprim(access, [transl_exp ~scopes Jkind.Sort.for_record arg; - transl_exp ~scopes lbl_sort newval], + Lprim(access, [transl_exp ~scopes Jkind.Sort.Const.for_record arg; + transl_exp ~scopes lbl.lbl_sort newval], of_location ~scopes e.exp_loc) | Texp_array (amut, element_sort, expr_list, alloc_mode) -> let mode = transl_alloc_mode alloc_mode in + let element_sort = Jkind.Sort.default_for_transl_and_get element_sort in let kind = array_kind e element_sort in let ll = transl_list ~scopes @@ -836,6 +837,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = type checker; both mutable and immutable arrays are created the same way *) let loc = of_location ~scopes e.exp_loc in + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in let array_kind = Typeopt.array_kind e elt_sort in begin match array_kind with | Pgenarray | Paddrarray | Pintarray | Pfloatarray @@ -848,22 +850,24 @@ and transl_exp0 ~in_new_scope ~scopes sort e = Transl_array_comprehension.comprehension ~transl_exp ~scopes ~loc ~array_kind comp | Texp_ifthenelse(cond, ifso, Some ifnot) -> - Lifthenelse(transl_exp ~scopes Jkind.Sort.for_predef_value cond, + Lifthenelse(transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, event_before ~scopes ifso (transl_exp ~scopes sort ifso), event_before ~scopes ifnot (transl_exp ~scopes sort ifnot), layout_exp sort e) | Texp_ifthenelse(cond, 
ifso, None) -> - Lifthenelse(transl_exp ~scopes Jkind.Sort.for_predef_value cond, + Lifthenelse(transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, event_before ~scopes ifso (transl_exp ~scopes sort ifso), lambda_unit, Lambda.layout_unit) | Texp_sequence(expr1, sort', expr2) -> + let sort' = Jkind.Sort.default_for_transl_and_get sort' in sort_must_not_be_void expr1.exp_loc expr1.exp_type sort'; Lsequence(transl_exp ~scopes sort' expr1, event_before ~scopes expr2 (transl_exp ~scopes sort expr2)) | Texp_while {wh_body; wh_body_sort; wh_cond} -> + let wh_body_sort = Jkind.Sort.default_for_transl_and_get wh_body_sort in sort_must_not_be_void wh_body.exp_loc wh_body.exp_type wh_body_sort; - let cond = transl_exp ~scopes Jkind.Sort.for_predef_value wh_cond in + let cond = transl_exp ~scopes Jkind.Sort.Const.for_predef_value wh_cond in let body = transl_exp ~scopes wh_body_sort wh_body in Lwhile { wh_cond = maybe_region_layout layout_int cond; @@ -871,13 +875,14 @@ and transl_exp0 ~in_new_scope ~scopes sort e = (maybe_region_layout layout_unit body); } | Texp_for {for_id; for_from; for_to; for_dir; for_body; for_body_sort} -> + let for_body_sort = Jkind.Sort.default_for_transl_and_get for_body_sort in sort_must_not_be_void for_body.exp_loc for_body.exp_type for_body_sort; let body = transl_exp ~scopes for_body_sort for_body in Lfor { for_id; for_loc = of_location ~scopes e.exp_loc; - for_from = transl_exp ~scopes Jkind.Sort.for_predef_value for_from; - for_to = transl_exp ~scopes Jkind.Sort.for_predef_value for_to; + for_from = transl_exp ~scopes Jkind.Sort.Const.for_predef_value for_from; + for_to = transl_exp ~scopes Jkind.Sort.Const.for_predef_value for_to; for_dir; for_body = event_before ~scopes for_body (maybe_region_layout layout_unit body); @@ -890,10 +895,10 @@ and transl_exp0 ~in_new_scope ~scopes sort e = let layout = layout_exp sort e in match met with | Tmeth_val id -> - let obj = transl_exp ~scopes Jkind.Sort.for_object expr in + let obj = transl_exp 
~scopes Jkind.Sort.Const.for_object expr in Lsend (Self, Lvar id, obj, [], pos, mode, loc, layout) | Tmeth_name nm -> - let obj = transl_exp ~scopes Jkind.Sort.for_object expr in + let obj = transl_exp ~scopes Jkind.Sort.Const.for_object expr in let (tag, cache) = Translobj.meth obj nm in let kind = if cache = [] then Public else Cached in Lsend (kind, tag, obj, cache, pos, mode, loc, layout) @@ -987,7 +992,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = then lambda_unit else begin Lifthenelse - (transl_exp ~scopes Jkind.Sort.for_predef_value cond, + (transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, lambda_unit, assert_failed loc ~scopes e, Lambda.layout_unit) @@ -1000,14 +1005,14 @@ and transl_exp0 ~in_new_scope ~scopes sort e = | `Constant_or_function -> (* A constant expr (of type <> float if [Config.flat_float_array] is true) gets compiled as itself. *) - transl_exp ~scopes Jkind.Sort.for_lazy_body e + transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e | `Float_that_cannot_be_shortcut -> (* We don't need to wrap with Popaque: this forward block will never be shortcutted since it points to a float and Config.flat_float_array is true. 
*) Lprim(Pmakeblock(Obj.forward_tag, Immutable, None, alloc_heap), - [transl_exp ~scopes Jkind.Sort.for_lazy_body e], + [transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e], of_location ~scopes e.exp_loc) | `Identifier `Forward_value -> (* CR-someday mshinwell: Consider adding a new primitive @@ -1019,11 +1024,11 @@ and transl_exp0 ~in_new_scope ~scopes sort e = Lprim (Popaque Lambda.layout_lazy, [Lprim(Pmakeblock(Obj.forward_tag, Immutable, None, alloc_heap), - [transl_exp ~scopes Jkind.Sort.for_lazy_body e], + [transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e], of_location ~scopes e.exp_loc)], of_location ~scopes e.exp_loc) | `Identifier `Other -> - transl_exp ~scopes Jkind.Sort.for_lazy_body e + transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e | `Other -> (* other cases compile to a lazy block holding a function. The typechecker enforces that e has jkind value. *) @@ -1045,7 +1050,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = ~region:true ~body:(maybe_region_layout Lambda.layout_lazy_contents - (transl_exp ~scopes Jkind.Sort.for_lazy_body e)) + (transl_exp ~scopes Jkind.Sort.Const.for_lazy_body e)) in Lprim(Pmakeblock(Config.lazy_tag, Mutable, None, alloc_heap), [fn], of_location ~scopes e.exp_loc) @@ -1061,6 +1066,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = cl_attributes = []; } | Texp_letop{let_; ands; param; param_sort; body; body_sort; partial} -> + let body_sort = Jkind.Sort.default_for_transl_and_get body_sort in event_after ~scopes e (transl_letop ~scopes e.exp_loc e.exp_env let_ ands param param_sort body body_sort partial) @@ -1093,7 +1099,7 @@ and transl_exp0 ~in_new_scope ~scopes sort e = end | Texp_probe {name; handler=exp; enabled_at_init} -> if !Clflags.native_code && !Clflags.probes then begin - let lam = transl_exp ~scopes Jkind.Sort.for_probe_body exp in + let lam = transl_exp ~scopes Jkind.Sort.Const.for_probe_body exp in let map = Ident.Set.fold (fun v acc -> Ident.Map.add v (Ident.rename v) acc) (free_variables lam) @@ 
-1262,7 +1268,7 @@ and transl_guard ~scopes guard rhs_sort rhs = | None -> expr | Some cond -> event_before ~scopes cond - (Lifthenelse(transl_exp ~scopes Jkind.Sort.for_predef_value cond, + (Lifthenelse(transl_exp ~scopes Jkind.Sort.Const.for_predef_value cond, expr, staticfail, layout)) and transl_case ~scopes rhs_sort {c_lhs; c_guard; c_rhs} = @@ -1397,6 +1403,8 @@ and transl_apply ~scopes let mode = transl_alloc_mode_r mode_closure in let arg_mode = transl_alloc_mode_l mode_arg in let ret_mode = transl_alloc_mode_l mode_ret in + let sort_arg = Jkind.Sort.default_for_transl_and_get sort_arg in + let sort_ret = Jkind.Sort.default_for_transl_and_get sort_ret in let result_layout = layout_of_sort (to_location loc) sort_ret in let body = build_apply handle [Lvar id_arg] loc Rc_normal ret_mode @@ -1438,6 +1446,7 @@ and transl_apply ~scopes match arg with | Omitted _ as arg -> arg | Arg (exp, sort_arg) -> + let sort_arg = Jkind.Sort.default_for_transl_and_get sort_arg in Arg (transl_exp ~scopes sort_arg exp, layout_exp sort_arg exp)) sargs in @@ -1480,9 +1489,11 @@ and transl_tupled_function Tfunction_cases { fc_cases = { c_lhs; _ } :: _ as cases; fc_partial; fc_arg_mode; fc_arg_sort } -> + let fc_arg_sort = Jkind.Sort.default_for_transl_and_get fc_arg_sort in Some (cases, fc_partial, c_lhs, fc_arg_mode, fc_arg_sort) | [{ fp_kind = Tparam_pat pat; fp_partial; fp_mode; fp_sort }], Tfunction_body body -> + let fp_sort = Jkind.Sort.default_for_transl_and_get fp_sort in let case = { c_lhs = pat; c_guard = None; c_rhs = body } in Some ([ case ], fp_partial, pat, fp_mode, fp_sort) | _ -> None @@ -1560,6 +1571,7 @@ and transl_curried_function ~scopes loc repr params body | Tfunction_cases { fc_cases; fc_partial; fc_param; fc_loc; fc_arg_sort; fc_arg_mode } -> + let fc_arg_sort = Jkind.Sort.default_for_transl_and_get fc_arg_sort in let arg_layout = match fc_cases with | { c_lhs } :: _ -> layout_pat fc_arg_sort c_lhs @@ -1601,6 +1613,7 @@ and transl_curried_function ~scopes 
loc repr params body | Tparam_optional_default (pat, expr, _) -> expr.exp_env, Predef.type_option expr.exp_type, Translattribute.transl_param_attributes pat in + let fp_sort = Jkind.Sort.default_for_transl_and_get fp_sort in let arg_layout = layout arg_env fp_loc fp_sort arg_type in let arg_mode = transl_alloc_mode_l fp_mode in let param = @@ -1619,6 +1632,7 @@ and transl_curried_function ~scopes loc repr params body ~arg_sort:fp_sort ~arg_layout ~return_layout | Tparam_optional_default (pat, default_arg, default_arg_sort) -> + let default_arg_sort = Jkind.Sort.default_for_transl_and_get default_arg_sort in let default_arg = event_before ~scopes default_arg (transl_exp ~scopes default_arg_sort default_arg) @@ -1810,6 +1824,7 @@ and transl_let ~scopes ~return_layout ?(add_regions=false) ?(in_structure=false) fun body -> body | {vb_pat=pat; vb_expr=expr; vb_sort=sort; vb_rec_kind=_; vb_attributes; vb_loc} :: rem -> + let sort = Jkind.Sort.default_for_transl_and_get sort in let lam = transl_bound_exp ~scopes ~in_structure pat sort expr vb_loc vb_attributes in @@ -1832,6 +1847,7 @@ and transl_let ~scopes ~return_layout ?(add_regions=false) ?(in_structure=false) let transl_case {vb_expr=expr; vb_sort; vb_attributes; vb_rec_kind = rkind; vb_loc; vb_pat} id = + let vb_sort = Jkind.Sort.default_for_transl_and_get vb_sort in let def = transl_bound_exp ~scopes ~in_structure vb_pat vb_sort expr vb_loc vb_attributes in @@ -1844,7 +1860,7 @@ and transl_let ~scopes ~return_layout ?(add_regions=false) ?(in_structure=false) and transl_setinstvar ~scopes loc self var expr = Lprim(Psetfield_computed (maybe_pointer expr, Assignment modify_heap), - [self; var; transl_exp ~scopes Jkind.Sort.for_instance_var expr], loc) + [self; var; transl_exp ~scopes Jkind.Sort.Const.for_instance_var expr], loc) (* CR layouts v5: Invariant - this is only called on values. Relax that. 
*) and transl_record ~scopes loc env mode fields repres opt_init_expr = @@ -1862,8 +1878,7 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = let copy_id = Ident.create_local "newrecord" in let update_field cont (lbl, definition) = (* CR layouts v5: allow more unboxed types here. *) - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - check_record_field_sort lbl.lbl_loc lbl_sort; + check_record_field_sort lbl.lbl_loc lbl.lbl_sort; match definition with | Kept _ -> cont | Overridden (_lid, expr) -> @@ -1910,14 +1925,14 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = end in Lsequence(Lprim(upd, [Lvar copy_id; - transl_exp ~scopes lbl_sort expr], + transl_exp ~scopes lbl.lbl_sort expr], of_location ~scopes loc), cont) in assert (is_heap_mode (Option.get mode)); (* Pduprecord must be Alloc_heap and not unboxed *) Llet(Strict, Lambda.layout_block, copy_id, Lprim(Pduprecord (repres, size), - [transl_exp ~scopes Jkind.Sort.for_record init_expr], + [transl_exp ~scopes Jkind.Sort.Const.for_record init_expr], of_location ~scopes loc), Array.fold_left update_field (Lvar copy_id) fields) | Some _ | None -> @@ -1928,14 +1943,9 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = let lv = Array.mapi (fun i (lbl, definition) -> - (* CR layouts v2.5: When we allow `any` in record fields and check - representability on construction, [sort_of_layout] will be unsafe - here. Probably we should add sorts to record construction in the - typed tree, then. 
*) - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in match definition with | Kept (typ, mut, _) -> - let field_layout = layout env lbl.lbl_loc lbl_sort typ in + let field_layout = layout env lbl.lbl_loc lbl.lbl_sort typ in let sem = if Types.is_mutable mut then Reads_vary else Reads_agree in @@ -1991,8 +2001,8 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = of_location ~scopes loc), field_layout | Overridden (_lid, expr) -> - let field_layout = layout_exp lbl_sort expr in - transl_exp ~scopes lbl_sort expr, field_layout) + let field_layout = layout_exp lbl.lbl_sort expr in + transl_exp ~scopes lbl.lbl_sort expr, field_layout) fields in let ll, shape = List.split (Array.to_list lv) in @@ -2077,7 +2087,7 @@ and transl_record ~scopes loc env mode fields repres opt_init_expr = begin match opt_init_expr with None -> lam | Some (init_expr, _) -> Llet(Strict, Lambda.layout_block, init_id, - transl_exp ~scopes Jkind.Sort.for_record init_expr, lam) + transl_exp ~scopes Jkind.Sort.Const.for_record init_expr, lam) end and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = @@ -2087,10 +2097,9 @@ and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = let shape = Array.map (fun (lbl, definition) -> - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in match definition with - | Kept (typ, _mut, _) -> layout env lbl.lbl_loc lbl_sort typ - | Overridden (_lid, expr) -> layout_exp lbl_sort expr) + | Kept (typ, _mut, _) -> layout env lbl.lbl_loc lbl.lbl_sort typ + | Overridden (_lid, expr) -> layout_exp lbl.lbl_sort expr) fields |> Array.to_list in @@ -2102,8 +2111,7 @@ and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = let access = Punboxed_product_field (i, shape) in Lprim (access, [Lvar init_id], of_location ~scopes loc) | Overridden (_lid, expr) -> - let lbl_sort = Jkind.sort_of_jkind lbl.lbl_jkind in - transl_exp ~scopes lbl_sort expr) + transl_exp ~scopes lbl.lbl_sort expr) 
fields |> Array.to_list in @@ -2114,6 +2122,9 @@ and transl_record_unboxed_product ~scopes loc env fields repres opt_init_expr = match opt_init_expr with | None -> lam | Some (init_expr, init_expr_sort) -> + let init_expr_sort = + Jkind.Sort.default_for_transl_and_get init_expr_sort + in let layout = layout_exp init_expr_sort init_expr in let exp = transl_exp ~scopes init_expr_sort init_expr in Llet(Strict, layout, init_id, exp, lam) @@ -2210,13 +2221,13 @@ and transl_match ~scopes ~arg_sort ~return_sort e arg pat_expr_list partial = assert (static_handlers = []); let mode = transl_alloc_mode alloc_mode in let argl = - List.map (fun (_, a) -> (a, Jkind.Sort.for_tuple_element)) argl + List.map (fun (_, a) -> (a, Jkind.Sort.Const.for_tuple_element)) argl in Matching.for_multiple_match ~scopes ~return_layout e.exp_loc (transl_list_with_layout ~scopes argl) mode val_cases partial | {exp_desc = Texp_tuple (argl, alloc_mode)}, _ :: _ -> let argl = - List.map (fun (_, a) -> (a, Jkind.Sort.for_tuple_element)) argl + List.map (fun (_, a) -> (a, Jkind.Sort.Const.for_tuple_element)) argl in let val_ids, lvars = List.map @@ -2260,10 +2271,16 @@ and transl_letop ~scopes loc env let_ ands param param_sort case case_sort transl_ident (of_location ~scopes and_.bop_op_name.loc) env and_.bop_op_type and_.bop_op_path and_.bop_op_val Id_value in - let exp = transl_exp ~scopes and_.bop_exp_sort and_.bop_exp in - let right_layout = layout_exp and_.bop_exp_sort and_.bop_exp in + let and_bop_exp_sort = + Jkind.Sort.default_for_transl_and_get and_.bop_exp_sort + in + let and_bop_op_return_sort = + Jkind.Sort.default_for_transl_and_get and_.bop_op_return_sort + in + let exp = transl_exp ~scopes and_bop_exp_sort and_.bop_exp in + let right_layout = layout_exp and_bop_exp_sort and_.bop_exp in let result_layout = - function2_return_layout env and_.bop_loc and_.bop_op_return_sort + function2_return_layout env and_.bop_loc and_bop_op_return_sort and_.bop_op_type in let lam = @@ -2287,9 +2304,15 
@@ and transl_letop ~scopes loc env let_ ands param param_sort case case_sort transl_ident (of_location ~scopes let_.bop_op_name.loc) env let_.bop_op_type let_.bop_op_path let_.bop_op_val Id_value in + let let_bop_exp_sort = + Jkind.Sort.default_for_transl_and_get let_.bop_exp_sort + in + let let_bop_op_return_sort = + Jkind.Sort.default_for_transl_and_get let_.bop_op_return_sort + in let exp = - loop (layout_exp let_.bop_exp_sort let_.bop_exp) - (transl_exp ~scopes let_.bop_exp_sort let_.bop_exp) ands + loop (layout_exp let_bop_exp_sort let_.bop_exp) + (transl_exp ~scopes let_bop_exp_sort let_.bop_exp) ands in let func = let return_mode = alloc_heap (* XXX fixme: use result of is_function_type *) in @@ -2320,7 +2343,7 @@ and transl_letop ~scopes loc env let_ ands param param_sort case case_sort ap_func = op; ap_args=[exp; func]; ap_result_layout= - function2_return_layout env let_.bop_loc let_.bop_op_return_sort + function2_return_layout env let_.bop_loc let_bop_op_return_sort let_.bop_op_type; ap_region_close=Rc_normal; ap_mode=alloc_heap; diff --git a/lambda/translcore.mli b/lambda/translcore.mli index f50cf21e65f..bf1caa95227 100644 --- a/lambda/translcore.mli +++ b/lambda/translcore.mli @@ -24,7 +24,7 @@ open Debuginfo.Scoped_location val pure_module : module_expr -> let_kind (* Used for translating Alloc_heap values in classes and modules. 
*) -val transl_exp: scopes:scopes -> Jkind.sort -> expression -> lambda +val transl_exp: scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda val transl_apply: scopes:scopes -> ?tailcall:tailcall_attribute -> ?inlined:inlined_attribute @@ -42,7 +42,7 @@ val transl_extension_constructor: scopes:scopes -> Env.t -> Longident.t option -> extension_constructor -> lambda -val transl_scoped_exp : scopes:scopes -> Jkind.sort -> expression -> lambda +val transl_scoped_exp : scopes:scopes -> Jkind.Sort.Const.t -> expression -> lambda type error = Free_super_var diff --git a/lambda/translmod.ml b/lambda/translmod.ml index 09a80649091..d9869c2532a 100644 --- a/lambda/translmod.ml +++ b/lambda/translmod.ml @@ -39,7 +39,7 @@ type unsafe_info = type error = Circular_dependency of (Ident.t * unsafe_info) list | Conflicting_inline_attributes -| Non_value_jkind of type_expr * Jkind.sort +| Non_value_jkind of type_expr * Jkind.Sort.Const.t | Instantiating_packed of Compilation_unit.t exception Error of Location.t * error @@ -56,7 +56,7 @@ exception Error of Location.t * error When this sanity check is removed, consider whether it must be replaced with some defaulting. 
*) let sort_must_not_be_void loc ty sort = - if Jkind.Sort.is_void_defaulting sort then + if Jkind.Sort.Const.(equal void sort) then raise (Error (loc, Non_value_jkind (ty, sort))) let cons_opt x_opt xs = @@ -607,7 +607,7 @@ and transl_module ~scopes cc rootpath mexp = transl_module ~scopes (compose_coercions cc ccarg) rootpath arg | Tmod_unpack(arg, _) -> apply_coercion loc Strict cc - (Translcore.transl_exp ~scopes Jkind.Sort.for_module arg) + (Translcore.transl_exp ~scopes Jkind.Sort.Const.for_module arg) and transl_apply ~scopes ~loc ~cc mod_env funct translated_arg = let inlined_attribute = @@ -694,6 +694,7 @@ and transl_structure ~scopes loc fields cc rootpath final_env = function let body, size = transl_structure ~scopes loc fields cc rootpath final_env rem in + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; Lsequence(transl_exp ~scopes sort expr, body), size | Tstr_value(rec_flag, pat_expr_list) -> @@ -1225,6 +1226,7 @@ let transl_store_structure ~scopes glob map prims aliases str = | item :: rem -> match item.str_desc with | Tstr_eval (expr, sort, _attrs) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; Lsequence(Lambda.subst no_env_update subst (transl_exp ~scopes sort expr), @@ -1652,6 +1654,7 @@ let transl_store_structure_gen match str with | [ { str_desc = Tstr_eval (expr, sort, _attrs) } ] when topl -> assert (size = 0); + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; Lambda.subst (fun _ _ env -> env) !transl_store_subst (transl_exp ~scopes sort expr) @@ -1779,11 +1782,13 @@ let transl_toplevel_item ~scopes item = Otherwise, the normal compilation would result in a Lsequence returning unit. 
*) Tstr_eval (expr, sort, _) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in sort_must_not_be_void expr.exp_loc expr.exp_type sort; transl_exp ~scopes sort expr | Tstr_value(Nonrecursive, [{vb_pat = {pat_desc=Tpat_any}; vb_expr = expr; vb_sort = sort}]) -> + let sort = Jkind.Sort.default_for_transl_and_get sort in transl_exp ~scopes sort expr | Tstr_value(rec_flag, pat_expr_list) -> let idents = let_bound_idents pat_expr_list in @@ -2142,7 +2147,7 @@ let report_error loc = function Location.errorf "Non-value sort %a detected in [translmod] in type %a:@ \ Please report this error to the Jane Street compilers team." - Jkind.Sort.format sort + Jkind.Sort.Const.format sort Printtyp.type_expr ty | Instantiating_packed comp_unit -> Location.errorf ~loc diff --git a/lambda/translmod.mli b/lambda/translmod.mli index d519998fd78..66c24d9d6a7 100644 --- a/lambda/translmod.mli +++ b/lambda/translmod.mli @@ -81,7 +81,7 @@ type unsafe_info = type error = Circular_dependency of (Ident.t * unsafe_info) list | Conflicting_inline_attributes -| Non_value_jkind of Types.type_expr * Jkind.sort +| Non_value_jkind of Types.type_expr * Jkind.Sort.Const.t | Instantiating_packed of Compilation_unit.t exception Error of Location.t * error diff --git a/lambda/translprim.ml b/lambda/translprim.ml index 97611290c8c..054701e0d85 100644 --- a/lambda/translprim.ml +++ b/lambda/translprim.ml @@ -1274,7 +1274,7 @@ let specialize_primitive env loc ty ~has_constant_constructor prim = let shape = List.map (fun typ -> Lambda.must_be_value (Typeopt.layout env (to_location loc) - Jkind.Sort.for_block_element typ)) + Jkind.Sort.Const.for_block_element typ)) fields in let useful = List.exists (fun knd -> knd <> Lambda.generic_value) shape in @@ -1634,10 +1634,7 @@ let transl_primitive loc p env ty ~poly_mode ~poly_sort path = let rec make_params ty repr_args repr_res = match repr_args, repr_res with | [], (_, res_repr) -> - let res_sort = - Jkind.Sort.of_const - (sort_of_native_repr res_repr 
~poly_sort) - in + let res_sort = sort_of_native_repr res_repr ~poly_sort in [], Typeopt.layout env error_loc res_sort ty | (((_, arg_repr) as arg) :: repr_args), _ -> match Typeopt.is_function_type env ty with @@ -1645,10 +1642,7 @@ let transl_primitive loc p env ty ~poly_mode ~poly_sort path = Misc.fatal_errorf "Primitive %s type does not correspond to arity" (Primitive.byte_name p) | Some (arg_ty, ret_ty) -> - let arg_sort = - Jkind.Sort.of_const - (sort_of_native_repr arg_repr ~poly_sort) - in + let arg_sort = sort_of_native_repr arg_repr ~poly_sort in let arg_layout = Typeopt.layout env error_loc arg_sort arg_ty in diff --git a/middle_end/flambda2/from_lambda/closure_conversion.ml b/middle_end/flambda2/from_lambda/closure_conversion.ml index 46c8fc0a2c7..f2fbb88756b 100644 --- a/middle_end/flambda2/from_lambda/closure_conversion.ml +++ b/middle_end/flambda2/from_lambda/closure_conversion.ml @@ -527,7 +527,7 @@ let unarize_extern_repr alloc_mode (extern_repr : Lambda.extern_repr) = match extern_repr with | Same_as_ocaml_repr (Base _ as sort) -> let kind = - Typeopt.layout_of_const_sort sort + Typeopt.layout_of_non_void_sort sort |> K.With_subkind.from_lambda_values_and_unboxed_numbers_only |> K.With_subkind.kind in diff --git a/ocamldoc/odoc_sig.ml b/ocamldoc/odoc_sig.ml index b24c1ce2e9d..cc75da1715d 100644 --- a/ocamldoc/odoc_sig.ml +++ b/ocamldoc/odoc_sig.ml @@ -496,7 +496,7 @@ module Analyser = { Typedtree.ld_id; ld_mutable; ld_type; ld_loc; ld_attributes } = get_field env comments @@ {Types.ld_id; ld_mutable; ld_modalities = Mode.Modality.Value.Const.id; - ld_jkind=Jkind.Builtin.any ~why:Dummy_jkind (* ignored *); + ld_sort=Jkind.Sort.Const.void (* ignored *); ld_type=ld_type.Typedtree.ctyp_type; ld_loc; ld_attributes; ld_uid=Types.Uid.internal_not_actually_unique} in let open Typedtree in diff --git a/otherlibs/stdlib_alpha/capsule.ml b/otherlibs/stdlib_alpha/capsule.ml index f7778126658..c1238df02d5 100644 --- a/otherlibs/stdlib_alpha/capsule.ml +++ 
b/otherlibs/stdlib_alpha/capsule.ml @@ -121,6 +121,9 @@ end it never returns is also [portable] *) external reraise : exn -> 'a @ portable @@ portable = "%reraise" +external raise_with_backtrace : + exn -> Printexc.raw_backtrace -> 'a @ portable @@ portable = "%raise_with_backtrace" + module Data = struct type ('a, 'k) t : value mod portable uncontended @@ -349,6 +352,21 @@ module Rwlock = struct end +module Condition = struct + + type 'k t : value mod portable uncontended + + external create : unit -> 'k t @@ portable = "caml_ml_condition_new" + external wait : 'k t -> M.t -> unit @@ portable = "caml_ml_condition_wait" + external signal : 'k t -> unit @@ portable = "caml_ml_condition_signal" + external broadcast : 'k t -> unit @@ portable = "caml_ml_condition_broadcast" + + let wait t (mut : 'k Mutex.t) _password = + (* [mut] is locked, so we know it is not poisoned. *) + wait t mut.mutex + +end + let create_with_mutex () = let (P name) = Name.make () in Mutex.P { name; mutex = M.create (); poisoned = false } @@ -356,3 +374,17 @@ let create_with_mutex () = let create_with_rwlock () = let (P name) = Name.make () in Rwlock.P { name; rwlock = Rw.create (); poisoned = false } + +exception Protected : 'k Mutex.t * (exn, 'k) Data.t -> exn + +(* CR-soon mslater: replace with portable stdlib *) +let get_raw_backtrace : unit -> Printexc.raw_backtrace @@ portable = + O.magic O.magic Printexc.get_raw_backtrace + +let protect f = + try f () with + | exn -> + let (P mut) = create_with_mutex () in + raise_with_backtrace (Protected (mut, Data.unsafe_mk exn)) (get_raw_backtrace ()) + ;; + diff --git a/otherlibs/stdlib_alpha/capsule.mli b/otherlibs/stdlib_alpha/capsule.mli index 51bccbc855e..4dfc63af086 100644 --- a/otherlibs/stdlib_alpha/capsule.mli +++ b/otherlibs/stdlib_alpha/capsule.mli @@ -147,9 +147,6 @@ val access_shared : with a shared {!Access.t} for ['k]. The result is within ['k] so it must be [portable] and it is marked [contended]. 
*) -(** Mutual exclusion primtives for controlling uncontended access to a capsule. - - Requires OCaml 5 runtime. *) module Mutex : sig type 'k t : value mod portable uncontended @@ -236,6 +233,40 @@ module Rwlock : sig lock as poisoned. *) end +module Condition : sig + + type 'k t : value mod portable uncontended + (** ['k t] is the type of a condition variable associated with the capsule ['k]. + This condition may only be used with the matching ['k Mutex.t]. *) + + val create : unit -> 'k t @@ portable + (** [create ()] creates and returns a new condition variable. + This condition variable is associated with the matching ['k Mutex.t] + and with a certain property {i P} that is protected by the mutex. *) + + val wait : 'k t -> 'k Mutex.t -> 'k Password.t @ local -> unit @@ portable + (** [wait c m] atomically unlocks the mutex [m] and suspends the + current thread on the condition variable [c]. This thread can + later be woken up after the condition variable [c] has been signaled + via {!signal} or {!broadcast}; however, it can also be woken up for + no reason. The mutex [m] is locked again before [wait] returns. One + cannot assume that the property {i P} associated with the condition + variable [c] holds when [wait] returns; one must explicitly test + whether {i P} holds after calling [wait]. *) + + val signal : 'k t -> unit @@ portable + (** [signal c] wakes up one of the threads waiting on the condition + variable [c], if there is one. If there is none, this call has no effect. + It is recommended to call [signal c] inside a critical section, that is, + while the mutex [m] associated with [c] is locked. *) + + val broadcast : 'k t -> unit @@ portable + (** [broadcast c] wakes up all threads waiting on the condition + variable [c]. If there are none, this call has no effect. + It is recommended to call [broadcast c] inside a critical section, + that is, while the mutex [m] associated with [c] is locked. 
*) +end + val create_with_mutex : unit -> Mutex.packed @@ portable (** [create_with_mutex ()] creates a new capsule with an associated mutex. *) @@ -373,7 +404,6 @@ module Data : sig so it must be [portable] and it is marked [contended]. Since [nonportable] functions may enclose [uncontended] (and thus write) access to data, ['a] must cross [portability] *) - end exception Encapsulated : 'k Name.t * (exn, 'k) Data.t -> exn @@ -381,3 +411,16 @@ exception Encapsulated : 'k Name.t * (exn, 'k) Data.t -> exn exception, it is wrapped in [Encapsulated] to avoid leaking access to the data. The [Name.t] can be used to associate the [Data.t] with a particular [Password.t] or [Mutex.t]. *) + +exception Protected : 'k Mutex.t * (exn, 'k) Data.t -> exn +(** If a function passed to [protect] raises an exception, it is wrapped + in [Protected] to provide access to the capsule in which the function ran. *) +(* CR-soon mslater: this should return a key, not a mutex. *) + +val protect + : (unit -> 'a @ portable contended) @ local portable + -> 'a @ portable contended + @@ portable +(** [protect f] runs [f] in a fresh capsule. If [f] returns normally, [protect] + merges this capsule into the caller's capsule. If [f] raises, [protect] + raises [Protected], giving the caller access to the encapsulated exception. *) diff --git a/otherlibs/stdlib_beta/.ocamlformat-enable b/otherlibs/stdlib_beta/.ocamlformat-enable index 6a5f81486bb..038c19ee974 100644 --- a/otherlibs/stdlib_beta/.ocamlformat-enable +++ b/otherlibs/stdlib_beta/.ocamlformat-enable @@ -1,6 +1,7 @@ -stdlib_beta.mli -stdlib_beta.ml int8.ml int8.mli int16.ml int16.mli +int_wrapper.ml +stdlib_beta.ml +stdlib_beta.mli diff --git a/otherlibs/stdlib_beta/int16.ml b/otherlibs/stdlib_beta/int16.ml index 81dac52f206..e88be132034 100644 --- a/otherlibs/stdlib_beta/int16.ml +++ b/otherlibs/stdlib_beta/int16.ml @@ -12,81 +12,19 @@ (* *) (**************************************************************************) -open! 
Stdlib - -type t = int16 - [@@@ocaml.flambda_o3] -external to_int : t -> int = "%identity" - -external unsafe_of_int : int -> t = "%identity" - -let int_size = 16 - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = -1 lsl (int_size - 1) - -let mask = (1 lsl int_size) - 1 - -let[@inline] of_int i = unsafe_of_int (((i - min_int) land mask) + min_int) - -let zero = of_int 0 - -let one = of_int 1 - -let minus_one = of_int (-1) - -let neg x = of_int (Int.neg (to_int x)) - -let add x y = of_int (Int.add (to_int x) (to_int y)) - -let sub x y = of_int (Int.sub (to_int x) (to_int y)) - -let mul x y = of_int (Int.mul (to_int x) (to_int y)) - -let div x y = of_int (Int.div (to_int x) (to_int y)) - -let rem x y = of_int (Int.rem (to_int x) (to_int y)) - -let succ x = of_int (Int.succ (to_int x)) - -let pred x = of_int (Int.pred (to_int x)) - -let abs x = of_int (Int.abs (to_int x)) - -let logand x y = unsafe_of_int (Int.logand (to_int x) (to_int y)) - -let logor x y = unsafe_of_int (Int.logor (to_int x) (to_int y)) - -let logxor x y = unsafe_of_int (Int.logxor (to_int x) (to_int y)) - -let lognot x = unsafe_of_int (Int.lognot (to_int x)) - -let shift_left x y = of_int (Int.shift_left (to_int x) y) - -let shift_right x y = of_int (Int.shift_right (to_int x) y) - -let shift_right_logical x y = - of_int (Int.shift_right_logical (to_int x land mask) y) - -let equal x y = Int.equal (to_int x) (to_int y) - -let compare x y = Int.compare (to_int x) (to_int y) - -let min x y = if to_int x > to_int y then y else x - -let max x y = if to_int x < to_int y then y else x - -let of_float f = - let i = Int.of_float f in - if min_int <= i && i <= max_int then unsafe_of_int i else zero +type t = int16 -let to_float t = Int.to_float (to_int t) +include + Int_wrapper.Make + (Int_wrapper) + (struct + type nonrec t = t -let to_string t = Int.to_string (to_int t) + let int_size = 16 -let max_int = of_int max_int + external inject : t -> int = "%identity" -let min_int = of_int min_int + 
external unchecked_project : int -> t = "%identity" + end) diff --git a/otherlibs/stdlib_beta/int16.mli b/otherlibs/stdlib_beta/int16.mli index 14864cafb43..bca8493bb13 100644 --- a/otherlibs/stdlib_beta/int16.mli +++ b/otherlibs/stdlib_beta/int16.mli @@ -2,11 +2,9 @@ (* *) (* OCaml *) (* *) -(* The OCaml programmers *) (* Jacob Van Buren, Jane Street, New York *) (* *) -(* Copyright 2018 Institut National de Recherche en Informatique et *) -(* en Automatique. *) +(* Copyright 2024 Jane Street Group LLC *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) @@ -16,121 +14,11 @@ (** Signed 16-bit integer values. - These integers are {16} bits wide and use two's complement - representation. All operations are taken modulo - 2{^16}. They do not fail on overflow. *) - -(** {1:ints 16-bit Integers} *) + These integers are {16} bits wide and use two's complement representation. + All operations are taken modulo 2{^16}. They do not fail on overflow. *) (** The type for 16-bit integer values. *) -type t = int16 - -(** [int_size] is the number of bits in an integer (i.e., 16). *) -val int_size : int - -(** [zero] is the integer [0]. *) -val zero : int16 - -(** [one] is the integer [1]. *) -val one : int16 - -(** [minus_one] is the integer [-1]. *) -val minus_one : int16 - -(** [neg x] is [~-x]. *) -val neg : int16 -> int16 - -(** [add x y] is the addition [x + y]. *) -val add : int16 -> int16 -> int16 - -(** [sub x y] is the subtraction [x - y]. *) -val sub : int16 -> int16 -> int16 - -(** [mul x y] is the multiplication [x * y]. *) -val mul : int16 -> int16 -> int16 - -(** [div x y] is the division [x / y]. See {!Stdlib.( / )} for details. *) -val div : int16 -> int16 -> int16 - -(** [rem x y] is the remainder [x mod y]. See {!Stdlib.( mod )} for details. *) -val rem : int16 -> int16 -> int16 - -(** [succ x] is [add x 1]. *) -val succ : int16 -> int16 - -(** [pred x] is [sub x 1]. 
*) -val pred : int16 -> int16 - -(** [abs x] is the absolute value of [x]. That is [x] if [x] is positive - and [neg x] if [x] is negative. {b Warning.} This may be negative if - the argument is {!min_int}. *) -val abs : int16 -> int16 - -(** [max_int] is the greatest representable integer, - [2{^[16 - 1]} - 1]. *) -val max_int : int16 - -(** [min_int] is the smallest representable integer, - [-2{^[16 - 1]}]. *) -val min_int : int16 - -(** [logand x y] is the bitwise logical and of [x] and [y]. *) -val logand : int16 -> int16 -> int16 - -(** [logor x y] is the bitwise logical or of [x] and [y]. *) -val logor : int16 -> int16 -> int16 - -(** [logxor x y] is the bitwise logical exclusive or of [x] and [y]. *) -val logxor : int16 -> int16 -> int16 - -(** [lognot x] is the bitwise logical negation of [x]. *) -val lognot : int16 -> int16 - -(** [shift_left x n] shifts [x] to the left by [n] bits. The result - is unspecified if [n < 0] or [n > ]{!16}. *) -val shift_left : int16 -> int -> int16 - -(** [shift_right x n] shifts [x] to the right by [n] bits. This is an - arithmetic shift: the sign bit of [x] is replicated and inserted - in the vacated bits. The result is unspecified if [n < 0] or - [n > ]{!16}. *) -val shift_right : int16 -> int -> int16 - -(** [shift_right x n] shifts [x] to the right by [n] bits. This is a - logical shift: zeroes are inserted in the vacated bits regardless - of the sign of [x]. The result is unspecified if [n < 0] or - [n > ]{!16}. *) -val shift_right_logical : int16 -> int -> int16 - -(** {1:preds Predicates and comparisons} *) - -(** [equal x y] is [true] if and only if [x = y]. *) -val equal : int16 -> int16 -> bool - -(** [compare x y] is {!Stdlib.compare}[ x y] but more efficient. *) -val compare : int16 -> int16 -> int - -(** Return the smaller of the two arguments. *) -val min : int16 -> int16 -> int16 - -(** Return the greater of the two arguments. 
*) -val max : int16 -> int16 -> int16 - -(** {1:convert Converting} *) - -(** [to_int x] is [x] as an {!int}. *) -val to_int : int16 -> int - -(** [of_int x] represents [x] as an 16-bit integer. *) -val of_int : int -> int16 - -(** [to_float x] is [x] as a floating point number. *) -val to_float : int16 -> float - -(** [of_float x] truncates [x] to an integer. The result is - unspecified if the argument is [nan] or falls outside the range of - representable integers. *) -val of_float : float -> int16 +type t = int16 [@@immediate] -(** [to_string x] is the written representation of [x] in decimal. *) -val to_string : int16 -> string +(** @inline *) +include Int_wrapper.S with type t := int16 diff --git a/otherlibs/stdlib_beta/int8.ml b/otherlibs/stdlib_beta/int8.ml index 6c3c7b4f2be..e0b1d5c6157 100644 --- a/otherlibs/stdlib_beta/int8.ml +++ b/otherlibs/stdlib_beta/int8.ml @@ -12,81 +12,19 @@ (* *) (**************************************************************************) -open! Stdlib - -type t = int8 - [@@@ocaml.flambda_o3] -external to_int : t -> int = "%identity" - -external unsafe_of_int : int -> t = "%identity" - -let int_size = 8 - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = -1 lsl (int_size - 1) - -let mask = (1 lsl int_size) - 1 - -let[@inline] of_int i = unsafe_of_int (((i - min_int) land mask) + min_int) - -let zero = of_int 0 - -let one = of_int 1 - -let minus_one = of_int (-1) - -let neg x = of_int (Int.neg (to_int x)) - -let add x y = of_int (Int.add (to_int x) (to_int y)) - -let sub x y = of_int (Int.sub (to_int x) (to_int y)) - -let mul x y = of_int (Int.mul (to_int x) (to_int y)) - -let div x y = of_int (Int.div (to_int x) (to_int y)) - -let rem x y = of_int (Int.rem (to_int x) (to_int y)) - -let succ x = of_int (Int.succ (to_int x)) - -let pred x = of_int (Int.pred (to_int x)) - -let abs x = of_int (Int.abs (to_int x)) - -let logand x y = unsafe_of_int (Int.logand (to_int x) (to_int y)) - -let logor x y = unsafe_of_int (Int.logor 
(to_int x) (to_int y)) - -let logxor x y = unsafe_of_int (Int.logxor (to_int x) (to_int y)) - -let lognot x = unsafe_of_int (Int.lognot (to_int x)) - -let shift_left x y = of_int (Int.shift_left (to_int x) y) - -let shift_right x y = of_int (Int.shift_right (to_int x) y) - -let shift_right_logical x y = - of_int (Int.shift_right_logical (to_int x land mask) y) - -let equal x y = Int.equal (to_int x) (to_int y) - -let compare x y = Int.compare (to_int x) (to_int y) - -let min x y = if to_int x > to_int y then y else x - -let max x y = if to_int x < to_int y then y else x - -let of_float f = - let i = Int.of_float f in - if min_int <= i && i <= max_int then unsafe_of_int i else zero +type t = int8 -let to_float t = Int.to_float (to_int t) +include + Int_wrapper.Make + (Int_wrapper) + (struct + type nonrec t = t -let to_string t = Int.to_string (to_int t) + let int_size = 8 -let max_int = of_int max_int + external inject : t -> int = "%identity" -let min_int = of_int min_int + external unchecked_project : int -> t = "%identity" + end) diff --git a/otherlibs/stdlib_beta/int8.mli b/otherlibs/stdlib_beta/int8.mli index 10041a11cdb..b461517acfd 100644 --- a/otherlibs/stdlib_beta/int8.mli +++ b/otherlibs/stdlib_beta/int8.mli @@ -2,11 +2,9 @@ (* *) (* OCaml *) (* *) -(* The OCaml programmers *) (* Jacob Van Buren, Jane Street, New York *) (* *) -(* Copyright 2018 Institut National de Recherche en Informatique et *) -(* en Automatique. *) +(* Copyright 2024 Jane Street Group LLC *) (* *) (* All rights reserved. This file is distributed under the terms of *) (* the GNU Lesser General Public License version 2.1, with the *) @@ -16,121 +14,11 @@ (** Signed 8-bit integer values. - These integers are {8} bits wide and use two's complement - representation. All operations are taken modulo - 2{^8}. They do not fail on overflow. *) - -(** {1:ints 8-bit Integers} *) + These integers are {8} bits wide and use two's complement representation. + All operations are taken modulo 2{^8}. 
They do not fail on overflow. *) (** The type for 8-bit integer values. *) -type t = int8 - -(** [int_size] is the number of bits in an integer (i.e., 8). *) -val int_size : int - -(** [zero] is the integer [0]. *) -val zero : int8 - -(** [one] is the integer [1]. *) -val one : int8 - -(** [minus_one] is the integer [-1]. *) -val minus_one : int8 - -(** [neg x] is [~-x]. *) -val neg : int8 -> int8 - -(** [add x y] is the addition [x + y]. *) -val add : int8 -> int8 -> int8 - -(** [sub x y] is the subtraction [x - y]. *) -val sub : int8 -> int8 -> int8 - -(** [mul x y] is the multiplication [x * y]. *) -val mul : int8 -> int8 -> int8 - -(** [div x y] is the division [x / y]. See {!Stdlib.( / )} for details. *) -val div : int8 -> int8 -> int8 - -(** [rem x y] is the remainder [x mod y]. See {!Stdlib.( mod )} for details. *) -val rem : int8 -> int8 -> int8 - -(** [succ x] is [add x 1]. *) -val succ : int8 -> int8 - -(** [pred x] is [sub x 1]. *) -val pred : int8 -> int8 - -(** [abs x] is the absolute value of [x]. That is [x] if [x] is positive - and [neg x] if [x] is negative. {b Warning.} This may be negative if - the argument is {!min_int}. *) -val abs : int8 -> int8 - -(** [max_int] is the greatest representable integer, - [2{^[8 - 1]} - 1]. *) -val max_int : int8 - -(** [min_int] is the smallest representable integer, - [-2{^[8 - 1]}]. *) -val min_int : int8 - -(** [logand x y] is the bitwise logical and of [x] and [y]. *) -val logand : int8 -> int8 -> int8 - -(** [logor x y] is the bitwise logical or of [x] and [y]. *) -val logor : int8 -> int8 -> int8 - -(** [logxor x y] is the bitwise logical exclusive or of [x] and [y]. *) -val logxor : int8 -> int8 -> int8 - -(** [lognot x] is the bitwise logical negation of [x]. *) -val lognot : int8 -> int8 - -(** [shift_left x n] shifts [x] to the left by [n] bits. The result - is unspecified if [n < 0] or [n > ]{!8}. *) -val shift_left : int8 -> int -> int8 - -(** [shift_right x n] shifts [x] to the right by [n] bits. 
This is an - arithmetic shift: the sign bit of [x] is replicated and inserted - in the vacated bits. The result is unspecified if [n < 0] or - [n > ]{!8}. *) -val shift_right : int8 -> int -> int8 - -(** [shift_right x n] shifts [x] to the right by [n] bits. This is a - logical shift: zeroes are inserted in the vacated bits regardless - of the sign of [x]. The result is unspecified if [n < 0] or - [n > ]{!8}. *) -val shift_right_logical : int8 -> int -> int8 - -(** {1:preds Predicates and comparisons} *) - -(** [equal x y] is [true] if and only if [x = y]. *) -val equal : int8 -> int8 -> bool - -(** [compare x y] is {!Stdlib.compare}[ x y] but more efficient. *) -val compare : int8 -> int8 -> int - -(** Return the smaller of the two arguments. *) -val min : int8 -> int8 -> int8 - -(** Return the greater of the two arguments. *) -val max : int8 -> int8 -> int8 - -(** {1:convert Converting} *) - -(** [to_int x] is [x] as an {!int}. *) -val to_int : int8 -> int - -(** [of_int x] represents [x] as an 8-bit integer. *) -val of_int : int -> int8 - -(** [to_float x] is [x] as a floating point number. *) -val to_float : int8 -> float - -(** [of_float x] truncates [x] to an integer. The result is - unspecified if the argument is [nan] or falls outside the range of - representable integers. *) -val of_float : float -> int8 +type t = int8 [@@immediate] -(** [to_string x] is the written representation of [x] in decimal. 
*) -val to_string : int8 -> string +(** @inline *) +include Int_wrapper.S with type t := int8 diff --git a/otherlibs/stdlib_beta/int_wrapper.ml b/otherlibs/stdlib_beta/int_wrapper.ml new file mode 100644 index 00000000000..baf31d5d07f --- /dev/null +++ b/otherlibs/stdlib_beta/int_wrapper.ml @@ -0,0 +1,321 @@ +(**************************************************************************) +(* *) +(* OCaml *) +(* *) +(* The OCaml programmers *) +(* Jacob Van Buren, Jane Street, New York *) +(* *) +(* Copyright 2018 Institut National de Recherche en Informatique et *) +(* en Automatique. *) +(* Copyright 2024 Jane Street Group LLC *) +(* *) +(* All rights reserved. This file is distributed under the terms of *) +(* the GNU Lesser General Public License version 2.1, with the *) +(* special exception on linking described in the file LICENSE. *) +(* *) +(**************************************************************************) + +[@@@ocaml.flambda_o3] + +include Stdlib.Int + +let int_size = Sys.int_size + +let[@inline available] of_int t = t + +let[@inline available] to_int t = t + +let[@inline available] unsigned_to_int t = t + +let[@inline available] unsigned_compare n m = + compare (sub n min_int) (sub m min_int) + +let[@inline] unsigned_lt n m = sub n min_int < sub m min_int + +(* Unsigned division from signed division of the same bitness. See Warren Jr., + Henry S. (2013). Hacker's Delight (2 ed.), Sec 9-3. *) +let[@inline available] unsigned_div n d = + if d < zero + then if unsigned_lt n d then zero else one + else + let q = shift_left (div (shift_right_logical n 1) d) 1 in + let r = sub n (mul q d) in + if unsigned_lt r d then q else succ q + +let[@inline available] unsigned_rem n d = + sub n (mul ((unsigned_div [@inlined]) n d) d) + +let seeded_hash seed x = Stdlib.Hashtbl.seeded_hash seed (x : int) + +let hash x = Stdlib.Hashtbl.hash (x : int) + +module type S = sig + (** Signed {n}-bit tagged integer values. 
+ + These integers are {n} bits wide and use two's complement representation. + All operations are taken modulo 2{^n}. They do not fail on overflow. *) + + (** {1:ints n-bit Integers} *) + + (** The type for n-bit integer values. *) + type t + + (** The number of bits in an integer of type {!t}. *) + val int_size : int + + val zero : t + + val one : t + + val minus_one : t + + val neg : t -> t + + val add : t -> t -> t + + val sub : t -> t -> t + + val mul : t -> t -> t + + (** Integer division. This division rounds the real quotient of + its arguments towards zero, as specified for {!Stdlib.(/)}. + @raise Division_by_zero if the second argument is zero. *) + val div : t -> t -> t + + (** Same as {!div}, except that arguments and result are interpreted as {e + unsigned} integers. *) + val unsigned_div : t -> t -> t + + (** Integer remainder. If [y] is not zero, [rem x y = sub x (mul (div x y) + y)]. If [y] is zero, [rem x y] raises [Division_by_zero]. *) + val rem : t -> t -> t + + (** Same as {!rem}, except that arguments and result are interpreted as {e + unsigned} integers. *) + val unsigned_rem : t -> t -> t + + (** [succ x] is [add x 1]. *) + val succ : t -> t + + (** [pred x] is [sub x 1]. *) + val pred : t -> t + + (** [abs x] is the absolute value of [x]. That is [x] if [x] is positive and + [neg x] if [x] is negative. {b Warning.} This may be negative if the + argument is {!min_int}. *) + val abs : t -> t + + (** [max_int] is the greatest representable integer, + [2{^[int_size - 1]} - 1]. *) + val max_int : t + + (** [min_int] is the smallest representable integer, + [-2{^[int_size - 1]}]. *) + val min_int : t + + (** Bitwise logical and. *) + val logand : t -> t -> t + + (** Bitwise logical or. *) + val logor : t -> t -> t + + (** Bitwise logical exclusive or. *) + val logxor : t -> t -> t + + (** Bitwise logical negation. *) + val lognot : t -> t + + (** [shift_left x n] shifts [x] to the left by [n] bits. 
The result + is unspecified if [n < 0] or [n >= ]{!int_size}. *) + val shift_left : t -> int -> t + + (** [shift_right x n] shifts [x] to the right by [n] bits. This is an + arithmetic shift: the sign bit of [x] is replicated and inserted + in the vacated bits. The result is unspecified if [n < 0] or + [n >=]{!int_size}. *) + val shift_right : t -> int -> t + + (** [shift_right x n] shifts [x] to the right by [n] bits. This is a + logical shift: zeroes are inserted in the vacated bits regardless + of the sign of [x]. The result is unspecified if [n < 0] or + [n >=]{!int_size}. *) + val shift_right_logical : t -> int -> t + + (** {1:preds Predicates and comparisons} *) + + (** [equal x y] is [true] if and only if [x = y]. *) + val equal : t -> t -> bool + + (** [compare x y] is {!Stdlib.compare}[ x y] but more efficient. *) + val compare : t -> t -> int + + (** Same as {!compare}, except that arguments are interpreted as {e unsigned} integers. *) + val unsigned_compare : t -> t -> int + + (** Return the lesser of the two arguments. *) + val min : t -> t -> t + + (** Return the greater of the two arguments. *) + val max : t -> t -> t + + (** {1:convert Converting} *) + + (** [to_int x] is [x] as an {!int}. If [int_size > Sys.int_size], the topmost + bits will be lost in the conversion *) + val to_int : t -> int + + (** [of_int x] truncates the representation of [x] to fit in {!t}. *) + val of_int : int -> t + + (** Same as {!to_int}, but interprets the argument as an {e unsigned} integer. *) + val unsigned_to_int : t -> int + + (** [to_float x] is [x] as a floating point number. *) + val to_float : t -> float + + (** [of_float x] truncates [x] to an integer. The result is + unspecified if the argument is [nan] or falls outside the range of + representable integers. *) + val of_float : float -> t + + (** [to_string x] is the written representation of [x] in decimal. 
*) + val to_string : t -> string + + (** A seeded hash function for ints, with the same output value as + {!Hashtbl.seeded_hash}. This function allows this module to be passed as + argument to the functor {!Hashtbl.MakeSeeded}. *) + val seeded_hash : int -> t -> int + + (** An unseeded hash function for ints, with the same output value as + {!Hashtbl.hash}. This function allows this module to be passed as argument + to the functor {!Hashtbl.Make}. *) + val hash : t -> int +end + +module Make + (Container : S) (Spec : sig + type t + + val int_size : int + + val inject : t -> Container.t + + val unchecked_project : Container.t -> t + end) : S with type t := Spec.t = struct + include Spec + + let () = assert (0 < int_size && int_size <= Container.int_size) + + let unused_bits = Container.int_size - int_size + + let[@inline] sign_extend i = + unchecked_project + (Container.shift_right (Container.shift_left i unused_bits) unused_bits) + + let[@inline] project i = + let t = sign_extend i in + if Container.equal i (inject t) then Some t else None + + let[@inline] zero_extend t = + Container.shift_right_logical + (Container.shift_left (inject t) unused_bits) + unused_bits + + let zero = sign_extend Container.zero + + let one = sign_extend Container.one + + let minus_one = sign_extend Container.minus_one + + let[@inline available] neg x = sign_extend (Container.neg (inject x)) + + let[@inline available] add x y = + sign_extend (Container.add (inject x) (inject y)) + + let[@inline available] sub x y = + sign_extend (Container.sub (inject x) (inject y)) + + let[@inline available] mul x y = + sign_extend (Container.mul (inject x) (inject y)) + + let[@inline available] div x y = + sign_extend (Container.div (inject x) (inject y)) + + let[@inline available] rem x y = + sign_extend (Container.rem (inject x) (inject y)) + + let[@inline available] succ x = sign_extend (Container.succ (inject x)) + + let[@inline available] pred x = sign_extend (Container.pred (inject x)) + + 
let[@inline available] abs x = sign_extend (Container.abs (inject x)) + + let[@inline available] equal x y = Container.equal (inject x) (inject y) + + let[@inline available] compare x y = Container.compare (inject x) (inject y) + + (* since the values are stored sign-extended, we can skip the sign-extension + for bitwise operations as the sign bits will all be treated the same *) + + let[@inline available] logand x y = + unchecked_project (Container.logand (inject x) (inject y)) + + let[@inline available] logor x y = + unchecked_project (Container.logor (inject x) (inject y)) + + let[@inline available] logxor x y = + unchecked_project (Container.logxor (inject x) (inject y)) + + let[@inline available] lognot x = + unchecked_project (Container.lognot (inject x)) + + let[@inline available] shift_left x y = + sign_extend (Container.shift_left (inject x) y) + + let[@inline available] shift_right x y = + sign_extend (Container.shift_right (inject x) y) + + let[@inline available] shift_right_logical x y = + sign_extend (Container.shift_right_logical (zero_extend x) y) + + let max_int = shift_right_logical minus_one 1 + + let min_int = succ max_int + + let[@inline available] unsigned_compare n m = + Container.unsigned_compare (zero_extend n) (zero_extend m) + + (* Unsigned division from signed division of the same bitness. See Warren Jr., + Henry S. (2013). Hacker's Delight (2 ed.), Sec 9-3. 
*) + let[@inline available] unsigned_div n d = + sign_extend (Container.unsigned_div (zero_extend n) (zero_extend d)) + + let[@inline available] unsigned_rem n d = + sign_extend (Container.unsigned_rem (zero_extend n) (zero_extend d)) + + let[@inline available] min x y = + unchecked_project (Container.min (inject x) (inject y)) + + let[@inline available] max x y = + unchecked_project (Container.max (inject x) (inject y)) + + let[@inline available] of_float f = + Option.value (project (Container.of_float f)) ~default:zero + + let[@inline available] to_float t = Container.to_float (inject t) + + let[@inline available] to_string t = Container.to_string (inject t) + + let[@inline available] seeded_hash seed t = + Container.seeded_hash seed (inject t) + + let[@inline available] hash t = Container.hash (inject t) + + let[@inline available] to_int t = Container.to_int (inject t) + + let[@inline available] of_int i = sign_extend (Container.of_int i) + + let[@inline available] unsigned_to_int t = + Container.unsigned_to_int (zero_extend t) +end +[@@inline available] diff --git a/otherlibs/stdlib_beta/stdlib_beta.ml b/otherlibs/stdlib_beta/stdlib_beta.ml index d3ead8bb464..2ef32596265 100644 --- a/otherlibs/stdlib_beta/stdlib_beta.ml +++ b/otherlibs/stdlib_beta/stdlib_beta.ml @@ -2,3 +2,4 @@ module Int8 = Int8 module Int8_u = Int8_u module Int16 = Int16 module Int16_u = Int16_u +module Int_wrapper = Int_wrapper diff --git a/otherlibs/stdlib_beta/stdlib_beta.mli b/otherlibs/stdlib_beta/stdlib_beta.mli index d3ead8bb464..2ef32596265 100644 --- a/otherlibs/stdlib_beta/stdlib_beta.mli +++ b/otherlibs/stdlib_beta/stdlib_beta.mli @@ -2,3 +2,4 @@ module Int8 = Int8 module Int8_u = Int8_u module Int16 = Int16 module Int16_u = Int16_u +module Int_wrapper = Int_wrapper diff --git a/runtime/caml/osdeps.h b/runtime/caml/osdeps.h index 4f8007712ca..9728770bcbb 100644 --- a/runtime/caml/osdeps.h +++ b/runtime/caml/osdeps.h @@ -106,9 +106,9 @@ extern int caml_format_timestamp(char* buf, 
size_t sz, int formatted); /* Memory management platform-specific operations */ -void *caml_plat_mem_map(uintnat, int); -void *caml_plat_mem_commit(void *, uintnat); -void caml_plat_mem_decommit(void *, uintnat); +void *caml_plat_mem_map(uintnat, int, const char*); +void *caml_plat_mem_commit(void *, uintnat, const char*); +void caml_plat_mem_decommit(void *, uintnat, const char*); void caml_plat_mem_unmap(void *, uintnat); #ifdef _WIN32 diff --git a/runtime/caml/platform.h b/runtime/caml/platform.h index ab44885b296..4db2d19e901 100644 --- a/runtime/caml/platform.h +++ b/runtime/caml/platform.h @@ -434,9 +434,9 @@ uintnat caml_mem_round_up_mapping_size(uintnat size); caml_plat_pagesize. The size given to caml_mem_unmap and caml_mem_decommit must match the size given to caml_mem_map/caml_mem_commit for mem. */ -void* caml_mem_map(uintnat size, int reserve_only); -void* caml_mem_commit(void* mem, uintnat size); -void caml_mem_decommit(void* mem, uintnat size); +void* caml_mem_map(uintnat size, int reserve_only, const char* name); +void* caml_mem_commit(void* mem, uintnat size, const char* name); +void caml_mem_decommit(void* mem, uintnat size, const char* name); void caml_mem_unmap(void* mem, uintnat size); diff --git a/runtime/domain.c b/runtime/domain.c index 1a57defb07c..f5a667bce2b 100644 --- a/runtime/domain.c +++ b/runtime/domain.c @@ -464,7 +464,8 @@ static void free_minor_heap(void) { no race whereby other code could attempt to reuse the memory. 
*/ caml_mem_decommit( (void*)domain_self->minor_heap_area_start, - Bsize_wsize(domain_state->minor_heap_wsz)); + Bsize_wsize(domain_state->minor_heap_wsz), + "minor reservation"); domain_state->young_start = NULL; domain_state->young_end = NULL; @@ -487,8 +488,10 @@ static int allocate_minor_heap(asize_t wsize) { caml_gc_log ("trying to allocate minor heap: %" ARCH_SIZET_PRINTF_FORMAT "uk words", wsize / 1024); + char name[32]; + snprintf(name, sizeof name, "minor heap %d", domain_self->id); if (!caml_mem_commit( - (void*)domain_self->minor_heap_area_start, Bsize_wsize(wsize))) { + (void*)domain_self->minor_heap_area_start, Bsize_wsize(wsize), name)) { return -1; } @@ -827,7 +830,7 @@ static void reserve_minor_heaps_from_stw_single(void) { minor_heap_reservation_bsize = minor_heap_max_bsz * Max_domains; /* reserve memory space for minor heaps */ - heaps_base = caml_mem_map(minor_heap_reservation_bsize, 1 /* reserve_only */); + heaps_base = caml_mem_map(minor_heap_reservation_bsize, 1 /* reserve_only */, "minor reservation"); if (heaps_base == NULL) caml_fatal_error("Not enough heap memory to reserve minor heaps"); diff --git a/runtime/fiber.c b/runtime/fiber.c index 72e8cd9b07d..9ff5091a6e3 100644 --- a/runtime/fiber.c +++ b/runtime/fiber.c @@ -20,6 +20,7 @@ #include "caml/config.h" #include +#include #ifdef HAS_UNISTD #include #endif @@ -44,6 +45,11 @@ #if defined(USE_MMAP_MAP_STACK) || !defined(STACK_CHECKS_ENABLED) #include #endif +#ifdef __linux__ +/* for gettid */ +#include +#include +#endif #ifdef DEBUG #define fiber_debug_log(...) 
caml_gc_log(__VA_ARGS__) @@ -183,7 +189,15 @@ Caml_inline struct stack_info* alloc_for_stack (mlsize_t wosize) // 2Mb (= extra_size) // -------------------- <- [stack], returned from [mmap], page-aligned char* stack; - stack = caml_mem_map(len + stack_extra_size_for_mmap, 0); +#ifdef __linux__ + /* On Linux, record the current TID in the mapping name */ + char mapping_name[64]; + snprintf(mapping_name, sizeof mapping_name, + "stack (tid %ld)", (long)syscall(SYS_gettid)); +#else + const char* mapping_name = "stack"; +#endif + stack = caml_mem_map(len + stack_extra_size_for_mmap, 0, mapping_name); if (stack == MAP_FAILED) { return NULL; } diff --git a/runtime/gc_ctrl.c b/runtime/gc_ctrl.c index 3dfd0d274d9..87f4d7cb8ac 100644 --- a/runtime/gc_ctrl.c +++ b/runtime/gc_ctrl.c @@ -132,9 +132,13 @@ CAMLprim value caml_gc_get(value v) res = caml_alloc_tuple (11); Store_field (res, 0, Val_long (Caml_state->minor_heap_wsz)); /* s */ + Store_field (res, 1, Val_long (0)); Store_field (res, 2, Val_long (caml_percent_free)); /* o */ Store_field (res, 3, Val_long (atomic_load_relaxed(&caml_verb_gc))); /* v */ + Store_field (res, 4, Val_long (0)); Store_field (res, 5, Val_long (caml_max_stack_wsize)); /* l */ + Store_field (res, 6, Val_long (0)); + Store_field (res, 7, Val_long (0)); Store_field (res, 8, Val_long (caml_custom_major_ratio)); /* M */ Store_field (res, 9, Val_long (caml_custom_minor_ratio)); /* m */ Store_field (res, 10, Val_long (caml_custom_minor_max_bsz)); /* n */ diff --git a/runtime/platform.c b/runtime/platform.c index 1e541d1354f..4dbacae1345 100644 --- a/runtime/platform.c +++ b/runtime/platform.c @@ -400,9 +400,9 @@ uintnat caml_mem_round_up_mapping_size(uintnat size) #define Is_page_aligned(size) ((size & (caml_plat_pagesize - 1)) == 0) -void* caml_mem_map(uintnat size, int reserve_only) +void* caml_mem_map(uintnat size, int reserve_only, const char* name) { - void* mem = caml_plat_mem_map(size, reserve_only); + void* mem = caml_plat_mem_map(size, 
reserve_only, name); if (mem == 0) { caml_gc_message(0x1000, "mmap %" ARCH_INTNAT_PRINTF_FORMAT "d bytes failed", @@ -411,32 +411,32 @@ void* caml_mem_map(uintnat size, int reserve_only) } caml_gc_message(0x1000, "mmap %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); + " bytes at %p for %s\n", size, mem, name); return mem; } -void* caml_mem_commit(void* mem, uintnat size) +void* caml_mem_commit(void* mem, uintnat size, const char* name) { CAMLassert(Is_page_aligned(size)); caml_gc_message(0x1000, "commit %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); - return caml_plat_mem_commit(mem, size); + " bytes at %p for %s\n", size, mem, name); + return caml_plat_mem_commit(mem, size, name); } -void caml_mem_decommit(void* mem, uintnat size) +void caml_mem_decommit(void* mem, uintnat size, const char* name) { if (size) { caml_gc_message(0x1000, "decommit %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); - caml_plat_mem_decommit(mem, size); + " bytes at %p for %s\n", size, mem, name); + caml_plat_mem_decommit(mem, size, name); } } void caml_mem_unmap(void* mem, uintnat size) { caml_gc_message(0x1000, "munmap %" ARCH_INTNAT_PRINTF_FORMAT "d" - " bytes at %p for heaps\n", size, mem); + " bytes at %p\n", size, mem); caml_plat_mem_unmap(mem, size); } diff --git a/runtime/shared_heap.c b/runtime/shared_heap.c index db0199723a5..85c25e5d0a6 100644 --- a/runtime/shared_heap.c +++ b/runtime/shared_heap.c @@ -75,6 +75,7 @@ static struct { /* Mapped but not yet active pools */ uintnat fresh_pools; char* next_fresh_pool; + uintnat next_chunk_index; /* Count of all pools in use across all domains and the global lists below. 
@@ -93,6 +94,7 @@ static struct { 0, NULL, 0, + 0, { 0, }, { NULL, }, { NULL, }, @@ -210,8 +212,11 @@ static pool* pool_acquire(struct caml_heap_state* local) { uintnat mapping_size = caml_mem_round_up_mapping_size(Bsize_wsize(POOL_WSIZE) * new_pools); new_pools = mapping_size / Bsize_wsize(POOL_WSIZE); - - void* mem = caml_mem_map(mapping_size, 0); + uintnat chunk_ix = pool_freelist.next_chunk_index++; + char mapping_name[64]; + snprintf(mapping_name, sizeof mapping_name, + "major heap (chunk %lu)", (unsigned long)chunk_ix); + void* mem = caml_mem_map(mapping_size, 0, mapping_name); if (mem) { pool_freelist.fresh_pools = new_pools; pool_freelist.next_fresh_pool = mem; diff --git a/runtime/unix.c b/runtime/unix.c index 546eb25efc8..a940f223e07 100644 --- a/runtime/unix.c +++ b/runtime/unix.c @@ -61,6 +61,9 @@ #ifdef HAS_SYS_MMAN_H #include #endif +#ifdef __linux__ +#include +#endif #include "caml/fail.h" #include "caml/memory.h" #include "caml/misc.h" @@ -508,7 +511,31 @@ void caml_init_os_params(void) #ifndef __CYGWIN__ -void *caml_plat_mem_map(uintnat size, int reserve_only) +static void* mmap_named(void* addr, size_t length, int prot, int flags, + int fd, off_t offset, const char* name) +{ + void* p = mmap(addr, length, prot, flags, fd, offset); +#ifdef __linux__ + if (p != MAP_FAILED) { + /* On Linux, use PR_SET_VMA_ANON_NAME to name the allocation */ + char buf[80]; + snprintf(buf, sizeof buf, "OCaml: %s", name); + /* The constants PR_SET_VMA and PR_SET_VMA_ANON_NAME are stable + (part of the Linux kernel ABI), but may not be provided by the + libc headers (e.g. a newer kernel with an older userspace). + So, it's more portable to hardcode these numbers */ + enum { PR_SET_VMA_ = 0x53564d41, PR_SET_VMA_ANON_NAME_ = 0 }; + prctl(PR_SET_VMA_, PR_SET_VMA_ANON_NAME_, + (unsigned long)p, length, (unsigned long)buf); + /* No error checking or reporting here: it's a best-effort tool + for debugging, and may fail if e.g. 
this prctl is not supported + on this kernel version. */ + } +#endif + return p; +} + +void *caml_plat_mem_map(uintnat size, int reserve_only, const char* name) { void* mem; int prot = reserve_only ? PROT_NONE : (PROT_READ | PROT_WRITE); @@ -518,7 +545,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) if (size < alignment || alignment < caml_plat_pagesize) { /* Short mapping or unknown/bad hugepagesize. Either way, not worth bothering with alignment. */ - mem = mmap(0, size, prot, flags, -1, 0); + mem = mmap_named(0, size, prot, flags, -1, 0, name); if (mem == MAP_FAILED) mem = NULL; return mem; } @@ -526,7 +553,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) /* Sensible kernels (on Linux, that means >= 6.7) will always provide aligned mappings. To avoid penalising such kernels, try mapping the exact desired size first and see if it happens to be aligned. */ - mem = mmap(0, size, prot, flags, -1, 0); + mem = mmap_named(0, size, prot, flags, -1, 0, name); if (mem == MAP_FAILED) return NULL; if ((((uintnat)mem) & (alignment - 1)) == 0) return mem; @@ -536,7 +563,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) munmap(mem, size); /* Allocate a longer region than needed and trim it afterwards */ - mem = mmap(0, size + alignment, prot, flags, -1, 0); + mem = mmap_named(0, size + alignment, prot, flags, -1, 0, name); if (mem == MAP_FAILED) return NULL; uintnat aligned = ((uintnat)mem + alignment) & ~(alignment - 1); @@ -546,10 +573,10 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) return (void*)aligned; } -static void* map_fixed(void* mem, uintnat size, int prot) +static void* map_fixed(void* mem, uintnat size, int prot, const char* name) { - if (mmap(mem, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - -1, 0) == MAP_FAILED) { + if (mmap_named(mem, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + -1, 0, name) == MAP_FAILED) { return 0; } else { return mem; @@ -564,7 +591,7 @@ static void* map_fixed(void* mem, 
uintnat size, int prot) done using mprotect, since Cygwin's mmap doesn't implement the required functions for committing using mmap. */ -void *caml_plat_mem_map(uintnat size, int reserve_only) +void *caml_plat_mem_map(uintnat size, int reserve_only, const char* name) { void* mem; @@ -576,7 +603,7 @@ void *caml_plat_mem_map(uintnat size, int reserve_only) return mem; } -static void* map_fixed(void* mem, uintnat size, int prot) +static void* map_fixed(void* mem, uintnat size, int prot, const char* name) { if (mprotect(mem, size, prot) != 0) { return 0; @@ -587,9 +614,9 @@ static void* map_fixed(void* mem, uintnat size, int prot) #endif /* !__CYGWIN__ */ -void* caml_plat_mem_commit(void* mem, uintnat size) +void* caml_plat_mem_commit(void* mem, uintnat size, const char* name) { - void* p = map_fixed(mem, size, PROT_READ | PROT_WRITE); + void* p = map_fixed(mem, size, PROT_READ | PROT_WRITE, name); /* FIXME: On Linux, it might be useful to populate page tables with MAP_POPULATE to reduce the time spent blocking on page faults at @@ -598,9 +625,9 @@ void* caml_plat_mem_commit(void* mem, uintnat size) return p; } -void caml_plat_mem_decommit(void* mem, uintnat size) +void caml_plat_mem_decommit(void* mem, uintnat size, const char* name) { - map_fixed(mem, size, PROT_NONE); + map_fixed(mem, size, PROT_NONE, name); } void caml_plat_mem_unmap(void* mem, uintnat size) diff --git a/stdlib/gc.mli b/stdlib/gc.mli index 9037008e99e..5cb8a66e738 100644 --- a/stdlib/gc.mli +++ b/stdlib/gc.mli @@ -45,7 +45,7 @@ type stat = heap_chunks : int; (** Number of contiguous pieces of memory that make up the major heap. - This metrics is currently not available in OCaml 5: the field value is + This metric is currently not available in OCaml 5: the field value is always [0]. *) live_words : int; @@ -74,12 +74,12 @@ type stat = free_blocks : int; (** Number of blocks in the free list. 
- This metrics is currently not available in OCaml 5: the field value is + This metric is currently not available in OCaml 5: the field value is always [0]. *) largest_free : int; (** Size (in words) of the largest block in the free list. - This metrics is currently not available in OCaml 5: the field value + This metric is currently not available in OCaml 5: the field value is always [0]. *) fragments : int; @@ -95,7 +95,7 @@ type stat = stack_size: int; (** Current size of the stack, in words. - This metrics is currently not available in OCaml 5: the field value is + This metric is currently not available in OCaml 5: the field value is always [0]. @since 3.12 *) @@ -127,7 +127,9 @@ type control = number is less than or equal to 1000, it is a percentage of the current heap size (i.e. setting it to 100 will double the heap size at each increase). If it is more than 1000, it is a fixed - number of words that will be added to the heap. Default: 15. *) + number of words that will be added to the heap. Default: 15. + This metric is currently not available in OCaml 5: the field value is + always [0]. *) space_overhead : int; (** The major GC speed is computed from this parameter. @@ -164,7 +166,9 @@ type control = If [max_overhead >= 1000000], compaction is never triggered. On runtime4, if compaction is permanently disabled, it is strongly suggested to set [allocation_policy] to 2. - Default: 500. *) + Default: 500. + This metric is currently not available in OCaml 5: the field value is + always [0]. *) stack_limit : int; (** The maximum size of the fiber stacks (in words). @@ -217,6 +221,9 @@ type control = Default: 2. + This metric is currently not available in OCaml 5: the field value is + always [0]. + ---------------------------------------------------------------- @since 3.11 *) @@ -226,6 +233,8 @@ type control = out variations in its workload. This is an integer between 1 and 50. Default: 1. 
+ This metric is currently not available in OCaml 5: the field value is + always [0]. @since 4.03 *) custom_major_ratio : int; diff --git a/testsuite/tests/capsule-api/condition.ml b/testsuite/tests/capsule-api/condition.ml new file mode 100644 index 00000000000..1f7e33cafed --- /dev/null +++ b/testsuite/tests/capsule-api/condition.ml @@ -0,0 +1,53 @@ +(* TEST + include stdlib_alpha; + flags = "-extension-universe alpha"; + runtime5; + { bytecode; } + { native; } +*) + +[@@@ocaml.alert "-unsafe_parallelism"] + +module Capsule = Stdlib_alpha.Capsule + +external ref : 'a -> 'a ref @@ portable = "%makemutable" +external ( ! ) : 'a ref -> 'a @@ portable = "%field0" +external ( := ) : 'a ref -> 'a -> unit @@ portable = "%setfield0" + +let () = (* Signal *) + let (P mut) = Capsule.create_with_mutex () in + let cond = Capsule.Condition.create () in + let go = Capsule.Data.create (fun () -> ref true) in + let wait = Atomic.make true in + let domain = Domain.spawn (fun () -> + Capsule.Mutex.with_lock mut (fun password -> + Atomic.set wait false; + while Capsule.Data.extract password (fun go : bool -> !go) go do + Capsule.Condition.wait cond mut password + done)) + in + while Atomic.get wait do () done; + Capsule.Mutex.with_lock mut (fun password -> + Capsule.Data.iter password (fun go -> go := false) go; + Capsule.Condition.signal cond); + Domain.join domain +;; + +let () = (* Broadcast *) + let (P mut) = Capsule.create_with_mutex () in + let cond = Capsule.Condition.create () in + let go = Capsule.Data.create (fun () -> ref true) in + let ready = Atomic.make 0 in + let domains = List.init 4 (fun _ -> Domain.spawn (fun () -> + Capsule.Mutex.with_lock mut (fun password -> + Atomic.incr ready; + while Capsule.Data.extract password (fun go : bool -> !go) go do + Capsule.Condition.wait cond mut password + done))) + in + while Atomic.get ready < 4 do () done; + Capsule.Mutex.with_lock mut (fun password -> + Capsule.Data.iter password (fun go -> go := false) go; + 
Capsule.Condition.broadcast cond); + List.iter Domain.join domains +;; diff --git a/testsuite/tests/capsule-api/data.ml b/testsuite/tests/capsule-api/data.ml index 15368fd1921..24a9e5f58ce 100644 --- a/testsuite/tests/capsule-api/data.ml +++ b/testsuite/tests/capsule-api/data.ml @@ -176,3 +176,31 @@ let ptr' : (int, lost_capsule) Capsule.Data.t = let () = assert (Capsule.Data.project ptr' = 111) ;; + + +(* [protect]. *) +exception Exn of string + +let () = + match Capsule.protect (fun () -> "ok") with + | s -> assert (s = "ok") + | exception _ -> assert false +;; + +let () = + match Capsule.protect (fun () -> Exn "ok") with + | Exn s -> assert (s = "ok") + | _ -> assert false +;; + +let () = + match Capsule.protect (fun () -> reraise (Exn "fail")) with + | exception (Capsule.Protected (mut, exn)) -> + let s = Capsule.Mutex.with_lock mut (fun password -> + Capsule.Data.extract password (fun exn -> + match exn with + | Exn s -> s + | _ -> assert false) exn) in + assert (s = "fail") + | _ -> assert false +;; diff --git a/testsuite/tests/lib-smallint/.ocamlformat-enable b/testsuite/tests/lib-smallint/.ocamlformat-enable index a0b681cc67c..6ae035c5dad 100644 --- a/testsuite/tests/lib-smallint/.ocamlformat-enable +++ b/testsuite/tests/lib-smallint/.ocamlformat-enable @@ -1,3 +1,2 @@ -test_int8.ml -test_int16.ml +test_smallint.ml diff --git a/testsuite/tests/lib-smallint/test_int16.ml b/testsuite/tests/lib-smallint/test_int16.ml index c2eb1503a03..767bde7d724 100644 --- a/testsuite/tests/lib-smallint/test_int16.ml +++ b/testsuite/tests/lib-smallint/test_int16.ml @@ -1,143 +1,11 @@ -(* TEST include stdlib_beta; flags = "-extension small_numbers_beta"; *) - -module Int16 = Stdlib_beta.Int16 - -let int_size = Int16.int_size - -let () = assert (0 < int_size && int_size < Sys.int_size) - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = lnot max_int - -let mask = (1 lsl int_size) - 1 - -let to_int x : int = - let i : int = Int16.to_int x in - assert (Obj.repr i == 
Obj.repr x); - assert (min_int <= i && i <= max_int); - i - -let of_int (i : int) = - let x = Int16.of_int i in - assert (to_int x land mask == i land mask); - x - -let rng = Random.State.make [| int_size |] - -(** sparse test cases, concentrated around 0 and the endpoints *) -let test_cases = - let is_even = 1 - (int_size land 1) in - List.init (int_size - is_even) (fun size -> - let bit = 1 lsl size in - let rand () = - Random.State.int_in_range rng ~min:(bit lsr 1) ~max:(bit - 1) - in - [rand (); lnot (rand ()); max_int - rand (); lnot (max_int - rand ())]) - |> List.concat |> List.sort Int.compare - -let test1 f = ListLabels.iter test_cases ~f - -let test2 f = test1 (fun x -> test1 (fun y -> f x y)) - -let test_round_trip () = - let test hi lo = - let hi = hi lsl int_size in - assert (lo == to_int (of_int (hi lxor lo))) - in - test2 (fun hi lo -> - (* generate test cases with different hi bits *) - test hi lo; - test (Random.bits ()) lo) - -let equal_arith x i = x == of_int i - -let equal_logical x i = to_int x == i - -let same_float x y = Int64.equal (Int64.bits_of_float x) (Int64.bits_of_float y) - -let assert_equal equal x y = - let x = try Ok (x ()) with exn -> Error exn in - let y = try Ok (y ()) with exn -> Error exn in - match x, y with - | Ok x, Ok y -> assert (equal x y) - | Error exn, Error exn' -> assert (exn = exn') - | Ok _, Error exn | Error exn, Ok _ -> raise exn - -let test_conv1 int16_f int_f ~equal = - test1 (fun x -> - assert_equal equal (fun () -> int16_f (of_int x)) (fun () -> int_f x)) - -let test_conv2 int16_f int_f ~equal = - test2 (fun x y -> - assert_equal equal - (fun () -> int16_f (of_int x) (of_int y)) - (fun () -> int_f x y)) - -let test_arith1 = test_conv1 ~equal:equal_arith - -let test_arith2 = test_conv2 ~equal:equal_arith - -let test_logical1 = test_conv1 ~equal:equal_logical - -let test_logical2 = test_conv2 ~equal:equal_logical - -let reference_shift_right_logical x i = - (* we need to ensure that we shift in zero bytes, 
which is incompatible with - sign-extension *) - Int.shift_right_logical (if i > 0 then x land mask else x) i +(* TEST + include stdlib_beta; + modules = "test_smallint.ml"; + flags = "-extension small_numbers_beta"; +*) let () = - test_round_trip (); - assert (to_int Int16.zero == Int.zero); - assert (to_int Int16.one == Int.one); - assert (to_int Int16.minus_one == Int.minus_one); - test_arith2 Int16.add Int.add; - test_arith2 Int16.sub Int.sub; - test_arith2 Int16.mul Int.mul; - test_arith2 Int16.div Int.div; - test_arith2 Int16.rem Int.rem; - test_arith1 Int16.succ Int.succ; - test_arith1 Int16.pred Int.pred; - test_arith1 Int16.abs Int.abs; - test_logical2 Int16.logand Int.logand; - test_logical2 Int16.logor Int.logor; - test_logical2 Int16.logxor Int.logxor; - test_logical1 Int16.lognot Int.lognot; - for shift = 0 to int_size do - let apply_shift f x = f x shift in - test_logical1 (apply_shift Int16.shift_right) (apply_shift Int.shift_right); - test_conv1 - (apply_shift Int16.shift_right_logical) - (apply_shift reference_shift_right_logical) - ~equal:equal_logical; - test_conv1 - (apply_shift Int16.shift_left) - (apply_shift Int.shift_left) - ~equal:(if shift = 0 then equal_logical else equal_arith) - done; - test_conv2 Int16.equal Int.equal ~equal:Bool.equal; - test_conv2 Int16.compare Int.compare ~equal:Int.equal; - test_conv1 Int16.to_float Int.to_float ~equal:same_float; - assert (Int16.of_float (-0.) = Int16.zero); - test1 (fun x -> - let f = Int.to_float x in - assert (equal_logical (Int16.of_float f) x)); - test1 (fun x -> - (* test fractional values round toward zero *) - let f = Int.to_float x in - let f' = - let almost_one = Random.State.float rng (Float.pred 1.0) in - if x < 0 - then f -. almost_one - else if x > 0 - then f +. 
almost_one - else if Random.State.bool rng - then almost_one - else -.almost_one - in - assert (equal_logical (Int16.of_float f') x)); - test1 (fun x -> assert (Int16.to_string (of_int x) = Int.to_string x)); - test_logical2 Int16.min Int.min; - test_logical2 Int16.max Int.max; - () + Test_smallint.run + (module Stdlib_beta.Int16) + ~min_int:(-0x8000) + ~max_int:0x7fff diff --git a/testsuite/tests/lib-smallint/test_int8.ml b/testsuite/tests/lib-smallint/test_int8.ml index 785bc9e30ad..77b08a5274b 100644 --- a/testsuite/tests/lib-smallint/test_int8.ml +++ b/testsuite/tests/lib-smallint/test_int8.ml @@ -1,143 +1,11 @@ -(* TEST include stdlib_beta; flags = "-extension small_numbers_beta"; *) - -module Int8 = Stdlib_beta.Int8 - -let int_size = Int8.int_size - -let () = assert (0 < int_size && int_size < Sys.int_size) - -let max_int = (1 lsl (int_size - 1)) - 1 - -let min_int = lnot max_int - -let mask = (1 lsl int_size) - 1 - -let to_int x : int = - let i : int = Int8.to_int x in - assert (Obj.repr i == Obj.repr x); - assert (min_int <= i && i <= max_int); - i - -let of_int (i : int) = - let x = Int8.of_int i in - assert (to_int x land mask == i land mask); - x - -let rng = Random.State.make [| int_size |] - -(** sparse test cases, concentrated around 0 and the endpoints *) -let test_cases = - let is_even = 1 - (int_size land 1) in - List.init (int_size - is_even) (fun size -> - let bit = 1 lsl size in - let rand () = - Random.State.int_in_range rng ~min:(bit lsr 1) ~max:(bit - 1) - in - [rand (); lnot (rand ()); max_int - rand (); lnot (max_int - rand ())]) - |> List.concat |> List.sort Int.compare - -let test1 f = ListLabels.iter test_cases ~f - -let test2 f = test1 (fun x -> test1 (fun y -> f x y)) - -let test_round_trip () = - let test hi lo = - let hi = hi lsl int_size in - assert (lo == to_int (of_int (hi lxor lo))) - in - test2 (fun hi lo -> - (* generate test cases with different hi bits *) - test hi lo; - test (Random.bits ()) lo) - -let equal_arith x i = x == 
of_int i - -let equal_logical x i = to_int x == i - -let same_float x y = Int64.equal (Int64.bits_of_float x) (Int64.bits_of_float y) - -let assert_equal equal x y = - let x = try Ok (x ()) with exn -> Error exn in - let y = try Ok (y ()) with exn -> Error exn in - match x, y with - | Ok x, Ok y -> assert (equal x y) - | Error exn, Error exn' -> assert (exn = exn') - | Ok _, Error exn | Error exn, Ok _ -> raise exn - -let test_conv1 int8_f int_f ~equal = - test1 (fun x -> - assert_equal equal (fun () -> int8_f (of_int x)) (fun () -> int_f x)) - -let test_conv2 int8_f int_f ~equal = - test2 (fun x y -> - assert_equal equal - (fun () -> int8_f (of_int x) (of_int y)) - (fun () -> int_f x y)) - -let test_arith1 = test_conv1 ~equal:equal_arith - -let test_arith2 = test_conv2 ~equal:equal_arith - -let test_logical1 = test_conv1 ~equal:equal_logical - -let test_logical2 = test_conv2 ~equal:equal_logical - -let reference_shift_right_logical x i = - (* we need to ensure that we shift in zero bytes, which is incompatible with - sign-extension *) - Int.shift_right_logical (if i > 0 then x land mask else x) i +(* TEST + include stdlib_beta; + modules = "test_smallint.ml"; + flags = "-extension small_numbers_beta"; +*) let () = - test_round_trip (); - assert (to_int Int8.zero == Int.zero); - assert (to_int Int8.one == Int.one); - assert (to_int Int8.minus_one == Int.minus_one); - test_arith2 Int8.add Int.add; - test_arith2 Int8.sub Int.sub; - test_arith2 Int8.mul Int.mul; - test_arith2 Int8.div Int.div; - test_arith2 Int8.rem Int.rem; - test_arith1 Int8.succ Int.succ; - test_arith1 Int8.pred Int.pred; - test_arith1 Int8.abs Int.abs; - test_logical2 Int8.logand Int.logand; - test_logical2 Int8.logor Int.logor; - test_logical2 Int8.logxor Int.logxor; - test_logical1 Int8.lognot Int.lognot; - for shift = 0 to int_size do - let apply_shift f x = f x shift in - test_logical1 (apply_shift Int8.shift_right) (apply_shift Int.shift_right); - test_conv1 - (apply_shift 
Int8.shift_right_logical) - (apply_shift reference_shift_right_logical) - ~equal:equal_logical; - test_conv1 - (apply_shift Int8.shift_left) - (apply_shift Int.shift_left) - ~equal:(if shift = 0 then equal_logical else equal_arith) - done; - test_conv2 Int8.equal Int.equal ~equal:Bool.equal; - test_conv2 Int8.compare Int.compare ~equal:Int.equal; - test_conv1 Int8.to_float Int.to_float ~equal:same_float; - assert (Int8.of_float (-0.) = Int8.zero); - test1 (fun x -> - let f = Int.to_float x in - assert (equal_logical (Int8.of_float f) x)); - test1 (fun x -> - (* test fractional values round toward zero *) - let f = Int.to_float x in - let f' = - let almost_one = Random.State.float rng (Float.pred 1.0) in - if x < 0 - then f -. almost_one - else if x > 0 - then f +. almost_one - else if Random.State.bool rng - then almost_one - else -.almost_one - in - assert (equal_logical (Int8.of_float f') x)); - test1 (fun x -> assert (Int8.to_string (of_int x) = Int.to_string x)); - test_logical2 Int8.min Int.min; - test_logical2 Int8.max Int.max; - () + Test_smallint.run + (module Stdlib_beta.Int8) + ~min_int:(-0x80) + ~max_int:0x7f diff --git a/testsuite/tests/lib-smallint/test_smallint.ml b/testsuite/tests/lib-smallint/test_smallint.ml new file mode 100644 index 00000000000..ec2b1a4cb64 --- /dev/null +++ b/testsuite/tests/lib-smallint/test_smallint.ml @@ -0,0 +1,156 @@ +module Int = Stdlib_beta.Int_wrapper + +let same_float x y = Int64.equal (Int64.bits_of_float x) (Int64.bits_of_float y) + +let phys_same x y = Obj.repr x == Obj.repr y + +(** generates a random float that rounds toward zero to the same integer value *) +let nudge rng f = + let f_pos = Float.abs f in + if not (Float.is_finite f) + then f + else if f_pos < 1. + then Random.State.float rng (Float.pred 1.0) + else + let lo = Float.floor f_pos in + let hi = Float.pred (lo +. 1.) 
in + if not (lo < hi) + then f + else + (* the mantissa is the low bits, and we are only generating normal + fractional values so we never need to change the exponent *) + let lo = Int64.bits_of_float lo in + let hi = Int64.bits_of_float hi in + assert (Int64.shift_right lo 52 = Int64.shift_right hi 52); + Float.copy_sign + (Int64.float_of_bits (Random.State.int64_in_range rng ~min:lo ~max:hi)) + f + +let run (module Smallint : Int.S) ~min_int ~max_int = + let int_size = Smallint.int_size in + assert (0 < int_size && int_size <= Sys.int_size); + assert (max_int = (1 lsl (int_size - 1)) - 1); + assert (min_int = lnot max_int); + let mask = (1 lsl int_size) - 1 in + let to_int x : int = + let i : int = Smallint.to_int x in + assert (phys_same i x); + assert (min_int <= i && i <= max_int); + i + in + let of_int (i : int) = + let x = Smallint.of_int i in + if not (to_int x land mask = i land mask) + then + failwith + (Printf.sprintf "%x (%x) <> %x (%x)" + (to_int x land mask) + (to_int x) (i land mask) i); + x + in + let rng = Random.State.make [| int_size |] in + let test_cases = + (* sparse test cases, concentrated around 0 and the endpoints *) + List.init (int_size - 1) (fun size -> + let bit = 1 lsl size in + let rand () = + Random.State.int_in_range rng ~min:(bit lsr 1) ~max:(bit - 1) + in + [rand (); lnot (rand ()); max_int - rand (); lnot (max_int - rand ())]) + |> List.concat |> List.sort Int.compare + in + let special_floats = Float.[infinity; nan; neg_infinity; epsilon; -0.; 0.] 
in + let test1 f = ListLabels.iter test_cases ~f in + let test2 f = test1 (fun x -> test1 (fun y -> f x y)) in + let test_round_trip () = + let test hi lo = + let hi = hi lsl int_size in + assert (phys_same lo (to_int (of_int (hi lxor lo)))) + in + test2 (fun hi lo -> + (* generate test cases with different hi bits *) + test hi lo; + test (Random.bits ()) lo) + in + let equal_arith x i = phys_same x (of_int i) in + let equal_logical x i = to_int x == i in + let assert_equal equal x y = + let x = try Ok (x ()) with exn -> Error exn in + let y = try Ok (y ()) with exn -> Error exn in + match x, y with + | Ok x, Ok y -> assert (equal x y) + | Error exn, Error exn' -> assert (exn = exn') + | Ok _, Error exn | Error exn, Ok _ -> raise exn + in + let test_conv1 int16_f int_f ~equal = + test1 (fun x -> + assert_equal equal (fun () -> int16_f (of_int x)) (fun () -> int_f x)) + in + let test_conv2 ?(unsigned = false) int16_f int_f ~equal = + test2 (fun x y -> + assert_equal equal + (fun () -> int16_f (of_int x) (of_int y)) + (fun () -> + if unsigned then int_f (x land mask) (y land mask) else int_f x y)) + in + let test_arith1 = test_conv1 ~equal:equal_arith in + let test_arith2 ?__LINE__ = test_conv2 ~equal:equal_arith in + let test_logical1 = test_conv1 ~equal:equal_logical in + let test_logical2 = test_conv2 ~equal:equal_logical in + let reference_shift_right_logical x i = + (* we need to ensure that we shift in zero bytes, which is incompatible with + sign-extension *) + Int.shift_right_logical (if i > 0 then x land mask else x) i + in + test_round_trip (); + assert (phys_same Smallint.zero Int.zero); + assert (phys_same Smallint.one Int.one); + assert (phys_same Smallint.minus_one Int.minus_one); + assert (phys_same Smallint.max_int max_int); + assert (phys_same Smallint.min_int min_int); + assert (phys_same Smallint.(abs min_int) Smallint.min_int); + assert (phys_same (Smallint.succ Smallint.max_int) Smallint.min_int); + assert (phys_same (Smallint.pred 
Smallint.min_int) Smallint.max_int); + test_arith2 Smallint.add Int.add; + test_arith2 Smallint.sub Int.sub; + test_arith2 Smallint.mul Int.mul; + test_arith2 Smallint.div Int.div; + test_arith2 Smallint.unsigned_div Int.unsigned_div ~unsigned:true; + test_arith2 Smallint.unsigned_rem Int.unsigned_rem ~unsigned:true; + test_arith2 Smallint.rem Int.rem; + test_arith1 Smallint.succ Int.succ; + test_arith1 Smallint.pred Int.pred; + test_arith1 Smallint.abs Int.abs; + test_arith1 Smallint.neg Int.neg; + test_logical2 Smallint.logand Int.logand; + test_logical2 Smallint.logor Int.logor; + test_logical2 Smallint.logxor Int.logxor; + test_logical1 Smallint.lognot Int.lognot; + for shift = 0 to int_size - 1 do + let apply_shift f x = f x shift in + test_logical1 + (apply_shift Smallint.shift_right) + (apply_shift Int.shift_right); + test_logical1 + (apply_shift Smallint.shift_right_logical) + (apply_shift reference_shift_right_logical); + test_conv1 + (apply_shift Smallint.shift_left) + (apply_shift Int.shift_left) + ~equal:(if shift = 0 then equal_logical else equal_arith) + done; + test_conv2 Smallint.equal Int.equal ~equal:Bool.equal; + test_conv2 Smallint.compare Int.compare ~equal:Int.equal; + test_conv2 Smallint.unsigned_compare Int.unsigned_compare ~equal:Int.equal; + test_conv1 Smallint.to_float Int.to_float ~equal:same_float; + ListLabels.iter + (special_floats @ List.map Int.to_float test_cases) + ~f:(fun f -> assert (equal_arith (Smallint.of_float f) (Int.of_float f))); + test1 (fun x -> + (* test that fractional values round toward zero *) + let f = nudge rng (Int.to_float x) in + assert (equal_logical (Smallint.of_float f) x)); + test1 (fun x -> assert (Smallint.to_string (of_int x) = Int.to_string x)); + test_logical2 Smallint.min Int.min; + test_logical2 Smallint.max Int.max; + () diff --git a/testsuite/tests/parsetree/source_jane_street.ml b/testsuite/tests/parsetree/source_jane_street.ml index c6010930211..55d134d925f 100644 --- 
a/testsuite/tests/parsetree/source_jane_street.ml +++ b/testsuite/tests/parsetree/source_jane_street.ml @@ -24,10 +24,10 @@ let f (type (a : immediate) (b : immediate)) (x : a) = x;; let f (type (a : immediate) (b : immediate) c) (x : a) = x;; [%%expect{| -val f : ('a : immediate). 'a -> 'a @@ global many = -val f : ('a : immediate). 'a -> 'a @@ global many = -val f : ('a : immediate). 'a -> 'a @@ global many = -val f : ('a : immediate). 'a -> 'a @@ global many = +val f : ('a : immediate). 'a -> 'a = +val f : ('a : immediate). 'a -> 'a = +val f : ('a : immediate). 'a -> 'a = +val f : ('a : immediate). 'a -> 'a = |}] let f y (type a : immediate) (x : a) = x;; @@ -35,12 +35,9 @@ let f y (type (a : immediate)) (x : a) = x;; let f y (type (a : immediate) (b : immediate)) (x : a) = x;; [%%expect{| -val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a @@ global many = - -val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a @@ global many = - -val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a @@ global many = - +val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a = +val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a = +val f : ('b : value_or_null) ('a : immediate). 'b -> 'a -> 'a = |}] let f y (type a : immediate) = y;; @@ -49,10 +46,10 @@ let f y (type (a : immediate) (b : immediate)) = y;; let f y (type (a : immediate) (b : immediate) c) = y;; [%%expect{| -val f : ('a : value_or_null). 'a -> 'a @@ global many = -val f : ('a : value_or_null). 'a -> 'a @@ global many = -val f : ('a : value_or_null). 'a -> 'a @@ global many = -val f : ('a : value_or_null). 'a -> 'a @@ global many = +val f : ('a : value_or_null). 'a -> 'a = +val f : ('a : value_or_null). 'a -> 'a = +val f : ('a : value_or_null). 'a -> 'a = +val f : ('a : value_or_null). 
'a -> 'a = |}] (* Just newtypes, no value parameters *) @@ -61,7 +58,7 @@ let f (type a : immediate) (type b : immediate) = ();; [%%expect{| -val f : unit @@ global many = () +val f : unit = () |}] module type S_for_layouts = sig @@ -94,10 +91,10 @@ let f : (unit as (_ : immediate)) -> unit = fun () -> () [%%expect{| type ('a : immediate) for_layouts = 'a -val f : ('a : float64) ('b : bits64). 'a -> 'b -> 'a @@ global many = -val f : ('a : float64). 'a -> 'a @@ global many = -val f : (('a : float64). 'a -> 'a) -> int @@ global many = -val f : unit -> unit @@ global many = +val f : ('a : float64) ('b : bits64). 'a -> 'b -> 'a = +val f : ('a : float64). 'a -> 'a = +val f : (('a : float64). 'a -> 'a) -> int = +val f : unit -> unit = |}] type (_ : any) ignore = K1 @@ -160,8 +157,7 @@ let f xs = match xs with [%%expect{| type t = #(int * float#) -val f : ('a : value_or_null). #('a * float#) -> #('a * float#) @@ global many = - +val f : ('a : value_or_null). #('a * float#) -> #('a * float#) = |}] module M = struct @@ -171,7 +167,7 @@ let x () = #( M.Null, M.This "hi" ) [%%expect{| module M : sig type 'a t = 'a or_null = Null | This of 'a end -val x : unit -> #('a M.t * string M.t) @@ global many = +val x : unit -> #('a M.t * string M.t) = |}] external id : ('a : any). 
'a -> 'a = "%identity" [@@layout_poly] @@ -189,21 +185,21 @@ let nums = [| x for x = 0 to 100 |];; let nums = [ x for x = 0 to 100 ];; [%%expect{| -val nums : int iarray @@ global many = +val nums : int iarray = [:0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68; 69; 70; 71; 72; 73; 74; 75; 76; 77; 78; 79; 80; 81; 82; 83; 84; 85; 86; 87; 88; 89; 90; 91; 92; 93; 94; 95; 96; 97; 98; 99; 100:] -val nums : int array @@ global many = +val nums : int array = [|0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68; 69; 70; 71; 72; 73; 74; 75; 76; 77; 78; 79; 80; 81; 82; 83; 84; 85; 86; 87; 88; 89; 90; 91; 92; 93; 94; 95; 96; 97; 98; 99; 100|] -val nums : int list @@ global many = +val nums : int list = [0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 52; 53; 54; 55; 56; @@ -218,9 +214,9 @@ let nums = [| x for x in [| 1; 2; 3 |] |];; let nums = [ x for x in [ 1; 2; 3 ] ];; [%%expect{| -val nums : int iarray @@ global many = [:1; 2; 3:] -val nums : int array @@ global many = [|1; 2; 3|] -val nums : int list @@ global many = [1; 2; 3] +val nums : int iarray = [:1; 2; 3:] +val nums : int array = [|1; 2; 3|] +val nums : int list = [1; 2; 3] |}] (* complex comprehension *) @@ -251,10 +247,10 @@ let nums = [| n, m, x |];; [%%expect{| -val nums : int iarray @@ global many = [:7; 7; 8; 7; 8; 9:] -val nums : int array @@ global many = [|7; 7; 8; 7; 8; 9|] -val nums : int list @@ global many = [7; 7; 8; 7; 8; 9] -val 
nums : (int * int * int) array @@ global many = +val nums : int iarray = [:7; 7; 8; 7; 8; 9:] +val nums : int array = [|7; 7; 8; 7; 8; 9|] +val nums : int list = [7; 7; 8; 7; 8; 9] +val nums : (int * int * int) array = [|(1, 5, 6); (1, 5, 2); (1, 2, 3); (1, 2, 3); (2, 4, 2); (2, 1, 3); (3, 3, 4); (3, 3, 2); (3, 0, 1); (3, 0, 3); (4, 5, 3); (4, 2, 2); (5, 4, 5); (5, 4, 3); (5, 1, 2); (5, 1, 2); (6, 3, 3); (6, 0, 2); @@ -279,7 +275,7 @@ let nums = ([(x[@test.attr1]) for (x[@test.attr2]) in ([][@test.attr3])] [@test.attr4]);; [%%expect{| -val nums : 'a list @@ global many = [] +val nums : 'a list = [] |}] (*********) @@ -296,8 +292,8 @@ val f : local_ 'a @ unique -> y:local_ 'b @ once -> z:'c @ once unique -> - ?foo:local_ int @ once unique -> ?bar:local_ int -> unit -> unit - @@ global many = + ?foo:local_ int @ once unique -> ?bar:local_ int -> unit -> unit = + |}] (* bindings *) @@ -366,7 +362,7 @@ Line 12, characters 6-9: ^^^ Warning 26 [unused-var]: unused variable foo. -val g : unit -> unit @@ global many = +val g : unit -> unit = |}] (* expressions *) @@ -386,7 +382,7 @@ Line 3, characters 6-7: ^ Warning 26 [unused-var]: unused variable f. -val g : unit -> local_ unit @ once @@ global many = +val g : unit -> local_ unit @ once = |}] (* types *) @@ -463,16 +459,16 @@ val f : local_ 'd -> local_ 'b * string * (string -> string) * ('e -> 'e) * 'c * string * string * int array * string * (int -> local_ (int -> int)) * - (int -> local_ (int -> int)) @ contended - @@ global many = + (int -> local_ (int -> int)) @ contended = + |}] let f1 (_ @ local) = () let f2 () = let x @ local = [1; 2; 3] in f1 x [@nontail] [%%expect{| -val f1 : ('a : value_or_null). local_ 'a -> unit @@ global many = -val f2 : unit -> unit @@ global many = +val f1 : ('a : value_or_null). local_ 'a -> unit = +val f2 : unit -> unit = |}] module type S = sig @@ portable contended @@ -544,8 +540,7 @@ Error: This value escapes its region. 
let f2 (x @ local) (f @ once) : t2 = exclave_ { x; f } [%%expect{| -val f2 : local_ float -> (float -> float) @ once -> local_ t2 @ once @@ - global many = +val f2 : local_ float -> (float -> float) @ once -> local_ t2 @ once = |}] @@ -766,7 +761,7 @@ let f x = | _ -> assert false;; [%%expect{| -val f : 'a iarray -> 'a iarray @@ global many = +val f : 'a iarray -> 'a iarray = |}] (******************) @@ -782,25 +777,25 @@ let (x : (x:int * y:int)) = (~x:1, ~y:2) let (x : ((x:int * y:int) [@test.attr])) = (~x:1, ~y:2) [%%expect{| -val z : int @@ global many = 4 -val punned : int @@ global many = 5 -val x_must_be_even : ('a : value_or_null) ('b : value_or_null). 'a -> 'b @@ - global many = +val z : int = 4 +val punned : int = 5 +val x_must_be_even : ('a : value_or_null) ('b : value_or_null). 'a -> 'b = + exception Odd -val x : x:int * y:int @@ global many = (~x:1, ~y:2) -val x : x:int * y:int @@ global many = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) - : x:int * int * z:int * punned:int = (~x:5, 2, ~z:4, ~punned:5) -val x : x:int * y:int @@ global many = (~x:1, ~y:2) -val x : x:int * y:int @@ global many = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) +val x : x:int * y:int = (~x:1, ~y:2) |}] let (~x:x0, ~s, ~(y:int), ..) : (x:int * s:string * y:int * string) = (~x: 1, ~s: "a", ~y: 2, "ignore me") [%%expect{| -val x0 : int @@ global many portable = 1 -val s : string @@ global many portable = "a" -val y : int @@ global many portable = 2 +val x0 : int @@ portable = 1 +val s : string @@ portable = "a" +val y : int @@ portable = 2 |}] module M : sig @@ -837,14 +832,14 @@ let f ((~(x:int),y) : (x:int * int)) : int = x + y [%%expect{| val foo : ('a : value_or_null) ('b : value_or_null). 
- 'a -> (unit -> 'b) -> (unit -> 'b) -> 'b - @@ global many = -val x : int @@ global many portable = 1 -val y : int @@ global many = 2 -val x : int @@ global many portable = 1 -val y : int @@ global many = 2 -val f : (foo:int * bar:int) -> int @@ global many = -val f : (x:int * int) -> int @@ global many = + 'a -> (unit -> 'b) -> (unit -> 'b) -> 'b = + +val x : int @@ portable = 1 +val y : int = 2 +val x : int @@ portable = 1 +val y : int = 2 +val f : (foo:int * bar:int) -> int = +val f : (x:int * int) -> int = |}] type xy = (x:int * y:int) @@ -862,9 +857,9 @@ let matches = [%%expect{| type xy = x:int * y:int -val lt : x:int * y:int * x:int * int @@ global many = (~x:1, ~y:2, ~x:3, 4) -val matches : int @@ global many = 1 -val matches : int * int @@ global many = (1, 2) +val lt : x:int * y:int * x:int * int = (~x:1, ~y:2, ~x:3, 4) +val matches : int = 1 +val matches : int * int = (1, 2) |}] (********************) @@ -892,10 +887,10 @@ let test_nativeint s f = Format.printf "%s: %s\n" s (Nativeint_u.to_string f); Format.print_flush () [%%expect{| -val test_float : string -> Float_u.t -> unit @@ global many = -val test_int32 : string -> Int32_u.t -> unit @@ global many = -val test_int64 : string -> Int64_u.t -> unit @@ global many = -val test_nativeint : string -> Nativeint_u.t -> unit @@ global many = +val test_float : string -> Float_u.t -> unit = +val test_int32 : string -> Int32_u.t -> unit = +val test_int64 : string -> Int64_u.t -> unit = +val test_nativeint : string -> Nativeint_u.t -> unit = |}] (* Expressions *) @@ -924,45 +919,45 @@ let x = test_int64 "forty_two_in_binary" (#0b101010L) [%%expect{| e: 2.718282 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global many = () +val x : unit = () negative_one_half: -0.500000 -val x : unit @@ global 
many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () positive_one_dot: 1.000000 -val x : unit @@ global many = () +val x : unit = () one_billion: 1000000000.000000 -val x : unit @@ global many = () +val x : unit = () one_twenty_seven_point_two_five_in_floating_hex: 127.250000 -val x : unit @@ global many = () +val x : unit = () five_point_three_seven_five_in_floating_hexponent: 5.375000 -val x : unit @@ global many = () +val x : unit = () zero: 0 -val x : unit @@ global many = () +val x : unit = () positive_one: 1 -val x : unit @@ global many = () +val x : unit = () positive_one: 1 -val x : unit @@ global many = () +val x : unit = () negative_one: -1 -val x : unit @@ global many = () +val x : unit = () negative_one: -1 -val x : unit @@ global many = () +val x : unit = () two_fifty_five_in_hex: 255 -val x : unit @@ global many = () +val x : unit = () twenty_five_in_octal: 25 -val x : unit @@ global many = () +val x : unit = () forty_two_in_binary: 42 -val x : unit @@ global many = () +val x : unit = () |}] (* Patterns *) @@ -987,9 +982,9 @@ let f x = ;; [%%expect{| -val f : float# -> [> `Five | `Four | `Other ] @@ global many = -val x : unit @@ global many = () -val f : float# -> float# @@ global many = +val f : float# -> [> `Five | `Four | `Other ] = +val x : unit = () +val f : float# -> float# = |}] ;; test_float "result" (f #7.);; @@ -1020,10 +1015,10 @@ let f x = [%%expect{| result: 7.000000 - : unit = () -val f : float# -> float# @@ global many = +val f : float# -> float# = larger match result: 3.000000 - : unit = () -val f : int64# -> [> `Five | `Four | `Other ] @@ global many = +val f : int64# -> [> `Five | `Four | `Other ] = |}] let x = @@ -1041,8 +1036,8 @@ let f x = test_int64 "result" (f #7L);; [%%expect{| -val x : unit @@ global many = () 
-val f : int64# -> int64# @@ global many = +val x : unit = () +val f : int64# -> int64# = result: 7 - : unit = () |}] @@ -1056,9 +1051,9 @@ let #{ data = payload; _ } = #{ data = "payload" ; i = 0 } let inc r = #{ r with i = r.#i + 1 } [%%expect{| type 'a with_idx = #{ data : 'a; i : int; } -val idx : 'a with_idx -> int @@ global many = -val payload : string @@ global many = "payload" -val inc : 'a with_idx -> 'a with_idx @@ global many = +val idx : 'a with_idx -> int = +val payload : string = "payload" +val inc : 'a with_idx -> 'a with_idx = |}] (***************) @@ -1088,9 +1083,10 @@ end supported. *) [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 11, characters 17-39: +11 | type 'a list : immutable_data with 'a + ^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] (**************************) @@ -1102,7 +1098,7 @@ let f (_ : 'a. 'a -> 'a) = () [%%expect{| type t = ('a. 'a -> 'a) -> int -val f : ('a. 'a -> 'a) -> unit @@ global many = +val f : ('a. 'a -> 'a) -> unit = |}] (************************) @@ -1140,8 +1136,8 @@ let x () = #3.14s [%%expect{| type t1 = float32 type t2 = float32# -val x : float32 @@ global many = 3.1400001s -val x : unit -> float32# @@ global many = +val x : float32 = 3.1400001s +val x : unit -> float32# = |}] (********) diff --git a/testsuite/tests/parsetree/test_ppx.compilers.reference b/testsuite/tests/parsetree/test_ppx.compilers.reference index adf2b0368aa..055bcefb550 100644 --- a/testsuite/tests/parsetree/test_ppx.compilers.reference +++ b/testsuite/tests/parsetree/test_ppx.compilers.reference @@ -1,5 +1,5 @@ -File "source_jane_street.ml", line 156, characters 0-24: -156 | type t = #(int * float#) +File "source_jane_street.ml", line 153, characters 0-24: +153 | type t = #(int * float#) ^^^^^^^^^^^^^^^^^^^^^^^^ Error: Multiple definition of the type name "t". Names must be unique in a given structure or signature. 
diff --git a/testsuite/tests/typing-gadts/existential_as_pattern.ml b/testsuite/tests/typing-gadts/existential_as_pattern.ml new file mode 100644 index 00000000000..d371cb9b380 --- /dev/null +++ b/testsuite/tests/typing-gadts/existential_as_pattern.ml @@ -0,0 +1,65 @@ +(* TEST + expect; +*) + +(** Test that as-patterns let us re-specialize the type of a constructor packing an existential *) + +(* No payload *) +type 'a t = + | Left : [> `Left ] t + | Right : [> `Right ] t +[%%expect {| +type 'a t = Left : [> `Left ] t | Right : [> `Right ] t +|}] + +let left : [ `Left | `Right ] t -> [ `Left ] t = function + | Left as t -> t + | Right -> assert false +[%%expect {| +val left : [ `Left | `Right ] t -> [ `Left ] t = +|}] + +(* Concrete payload *) +type ('a, 'e) t = + | Left : 'e -> ([> `Left ], 'e) t + | Right : 'e -> ([> `Right ], 'e) t +[%%expect {| +type ('a, 'e) t = + Left : 'e -> ([> `Left ], 'e) t + | Right : 'e -> ([> `Right ], 'e) t +|}] + +let left : ([ `Left | `Right ], 'e) t -> ([ `Left ], 'e) t = function + | Left _ as t -> t + | Right _ -> assert false +[%%expect {| +val left : ([ `Left | `Right ], 'e) t -> ([ `Left ], 'e) t = +|}] + +(* Pack payload *) +type 'a t2 = P : ('a, 'e) t -> 'a t2 [@@unboxed] +[%%expect {| +type 'a t2 = P : ('a, 'e) t -> 'a t2 [@@unboxed] +|}] + +let left : [ `Left | `Right ] t2 -> [ `Left ] t2 = function + | P (Left _ as t) -> P t + | P (Right _) -> assert false +[%%expect {| +val left : [ `Left | `Right ] t2 -> [ `Left ] t2 = +|}] + +(* Existential payload - equivalent to packed concrete payload *) +type 'a t = + | Left : 'e -> [> `Left ] t + | Right : 'e -> [> `Right ] t +[%%expect {| +type 'a t = Left : 'e -> [> `Left ] t | Right : 'e -> [> `Right ] t +|}] + +let left : [ `Left | `Right ] t -> [ `Left ] t = function + | Left _ as t -> t + | Right _ -> assert false +[%%expect {| +val left : [ `Left | `Right ] t -> [ `Left ] t = +|}] diff --git a/testsuite/tests/typing-layouts-products/exhaustiveness.ml 
b/testsuite/tests/typing-layouts-products/exhaustiveness.ml new file mode 100644 index 00000000000..289088f5ea6 --- /dev/null +++ b/testsuite/tests/typing-layouts-products/exhaustiveness.ml @@ -0,0 +1,18 @@ +(* TEST + flags = "-w +8"; + expect; +*) + +(* This is a regression test. The example below used to give an exhaustiveness + warning because we forgot a case in [Parmatch.simple_match]. *) + +type t = A | B + +let f t t' = + match #(t,t') with + | #(A, _) -> true + | #(B, _) -> false +[%%expect{| +type t = A | B +val f : t -> 'a -> bool = +|}] diff --git a/testsuite/tests/typing-layouts-unboxed-records/exhaustiveness.ml b/testsuite/tests/typing-layouts-unboxed-records/exhaustiveness.ml new file mode 100644 index 00000000000..55307a90abf --- /dev/null +++ b/testsuite/tests/typing-layouts-unboxed-records/exhaustiveness.ml @@ -0,0 +1,38 @@ +(* TEST + flags = "-w +8 -extension layouts_beta"; + expect; +*) + +(* This is a regression test. The example below used to give an exhaustiveness + warning because we forgot a case in [Parmatch.simple_match]. *) + +type t = A | B +type r = #{ x : t; y : t } + +let f t t' = + match #{ x = t; y = t' } with + | #{ x = A; y = _ } -> true + | #{ x = B; y = _ } -> false +[%%expect{| +type t = A | B +type r = #{ x : t; y : t; } +val f : t -> t -> bool = +|}] + +(* This is a regression test. The example below used to give + #{y=A; _ } as a counterexample instead of #{y=A; x=B}. *) +let g t t' = + match #{ x = t; y = t' } with + | #{ x = A; _ } -> true + | #{ y = B; _ } -> false +[%%expect{| +Lines 2-4, characters 2-26: +2 | ..match #{ x = t; y = t' } with +3 | | #{ x = A; _ } -> true +4 | | #{ y = B; _ } -> false +Warning 8 [partial-match]: this pattern-matching is not exhaustive. 
+Here is an example of a case that is not matched: +#{y=A; x=B} + +val g : t -> t -> bool = +|}] diff --git a/testsuite/tests/typing-layouts/jkinds.ml b/testsuite/tests/typing-layouts/jkinds.ml index a210a685095..eec92625b42 100644 --- a/testsuite/tests/typing-layouts/jkinds.ml +++ b/testsuite/tests/typing-layouts/jkinds.ml @@ -9,33 +9,37 @@ type 'a list : immutable_data with 'a [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 15-37: +1 | type 'a list : immutable_data with 'a + ^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] type ('a, 'b) either : immutable_data with 'a * 'b [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 23-50: +1 | type ('a, 'b) either : immutable_data with 'a * 'b + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] type 'a gel : kind_of_ 'a mod global [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 14-25: +1 | type 'a gel : kind_of_ 'a mod global + ^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] type 'a t : _ [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 1, characters 12-13: +1 | type 'a t : _ + ^ +Error: Unimplemented kind syntax |}] kind_abbrev_ immediate = value mod global unique many sync uncontended @@ -82,9 +86,10 @@ module type S = sig end [%%expect{| ->> Fatal error: XXX unimplemented -Uncaught exception: Misc.Fatal_error - +Line 2, characters 17-39: +2 | type 'a list : immutable_data with 'a + ^^^^^^^^^^^^^^^^^^^^^^ +Error: Unimplemented kind syntax |}] (**************************************) diff --git a/testsuite/tests/typing-modes/val_modalities.ml b/testsuite/tests/typing-modes/val_modalities.ml index 645f058e348..e105ca35d73 100644 --- a/testsuite/tests/typing-modes/val_modalities.ml +++ b/testsuite/tests/typing-modes/val_modalities.ml @@ -14,17 +14,17 @@ type r = { let 
uncontended_use (_ @ uncontended) = () [%%expect{| type r = { mutable x : string; } -val uncontended_use : 'a -> unit @@ global many = +val uncontended_use : 'a -> unit = |}] let share_use : 'a -> unit @@ portable = fun _ -> () [%%expect{| -val share_use : 'a -> unit @@ global many = +val share_use : 'a -> unit = |}] let (portable_use @ portable) (_ @ portable) = () [%%expect{| -val portable_use : 'a @ portable -> unit @@ global many = +val portable_use : 'a @ portable -> unit = |}] (* The compiler building itself is a comprehensive test of legacy modules/values. @@ -34,7 +34,7 @@ module M = struct let foo = {x = "hello"} end [%%expect{| -module M : sig val foo : r @@ global many end +module M : sig val foo : r end |}] module type S = sig @@ -62,7 +62,7 @@ module M = struct let x @ contended = "hello" end [%%expect{| -module M : sig val x : string @@ global many portable contended end +module M : sig val x : string @@ portable contended end |}] (* Testing the defaulting behaviour. @@ -90,76 +90,72 @@ Error: This value is "nonportable" but expected to be "portable". module Module_type_of_monadic = struct module M = struct - let x @ uncontended = "hello" + let x @ uncontended = ref "hello" end module M' : module type of M = M (* for monadic axes, we try to push to the id = join_with_min. The original modality is pushed to floor. 
*) module M' : module type of M = struct - let x @ contended = "hello" + let x @ contended = ref "hello" end end [%%expect{| Lines 8-10, characters 35-7: 8 | ...................................struct - 9 | let x @ contended = "hello" + 9 | let x @ contended = ref "hello" 10 | end Error: Signature mismatch: Modules do not match: - sig val x : string @@ global many portable contended end + sig val x : string ref @@ contended end is not included in - sig val x : string end + sig val x : string ref end Values do not match: - val x : string @@ global many portable contended + val x : string ref @@ contended is not included in - val x : string + val x : string ref The second is uncontended and the first is contended. |}] module Module_type_nested = struct module M = struct - let x @ contended portable = "hello" + let x @ portable = fun t -> t module N = struct - let y @ uncontended portable = "world" + let y @ uncontended = ref "hello" end end module M' : module type of M = struct - let x = "hello" + let x @ nonportable = fun t -> t module N = struct - let y @ contended = "hello" + let y @ contended = ref "hello" end end end [%%expect{| Lines 8-13, characters 35-7: 8 | ...................................struct - 9 | let x = "hello" + 9 | let x @ nonportable = fun t -> t 10 | module N = struct -11 | let y @ contended = "hello" +11 | let y @ contended = ref "hello" 12 | end 13 | end Error: Signature mismatch: Modules do not match: sig - val x : string @@ global many portable - module N : - sig val y : string @@ global many portable contended end + val x : 'a -> 'a + module N : sig val y : string ref @@ contended end end is not included in - sig - val x : string @@ contended - module N : sig val y : string end - end + sig val x : 'a -> 'a module N : sig val y : string ref end end In module "N": Modules do not match: - sig val y : string @@ global many portable contended end + sig val y : string ref @@ contended end is not included in - sig val y : string end + sig val y : 
string ref end In module "N": Values do not match: - val y : string @@ global many portable contended + val y : string ref @@ contended is not included in - val y : string + val y : string ref The second is uncontended and the first is contended. |}] @@ -176,7 +172,7 @@ module Without_inclusion = struct end [%%expect{| module Without_inclusion : - sig module M : sig val x : 'a -> 'a @@ global many portable end end + sig module M : sig val x : 'a -> 'a @@ portable end end |}] module Without_inclusion = struct @@ -194,25 +190,25 @@ Error: This value is "nonportable" but expected to be "portable". module Inclusion_fail = struct module M : sig - val x : string @@ uncontended + val x : string ref @@ uncontended end = struct - let x @ contended = "hello" + let x @ contended = ref "hello" end end [%%expect{| Lines 4-6, characters 10-7: 4 | ..........struct -5 | let x @ contended = "hello" +5 | let x @ contended = ref "hello" 6 | end Error: Signature mismatch: Modules do not match: - sig val x : string @@ global many portable contended end + sig val x : string ref @@ contended end is not included in - sig val x : string end + sig val x : string ref end Values do not match: - val x : string @@ global many portable contended + val x : string ref @@ contended is not included in - val x : string + val x : string ref The second is uncontended and the first is contended. 
|}] @@ -270,8 +266,8 @@ end [%%expect{| module Close_over_value : sig - module M : sig val x : string @@ global many portable end - val foo : unit -> unit @@ global many portable + module M : sig val x : string @@ portable end + val foo : unit -> unit @@ portable end |}] @@ -441,7 +437,7 @@ Lines 13-19, characters 6-3: Error: Signature mismatch: Modules do not match: sig - module Plain : sig val f : int -> int @@ global many end + module Plain : sig val f : int -> int end module type S_plain = sig module M : sig val f : int -> int end end end @@ -453,12 +449,12 @@ Error: Signature mismatch: end In module "Plain": Modules do not match: - sig val f : int -> int @@ global many end + sig val f : int -> int end is not included in sig val f : int -> int @@ portable end In module "Plain": Values do not match: - val f : int -> int @@ global many + val f : int -> int is not included in val f : int -> int @@ portable The second is portable and the first is nonportable. @@ -495,32 +491,65 @@ end module M : sig module N : sig val foo : 'a -> 'a @@ global many end end |}] -(* CR zqian: inclusion check should cross modes, if we are comparing modes. *) +(* inclusion check should cross modes, if we are comparing modes (instead of + modalities) *) +module M : sig + val foo : int @@ portable uncontended +end = struct + let foo @ nonportable contended = 42 +end +[%%expect{| +module M : sig val foo : int @@ portable end +|}] + +(* The RHS type (expected type) is used for mode crossing. The following still +passes because types are substituted. *) module M : sig - module N : sig val foo : int @@ portable end + type t + val t : t @@ portable uncontended end = struct - module N = struct let foo @ nonportable = 42 end + type t = int + let t @ nonportable contended = 42 +end +[%%expect{| +module M : sig type t val t : t @@ portable end +|}] + +(* LHS type is a subtype of RHS type, which means more type-level information. 
+That doesn't matter for mode crossing for most cases, except for poly variants. +The following examples seem to suggest that we should use LHS type for mode +crossing, but I couldn't find examples to really demonstrate that. *) +module M : sig + val t : [`Foo | `Bar] @@ portable uncontended +end = struct + let t @ nonportable contended = `Foo +end +[%%expect{| +module M : sig val t : [ `Bar | `Foo ] @@ portable end +|}] + +module M : sig + val t : [`Foo | `Bar of 'a -> 'a | `Baz of string ref] @@ portable uncontended +end = struct + let t @ nonportable contended = `Foo end [%%expect{| Lines 3-5, characters 6-3: 3 | ......struct -4 | module N = struct let foo @ nonportable = 42 end +4 | let t @ nonportable contended = `Foo 5 | end Error: Signature mismatch: Modules do not match: - sig module N : sig val foo : int @@ global many end end + sig val t : [> `Foo ] @@ contended end is not included in - sig module N : sig val foo : int @@ portable end end - In module "N": - Modules do not match: - sig val foo : int @@ global many end - is not included in - sig val foo : int @@ portable end - In module "N": + sig + val t : [ `Bar of 'a -> 'a | `Baz of string ref | `Foo ] @@ + portable + end Values do not match: - val foo : int @@ global many + val t : [> `Foo ] @@ contended is not included in - val foo : int @@ portable + val t : [ `Bar of 'a -> 'a | `Baz of string ref | `Foo ] @@ portable The second is portable and the first is nonportable. 
|}] @@ -586,7 +615,7 @@ let f (x : (module S)) = (x : (module S) :> (module S')) [%%expect{| module type S = sig val foo : 'a -> 'a @@ global many end module type S' = sig val foo : 'a -> 'a end -val f : (module S) -> (module S') @@ global many = +val f : (module S) -> (module S') = |}] let f (x : (module S')) = (x : (module S') :> (module S)) @@ -825,8 +854,8 @@ module M_portable = struct let f @ portable = fun () -> () end [%%expect{| -module M_nonportable : sig val f : unit -> unit @@ global many end -module M_portable : sig val f : unit -> unit @@ global many portable end +module M_nonportable : sig val f : unit -> unit end +module M_portable : sig val f : unit -> unit @@ portable end |}] let (foo @ portable) () = @@ -846,7 +875,7 @@ let (_foo @ portable) () = () [%%expect{| -val _foo : unit -> unit @@ global many = +val _foo : unit -> unit = |}] let () = diff --git a/testsuite/tests/typing-small-numbers/test_beta.ml b/testsuite/tests/typing-small-numbers/test_beta.ml index 7ff80a115dc..d20ba0991d8 100644 --- a/testsuite/tests/typing-small-numbers/test_beta.ml +++ b/testsuite/tests/typing-small-numbers/test_beta.ml @@ -199,17 +199,13 @@ type t = int16;; type t = int16 |}];; -let i16 = Stdlib_beta.Int16.minus_one;; +let i8 = Stdlib_beta.Int8.minus_one;; [%%expect{| -val i16 : int16 = -1 +val i8 : int8 = -1 |}];; -let () = - match #0.0s with -| #0.0s -> () -| _ -> () -;; +let i16 = Stdlib_beta.Int16.minus_one;; [%%expect{| -Line 1: -Error: float32 literal patterns are not supported. +val i16 : int16 = -1 |}];; + diff --git a/testsuite/tests/typing-unique/overwriting.ml b/testsuite/tests/typing-unique/overwriting.ml index a653ad412c8..d81535c442a 100644 --- a/testsuite/tests/typing-unique/overwriting.ml +++ b/testsuite/tests/typing-unique/overwriting.ml @@ -42,7 +42,7 @@ let overwrite_shared (r : record_update) = let x = overwrite_ r with { x = "foo" } in x.x [%%expect{| -val id : ('a : value_or_null). 'a -> 'a @@ global many = +val id : ('a : value_or_null). 
'a -> 'a = Line 5, characters 21-22: 5 | let x = overwrite_ r with { x = "foo" } in ^ @@ -727,7 +727,7 @@ let guards_good = function end | OptionB _ -> false [%%expect{| -val is_option_a : options -> bool @@ global many = +val is_option_a : options -> bool = Line 8, characters 30-59: 8 | | Some s when is_option_a (overwrite_ v with OptionA s) -> true ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -807,8 +807,7 @@ let mutable_field_aliased r = [%%expect{| type 'a mutable_record = { mutable m : 'a; } val mutable_field_aliased : - options mutable_record @ unique -> options mutable_record * options @@ - global many = + options mutable_record @ unique -> options mutable_record * options = |}] let mutable_field_aliased r = diff --git a/testsuite/tests/typing-unique/overwriting_lift_constants.ml b/testsuite/tests/typing-unique/overwriting_lift_constants.ml index 2018d2ed4af..27a1bc6a030 100644 --- a/testsuite/tests/typing-unique/overwriting_lift_constants.ml +++ b/testsuite/tests/typing-unique/overwriting_lift_constants.ml @@ -73,7 +73,7 @@ let () = [%%expect{| type point = { mutable dim : int; x : float; y : float; z : float; } -val unsafe_dup : '_a @ unique -> '_a * '_a @ unique @@ global many = +val unsafe_dup : '_a @ unique -> '_a * '_a @ unique = Line 9, characters 10-66: 9 | let p = overwrite_ p with { dim = 4; x = 1.0; y = 2.0; z = 3.0 } in ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml b/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml index e48b22188f3..5167c0bea10 100644 --- a/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml +++ b/testsuite/tests/typing-unique/overwriting_proj_push_down_bug.ml @@ -19,15 +19,14 @@ let aliased_use x = x [%%expect{| (let (aliased_use/282 = (function {nlocal = 0} x/284 x/284)) (apply (field_imm 1 (global Toploop!)) "aliased_use" aliased_use/282)) -val aliased_use : ('a : value_or_null). 
'a -> 'a @@ global many = +val aliased_use : ('a : value_or_null). 'a -> 'a = |}] let unique_use (unique_ x) = x [%%expect{| (let (unique_use/285 = (function {nlocal = 0} x/287 x/287)) (apply (field_imm 1 (global Toploop!)) "unique_use" unique_use/285)) -val unique_use : ('a : value_or_null). 'a @ unique -> 'a @@ global many = - +val unique_use : ('a : value_or_null). 'a @ unique -> 'a = |}] (* This output is fine with overwriting: The [r.y] is not pushed down. *) @@ -48,7 +47,7 @@ let proj_aliased r = (apply aliased_use/282 r/290)) (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/292 y/291)))) (apply (field_imm 1 (global Toploop!)) "proj_aliased" proj_aliased/288)) -val proj_aliased : record -> record * string @@ global many = +val proj_aliased : record -> record * string = |}] let proj_unique r = @@ -68,7 +67,7 @@ let proj_unique r = (apply unique_use/285 r/295)) (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/297 y/296)))) (apply (field_imm 1 (global Toploop!)) "proj_unique" proj_unique/293)) -val proj_unique : record @ unique -> record * string @@ global many = +val proj_unique : record @ unique -> record * string = |}] (* This output would be unsound if [aliased_use] was able to overwrite [r] @@ -91,7 +90,7 @@ let match_aliased r = (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/302 (field_imm 1 r/300))))) (apply (field_imm 1 (global Toploop!)) "match_aliased" match_aliased/298)) -val match_aliased : record -> record * string @@ global many = +val match_aliased : record -> record * string = |}] (* This is sound since we bind [y] before the [unique_use] *) @@ -113,7 +112,7 @@ let match_unique r = (apply unique_use/285 r/306)) (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/308 y/307)))) (apply (field_imm 1 (global Toploop!)) "match_unique" match_unique/304)) -val match_unique : record @ unique -> record * string @@ global many = +val match_unique : record @ unique -> record * string = |}] (* Similarly, this would be 
unsound since Lambda performs a mini ANF pass. *) @@ -139,7 +138,7 @@ let match_mini_anf_aliased r = (field_imm 1 r/312))))) (apply (field_imm 1 (global Toploop!)) "match_mini_anf_aliased" match_mini_anf_aliased/310)) -val match_mini_anf_aliased : record -> record * string @@ global many = +val match_mini_anf_aliased : record -> record * string = |}] (* This is sound since we bind [y] before the [unique_use] *) @@ -165,8 +164,7 @@ let match_mini_anf_unique r = (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/325 y/324)))) (apply (field_imm 1 (global Toploop!)) "match_mini_anf_unique" match_mini_anf_unique/320)) -val match_mini_anf_unique : record @ unique -> record * string @@ global many = - +val match_mini_anf_unique : record @ unique -> record * string = |}] let match_anf_aliased r = @@ -196,7 +194,7 @@ let match_anf_aliased r = y/333))))) (apply (field_imm 1 (global Toploop!)) "match_anf_aliased" match_anf_aliased/330)) -val match_anf_aliased : record -> record * string @@ global many = +val match_anf_aliased : record -> record * string = |}] (* This is sound since we bind [y] using [field_mut] *) @@ -228,8 +226,7 @@ let match_anf_unique r = y/345))))) (apply (field_imm 1 (global Toploop!)) "match_anf_unique" match_anf_unique/342)) -val match_anf_unique : record @ unique -> record * string @@ global many = - +val match_anf_unique : record @ unique -> record * string = |}] type tree = @@ -325,7 +322,7 @@ let swap_inner (t : tree) = (exit 19)) with (19) t/362))) (apply (field_imm 1 (global Toploop!)) "swap_inner" swap_inner/360)) -val swap_inner : tree -> tree @@ global many = +val swap_inner : tree -> tree = |}] (* CR uniqueness: Update this test once overwriting is fully implemented. 
@@ -374,7 +371,7 @@ let match_guard r = (makeblock 0 ([(consts ()) (non_consts ([0: *, *]))],*) r/453 y/382)))))) (apply (field_imm 1 (global Toploop!)) "match_guard" match_guard/378)) -val match_guard : record @ unique -> record * string @@ global many = +val match_guard : record @ unique -> record * string = |}] let match_guard_unique (unique_ r) = diff --git a/testsuite/tests/typing-unique/overwriting_syntax.ml b/testsuite/tests/typing-unique/overwriting_syntax.ml index 8b2ff6eeb5f..462442d51ae 100644 --- a/testsuite/tests/typing-unique/overwriting_syntax.ml +++ b/testsuite/tests/typing-unique/overwriting_syntax.ml @@ -42,7 +42,7 @@ Uncaught exception: File "parsing/location.ml", line 1107, characters 2-8: Asser let with_record = function { a; b } as t -> { t with b = a } [%%expect{| -val with_record : record -> record @@ global many = +val with_record : record -> record = |}] let overwrite_record = function diff --git a/testsuite/tests/typing-unique/rbtree.ml b/testsuite/tests/typing-unique/rbtree.ml index dfc4f9ac938..1e2e47cd62d 100644 --- a/testsuite/tests/typing-unique/rbtree.ml +++ b/testsuite/tests/typing-unique/rbtree.ml @@ -472,13 +472,13 @@ type ('k, 'v) tree = | Leaf val fold : 'a 'b ('c : value_or_null). - ('a -> 'b -> 'c -> 'c) -> 'c -> ('a, 'b) tree -> 'c - @@ global many = + ('a -> 'b -> 'c -> 'c) -> 'c -> ('a, 'b) tree -> 'c = + val work : ('a : value_or_null) ('b : value_or_null) ('c : value_or_null). insert:(int -> bool -> 'a -> 'a) -> - fold:(('b -> bool -> int -> int) -> int -> 'a -> 'c) -> empty:'a -> 'c - @@ global many = + fold:(('b -> bool -> int -> int) -> int -> 'a -> 'c) -> empty:'a -> 'c = + Line 85, characters 16-71: 85 | balance_right (Node { t with right = ins k v t.right }) [@nontail] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -508,16 +508,11 @@ module Make_Okasaki : val fold : 'a 'b ('c : value_or_null). 
('a -> 'b -> 'c -> 'c) -> 'c -> ('a, 'b) tree -> 'c - @@ global many - val balance_left : ('a, 'b) tree -> ('a, 'b) tree @@ global many - portable - val balance_right : ('a, 'b) tree -> ('a, 'b) tree @@ global many - portable - val ins : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree @@ global - many - val set_black : ('a, 'b) tree -> ('a, 'b) tree @@ global many portable - val insert : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree @@ - global many + val balance_left : ('a, 'b) tree -> ('a, 'b) tree @@ portable + val balance_right : ('a, 'b) tree -> ('a, 'b) tree @@ portable + val ins : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree + val set_black : ('a, 'b) tree -> ('a, 'b) tree @@ portable + val insert : Ord.t -> 'a -> (Ord.t, 'a) tree -> (Ord.t, 'a) tree end Line 110, characters 16-52: 110 | | Node _ -> overwrite_ t with Node { color = c } diff --git a/toplevel/genprintval.ml b/toplevel/genprintval.ml index 9de4991762d..e17b6f876f9 100644 --- a/toplevel/genprintval.ml +++ b/toplevel/genprintval.ml @@ -255,16 +255,9 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct | Print_as_value (* can interpret as a value and print *) | Print_as of string (* can't print *) - let get_and_default_jkind_for_printing jkind = - let layout = Jkind.get_layout_defaulting_to_value jkind in - match layout with - (* CR layouts v3.0: [Value_or_null] should probably require special - printing to avoid descending into NULL. (This module uses - lots of unsafe Obj features.) 
- *) + let print_sort : Jkind.Sort.Const.t -> _ = function | Base Value -> Print_as_value | Base Void -> Print_as "" - | Any -> Print_as "" | Base (Float64 | Float32 | Bits8 | Bits16 | Bits32 | Bits64 | Vec128 | Word) -> Print_as "" | Product _ -> Print_as "" @@ -464,9 +457,8 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct instantiate_types env type_params ty_list l in let ty_args = List.map2 - (fun { ca_jkind } ty_arg -> - (ty_arg, - get_and_default_jkind_for_printing ca_jkind) + (fun { ca_sort } ty_arg -> + (ty_arg, print_sort ca_sort) ) l ty_args in tree_of_constr_with_args (tree_of_constr env path) @@ -581,12 +573,12 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct lbl_list pos obj rep = let rec tree_of_fields first pos = function | [] -> [] - | {ld_id; ld_type; ld_jkind} :: remainder -> + | {ld_id; ld_type; ld_sort} :: remainder -> let ty_arg = instantiate_type env type_params ty_list ld_type in let name = Ident.name ld_id in (* PR#5722: print full module path only for first record field *) - let is_void = Jkind.is_void_defaulting ld_jkind in + let is_void = Jkind.Sort.Const.(equal void ld_sort) in let lid = if first then tree_of_label env path (Out_name.create name) else Oide_ident (Out_name.create name) @@ -627,17 +619,17 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct ty_list lbl_list pos obj = let rec tree_of_fields first pos = function | [] -> [] - | {ld_id; ld_type; ld_jkind} :: remainder -> + | {ld_id; ld_type; ld_sort} :: remainder -> let ty_arg = instantiate_type env type_params ty_list ld_type in let name = Ident.name ld_id in (* PR#5722: print full module path only for first record field *) - let is_void = Jkind.is_void_defaulting ld_jkind in + let is_void = Jkind.Sort.Const.(equal void ld_sort) in let lid = if first then tree_of_label env path (Out_name.create name) else Oide_ident (Out_name.create name) and v = - match get_and_default_jkind_for_printing ld_jkind with + match 
print_sort ld_sort with | Print_as msg -> Oval_stuff msg | Print_as_value -> match lbl_list with @@ -745,8 +737,8 @@ module Make(O : OBJ)(EVP : EVALPATH with type valu = O.t) = struct | _ -> assert false in let args = instantiate_types env type_params ty_list cstr.cstr_args in - let args = List.map2 (fun { ca_jkind } arg -> - (arg, get_and_default_jkind_for_printing ca_jkind)) + let args = List.map2 (fun { ca_sort } arg -> + (arg, print_sort ca_sort)) cstr.cstr_args args in tree_of_constr_with_args diff --git a/typing/datarepr.ml b/typing/datarepr.ml index cb9daeac2d6..4d4ccb6ad01 100644 --- a/typing/datarepr.ml +++ b/typing/datarepr.ml @@ -70,7 +70,7 @@ let constructor_args ~current_unit priv cd_args cd_res path rep = in let type_params = TypeSet.elements arg_vars_set in let arity = List.length type_params in - let is_void_label lbl = Jkind.is_void_defaulting lbl.ld_jkind in + let is_void_label lbl = Jkind.Sort.Const.(equal void lbl.ld_sort) in let jkind = Jkind.for_boxed_record ~all_void:(List.for_all is_void_label lbls) in @@ -97,7 +97,7 @@ let constructor_args ~current_unit priv cd_args cd_res path rep = [ { ca_type = newgenconstr path type_params; - ca_jkind = jkind; + ca_sort = Jkind.Sort.Const.value; ca_modalities = Mode.Modality.Value.Const.id; ca_loc = Location.none } @@ -121,21 +121,21 @@ let constructor_descrs ~current_unit ty_path decl cstrs rep = written here should be irrelevant, and so would like to understand this interaction better. 
*) begin match cd_args with - | Cstr_tuple [{ ca_jkind = jkind }] - | Cstr_record [{ ld_jkind = jkind }] -> - [| Constructor_uniform_value, [| jkind |] |] + | Cstr_tuple [{ ca_sort = sort }] + | Cstr_record [{ ld_sort = sort }] -> + [| Constructor_uniform_value, [| sort |] |] | Cstr_tuple ([] | _ :: _) | Cstr_record ([] | _ :: _) -> - Misc.fatal_error "Multiple or 0 arguments in [@@unboxed] variant" + Misc.fatal_error "Multiple arguments in [@@unboxed] variant" end | Variant_unboxed, ([] | _ :: _) -> Misc.fatal_error "Multiple or 0 constructors in [@@unboxed] variant" in - let all_void jkinds = Array.for_all Jkind.is_void_defaulting jkinds in + let all_void sorts = Array.for_all Jkind.Sort.Const.(equal void) sorts in let num_consts = ref 0 and num_nonconsts = ref 0 in let cstr_constant = Array.map - (fun (_, jkinds) -> - let all_void = all_void jkinds in + (fun (_, sorts) -> + let all_void = all_void sorts in if all_void then incr num_consts else incr num_nonconsts; all_void) cstr_shapes_and_arg_jkinds @@ -229,7 +229,7 @@ let dummy_label (type rep) (record_form : rep record_form) in { lbl_name = ""; lbl_res = none; lbl_arg = none; lbl_mut = Immutable; lbl_modalities = Mode.Modality.Value.Const.id; - lbl_jkind = Jkind.Builtin.any ~why:Dummy_jkind; + lbl_sort = Jkind.Sort.Const.void; lbl_num = -1; lbl_pos = -1; lbl_all = [||]; lbl_repres = repres; lbl_private = Public; @@ -243,14 +243,14 @@ let label_descrs record_form ty_res lbls repres priv = let rec describe_labels num pos = function [] -> [] | l :: rest -> - let is_void = Jkind.is_void_defaulting l.ld_jkind in + let is_void = Jkind.Sort.Const.(equal void l.ld_sort) in let lbl = { lbl_name = Ident.name l.ld_id; lbl_res = ty_res; lbl_arg = l.ld_type; lbl_mut = l.ld_mutable; lbl_modalities = l.ld_modalities; - lbl_jkind = l.ld_jkind; + lbl_sort = l.ld_sort; lbl_pos = if is_void then lbl_pos_void else pos; lbl_num = num; lbl_all = all_labels; diff --git a/typing/includecore.ml b/typing/includecore.ml index 
a4d5cd2bb97..ba2b434ebae 100644 --- a/typing/includecore.ml +++ b/typing/includecore.ml @@ -51,6 +51,19 @@ type mmodes = | All | Legacy +(** Mode cross a right mode *) +(* This is very similar to Ctype.mode_cross_right. Any bugs here are likely bugs + there, too. *) +let right_mode_cross_jkind jkind mode = + let upper_bounds = Jkind.get_modal_upper_bounds jkind in + let upper_bounds = Const.alloc_as_value upper_bounds in + Value.imply upper_bounds mode + +let right_mode_cross env ty mode= + if not (Ctype.is_principal ty) then mode else + let jkind = Ctype.type_jkind_purely env ty in + right_mode_cross_jkind jkind mode + let native_repr_args nra1 nra2 = let rec loop i nra1 nra2 = match nra1, nra2 with @@ -119,11 +132,14 @@ let value_descriptions ~loc env name inferred modalities, which we need to workaround. *) () | Legacy -> - let mmode1, mmode2 = Mode.Value.legacy, Mode.Value.legacy in + let mmode1, mmode2 = + Mode.Value.(disallow_right legacy), Mode.Value.(disallow_left legacy) + in let mode1 = Mode.Modality.Value.apply vd1.val_modalities mmode1 in let mode2 = Mode.Modality.Value.(Const.apply (to_const_exn vd2.val_modalities) mmode2) in + let mode2 = right_mode_cross env vd2.val_type mode2 in begin match Mode.Value.submode mode1 mode2 with | Ok () -> () | Error e -> raise (Dont_match (Mode e)) diff --git a/typing/jkind.ml b/typing/jkind.ml index f7244f98cba..705387349fe 100644 --- a/typing/jkind.ml +++ b/typing/jkind.ml @@ -361,6 +361,7 @@ module Error = struct { from_annotation : Parsetree.jkind_annotation; from_attribute : Builtin_attributes.jkind_attribute Location.loc } + | Unimplemented_syntax exception User_error of Location.t * t end @@ -812,7 +813,8 @@ module Const = struct List.map (of_user_written_annotation_unchecked_level context) ts in jkind_of_product_annotations jkinds - | Default | With _ | Kind_of _ -> Misc.fatal_error "XXX unimplemented" + | Default | With _ | Kind_of _ -> + raise ~loc:jkind.pjkind_loc Unimplemented_syntax (* The 
[annotation_context] parameter can be used to allow annotations / kinds in different contexts to be enabled with different extension settings. @@ -2121,6 +2123,8 @@ let report_error ~loc : Error.t -> _ = function layouts extension.@;\ %t@]" Pprintast.jkind_annotation jkind hint) + | Unimplemented_syntax -> + Location.errorf ~loc "@[Unimplemented kind syntax@]" let () = Location.register_error_of_exn (function diff --git a/typing/jkind_intf.ml b/typing/jkind_intf.ml index 5d915abb56d..45bd5b77ffb 100644 --- a/typing/jkind_intf.ml +++ b/typing/jkind_intf.ml @@ -68,6 +68,54 @@ module type Sort = sig module Debug_printers : sig val t : Format.formatter -> t -> unit end + + (* CR layouts: These are sorts for the types of ocaml expressions that are + currently required to be values, but for which we expect to relax that + restriction in versions 2 and beyond. Naming them makes it easy to find + where in the translation to lambda they are assume to be value. *) + (* CR layouts: add similarly named jkinds and use those names everywhere (not + just the translation to lambda) rather than writing specific jkinds and + sorts in the code. *) + val for_class_arg : t + + val for_instance_var : t + + val for_lazy_body : t + + val for_tuple_element : t + + val for_variant_arg : t + + val for_record : t + + val for_block_element : t + + val for_array_get_result : t + + val for_array_comprehension_element : t + + val for_list_element : t + + (** These are sorts for the types of ocaml expressions that we expect will + always be "value". These names are used in the translation to lambda to + make the code clearer. *) + val for_function : t + + val for_probe_body : t + + val for_poly_variant : t + + val for_object : t + + val for_initializer : t + + val for_method : t + + val for_module : t + + val for_predef_value : t (* Predefined value types, e.g. int and string *) + + val for_tuple : t end module Var : sig @@ -125,6 +173,13 @@ module type Sort = sig it is set to [value] first. 
*) val default_to_value_and_get : t -> Const.t + (* CR layouts v12: Default this to void. *) + + (** [default_for_transl_and_get] extracts the sort as a `const`. If it's a variable, + it is set to [value] first. After we have support for [void], this will default to + [void] instead. *) + val default_for_transl_and_get : t -> Const.t + (** To record changes to sorts, for use with `Types.{snapshot, backtrack}` *) type change @@ -137,54 +192,6 @@ module type Sort = sig val var : Format.formatter -> var -> unit end - - (* CR layouts: These are sorts for the types of ocaml expressions that are - currently required to be values, but for which we expect to relax that - restriction in versions 2 and beyond. Naming them makes it easy to find - where in the translation to lambda they are assume to be value. *) - (* CR layouts: add similarly named jkinds and use those names everywhere (not - just the translation to lambda) rather than writing specific jkinds and - sorts in the code. *) - val for_class_arg : t - - val for_instance_var : t - - val for_lazy_body : t - - val for_tuple_element : t - - val for_variant_arg : t - - val for_record : t - - val for_block_element : t - - val for_array_get_result : t - - val for_array_comprehension_element : t - - val for_list_element : t - - (** These are sorts for the types of ocaml expressions that we expect will - always be "value". These names are used in the translation to lambda to - make the code clearer. *) - val for_function : t - - val for_probe_body : t - - val for_poly_variant : t - - val for_object : t - - val for_initializer : t - - val for_method : t - - val for_module : t - - val for_predef_value : t (* Predefined value types, e.g. 
int and string *) - - val for_tuple : t end module History = struct diff --git a/typing/jkind_types.ml b/typing/jkind_types.ml index 5c015e8605b..f8cd642ae48 100644 --- a/typing/jkind_types.ml +++ b/typing/jkind_types.ml @@ -129,6 +129,44 @@ module Sort = struct in pp_element ~nested:false ppf c end + + let for_function = value + + let for_predef_value = value + + let for_block_element = value + + let for_probe_body = value + + let for_poly_variant = value + + let for_record = value + + let for_object = value + + let for_lazy_body = value + + let for_tuple_element = value + + let for_variant_arg = value + + let for_instance_var = value + + let for_class_arg = value + + let for_method = value + + let for_initializer = value + + let for_module = value + + let for_tuple = value + + let for_array_get_result = value + + let for_array_comprehension_element = value + + let for_list_element = value end module Var = struct @@ -361,6 +399,9 @@ module Sort = struct (* path compression *) result) + (* CR layouts v12: Default to void instead. 
*) + let default_for_transl_and_get s = default_to_value_and_get s + (***********************) (* equality *) @@ -501,44 +542,6 @@ module Sort = struct pp_element ~nested:false ppf t include Static.T - - let for_function = value - - let for_predef_value = value - - let for_block_element = value - - let for_probe_body = value - - let for_poly_variant = value - - let for_record = value - - let for_object = value - - let for_lazy_body = value - - let for_tuple_element = value - - let for_variant_arg = value - - let for_instance_var = value - - let for_class_arg = value - - let for_method = value - - let for_initializer = value - - let for_module = value - - let for_tuple = value - - let for_array_get_result = value - - let for_array_comprehension_element = value - - let for_list_element = value end module Layout = struct diff --git a/typing/mode.ml b/typing/mode.ml index e344ca4c173..b72beee20f2 100644 --- a/typing/mode.ml +++ b/typing/mode.ml @@ -1335,9 +1335,9 @@ module Common (Obj : Obj) = struct let get_ceil m = Solver.get_ceil obj m - let get_conservative_floor m = Solver.get_conservative_floor obj m + let get_loose_floor m = Solver.get_loose_floor obj m - let get_conservative_ceil m = Solver.get_conservative_ceil obj m + let get_loose_ceil m = Solver.get_loose_ceil obj m end end [@@inline] @@ -1370,8 +1370,8 @@ module Locality = struct if Const.le ceil floor then Some ceil else None let check_const_conservative m = - let floor = Guts.get_conservative_floor m in - let ceil = Guts.get_conservative_ceil m in + let floor = Guts.get_loose_floor m in + let ceil = Guts.get_loose_ceil m in if Const.le ceil floor then Some ceil else None end end @@ -2251,28 +2251,22 @@ module Modality = struct let print ppf = function | Join_const c -> Format.fprintf ppf "join_const(%a)" Mode.Const.print c - - (** Given a modality and a guarantee that the modality will only be appled - on [x >= mm], we can find some lower modality that is equivalent on the - restricted range. 
This is similar to mode-crossing, where we can push a - mode lower given a restricted range of types. *) - let modality_cross_left ~mm = function - | Join_const c -> - (* We want to find the minimal [c'] such that [join c x <= join c' x] - for all [x >= mm]. By definition of join, this is equivalent to [c - <= join x c'] for all [x >= mm]. This is equivalent to [c <= join - mm c']. Equivalently [subtract c mm <= c']. Note that [mm] is a - mode variable, but we need a constant. Therefore, we conservatively - take its incomplete lower bound [mm.lower]. Also recall that we - want the smallest such [c']. So we take [c' = subtract c mm.lower]. - *) - let mm = Mode.Guts.get_floor mm in - Join_const (Mode.Const.subtract c mm) end + (* Similar to constant modalities, an inferred modality maps the mode of a + record/structure to the mode of a value therein. An inferred modality [f] is + inferred from the structure/record mode [mm] and the value mode [m]. + + Soundness: You should not get a value from a record/structure at a mode strictly + stronger than how it was put in. That is, [f mm >= m]. + + Completeness: You should be able to get a value from a record/structure at a mode + not strictly weaker than how it was put in. That is, [f mm <= m]. *) + type t = | Const of Const.t | Diff of Mode.lr * Mode.l + (** inferred modality. See [apply] for its behavior. *) | Undefined let sub_log left right ~log : (unit, error) Result.t = @@ -2322,10 +2316,13 @@ module Modality = struct | Const c -> c | Undefined -> Misc.fatal_error "modality Undefined should not be zapped." | Diff (mm, m) -> - let c = Mode.zap_to_floor m in - let m = Const.Join_const c in - (* To give the best modality, we try to cross modality. *) - Const.modality_cross_left ~mm m + let m = Mode.zap_to_floor m in + (* For soundness, we want some [c] such that [m <= join c mm], which + gives [subtract_mm m <= c]. Note that [mm] is a variable, but we need + a constant. 
Therefore, we take its floor [mm' <= mm], and we have + [subtract_mm m <= subtract_mm' m <= c]. *) + let mm' = Mode.Guts.get_floor mm in + Const.Join_const (Mode.Const.subtract m mm') let zap_to_id = zap_to_floor @@ -2402,6 +2399,7 @@ module Modality = struct | Const of Const.t | Undefined | Exactly of Mode.lr * Mode.l + (** inferred modality. See [apply] for its behavior. *) let sub_log left right ~log : (unit, error) Result.t = match left, right with @@ -2460,8 +2458,40 @@ module Modality = struct let zap_to_floor = function | Const c -> c | Undefined -> Misc.fatal_error "modality Undefined should not be zapped." - | Exactly (_, m) -> - let c = Mode.zap_to_floor m in + | Exactly (mm, m) -> + let m = Mode.zap_to_floor m in + (* We want some [c] such that: + - Soundness: [meet_with c mm >= m]. + - Completeness: [meet_with c mm <= m]. + - Simplicity: Optionally, we want [c] to be as high as possible to make + [meet_with c] a simpler modality. + + We first rewrite completeness condition to [c <= imply_with mm m]. + We will take [c] to be [imply_with mm m] and prove soundness for it. + + To prove soundness [meet_with (imply_with mm m) mm >= m], we need to prove: + - [imply_with mm m >= m], or equivalently [meet mm m <= m] which is trivial. + - [mm >= m], which is guaranteed by the caller of [infer]. + In fact, the soundness condition holds for any [c] taken to be + [imply_with _ m] where the underscore can be anything. + + Note that [imply_with] requires its first argument to be a constant, so we + need to get a constant out of [mm]. First recall that [imply_with] is antitone + in its first argument. Now, we have several choices: + - Take its floor [mm' <= mm], and then [c' = imply_with mm' m]. [c'] is higher + than [c] and thus might be incomplete. + - Take its ceil [mm' >= mm]. Then, [c'] is lower than [c] and thus complete, + but might be less simple than [c]. + - Zap to floor. 
This gives us a [c' = c] that is complete and simple, but we + are imposing extra constraint to [mm] not requested by the caller. + - Zap to ceil. This gives us a [c' = c] that is complete, but less simple than + zapping it to floor. Also, we are imposing extra constraint. + + We prioritize completeness and "not imposing extra constarint" over + simplicity. So we take its ceil [mm' >= mm]. + *) + let mm' = Mode.Guts.get_ceil mm in + let c = Mode.Const.imply mm' m in Const.Meet_const c let to_const_exn = function diff --git a/typing/mode_intf.mli b/typing/mode_intf.mli index cf30c298fb6..5b0f4897265 100644 --- a/typing/mode_intf.mli +++ b/typing/mode_intf.mli @@ -545,7 +545,9 @@ module type S = sig (** Given [md_mode] the mode of a module, and [mode] the mode of a value to be put in that module, return the inferred modality to be put on the - value description in the inferred module type. *) + value description in the inferred module type. + + The caller should ensure that for comonadic axes, [md_mode >= mode]. 
*) val infer : md_mode:Value.lr -> mode:Value.l -> t (* The following zapping functions possibly mutate a potentially inferred diff --git a/typing/parmatch.ml b/typing/parmatch.ml index 27e4c91445f..593a4d7ca21 100644 --- a/typing/parmatch.ml +++ b/typing/parmatch.ml @@ -177,9 +177,11 @@ let all_coherent column = | Const_unboxed_float32 _ | Const_string _), _ -> false end - | Tuple l1, Tuple l2 -> l1 = l2 + | Tuple l1, Tuple l2 -> + List.equal (Option.equal String.equal) l1 l2 | Unboxed_tuple l1, Unboxed_tuple l2 -> - List.equal (fun (lbl1, _) (lbl2, _) -> lbl1 = lbl2) l1 l2 + List.equal + (fun (lbl1, _) (lbl2, _) -> Option.equal String.equal lbl1 lbl2) l1 l2 | Record (lbl1 :: _), Record (lbl2 :: _) -> Array.length lbl1.lbl_all = Array.length lbl2.lbl_all | Record_unboxed_product (lbl1 :: _), Record_unboxed_product (lbl2 :: _) -> @@ -431,10 +433,17 @@ let simple_match d h = | Constant c1, Constant c2 -> const_compare c1 c2 = 0 | Lazy, Lazy -> true | Record _, Record _ -> true - | Tuple len1, Tuple len2 -> len1 = len2 + | Record_unboxed_product _, Record_unboxed_product _ -> true + | Tuple lbls1, Tuple lbls2 -> + List.equal (Option.equal String.equal) lbls1 lbls2 + | Unboxed_tuple lbls1, Unboxed_tuple lbls2 -> + List.equal (fun (l1, _) (l2, _) -> Option.equal String.equal l1 l2) + lbls1 lbls2 | Array (am1, _, len1), Array (am2, _, len2) -> am1 = am2 && len1 = len2 | _, Any -> true - | _, _ -> false + | ( Construct _ | Variant _ | Constant _ | Lazy | Record _ + | Record_unboxed_product _ | Tuple _ | Unboxed_tuple _ | Array _ | Any), + _ -> false @@ -524,9 +533,17 @@ let discr_pat q pss = let rec refine_pat acc = function | [] -> acc | ((head, _), _) :: rows -> + let append_unique lbls lbls_unique = + List.fold_right (fun lbl lbls_unique -> + if List.exists (fun l -> l.lbl_num = lbl.lbl_num) lbls_unique then + lbls_unique + else + lbl :: lbls_unique + ) lbls lbls_unique + in match head.pat_desc with | Any -> refine_pat acc rows - | Tuple _ | Lazy -> head + | Tuple _ | 
Unboxed_tuple _ | Lazy -> head | Record lbls -> (* N.B. we could make this case "simpler" by refining the record case using [all_record_args]. @@ -534,24 +551,22 @@ let discr_pat q pss = records. However it makes the witness we generate for the exhaustivity warning less pretty. *) - let fields = - List.fold_right (fun lbl r -> - if List.exists (fun l -> l.lbl_num = lbl.lbl_num) r then - r - else - lbl :: r - ) lbls (record_arg acc) - in + let fields = append_unique lbls (record_arg acc) in let d = { head with pat_desc = Record fields } in refine_pat d rows - | _ -> acc + | Record_unboxed_product lbls -> + let fields = append_unique lbls (record_unboxed_product_arg acc) in + let d = { head with pat_desc = Record_unboxed_product fields } in + refine_pat d rows + | Construct _ | Constant _ | Variant _ + | Array _ -> acc in let q, _ = deconstruct q in match q.pat_desc with (* short-circuiting: clearly if we have anything other than [Record] or [Any] to start with, we're not going to be able refine at all. So there's no point going over the matrix. *) - | Any | Record _ -> refine_pat q pss + | Any | Record _ | Record_unboxed_product _ -> refine_pat q pss | _ -> q (* @@ -779,7 +794,8 @@ let build_specialized_submatrices ~extend_row discr rows = let initial_constr_group = let open Patterns.Head in match discr.pat_desc with - | Record _ | Tuple _ | Lazy -> + | Record _ | Record_unboxed_product _ | Tuple _ | Unboxed_tuple _ + | Lazy -> (* [discr] comes from [discr_pat], and in this case subsumes any of the patterns we could find on the first column of [rows]. 
So it is better to use it for our initial environment than any of the normalized diff --git a/typing/predef.ml b/typing/predef.ml index c1ac0bb479e..c638b0178fb 100644 --- a/typing/predef.ml +++ b/typing/predef.ml @@ -220,14 +220,17 @@ and ident_some = ident_create "Some" and ident_null = ident_create "Null" and ident_this = ident_create "This" +let option_argument_sort = Jkind.Sort.Const.value let option_argument_jkind = Jkind.Builtin.value ~why:( Type_argument {parent_path = path_option; position = 1; arity = 1}) +let list_jkind = Jkind.Builtin.value ~why:Boxed_variant +let list_sort = Jkind.Sort.Const.value +let list_argument_sort = Jkind.Sort.Const.value let list_argument_jkind = Jkind.Builtin.value ~why:( Type_argument {parent_path = path_list; position = 1; arity = 1}) -let or_null_argument_jkind = Jkind.Builtin.value ~why:( - Type_argument {parent_path = path_or_null; position = 1; arity = 1}) +let or_null_argument_sort = Jkind.Sort.Const.value let mk_add_type add_type ?manifest type_ident @@ -290,16 +293,14 @@ let mk_add_type1 add_type type_ident add_type type_ident decl env let mk_add_extension add_extension id args = - List.iter (fun (_, jkind) -> + List.iter (fun (_, sort) -> let raise_error () = Misc.fatal_error "sanity check failed: non-value jkind in predef extension \ constructor; should this have Constructor_mixed shape?" 
in - match Jkind.get_layout jkind with - | Some (Base Value) -> () - | Some (Any - | Base (Void | Float32 | Float64 | Word | Bits8 | Bits16 | Bits32 | Bits64 | Vec128) - | Product _) - | None -> raise_error ()) + match (sort : Jkind.Sort.Const.t) with + | Base Value -> () + | Base (Void | Float32 | Float64 | Word | Bits8 | Bits16 | Bits32 | Bits64 | Vec128) + | Product _ -> raise_error ()) args; add_extension id { ext_type_path = path_exn; @@ -307,10 +308,10 @@ let mk_add_extension add_extension id args = ext_args = Cstr_tuple (List.map - (fun (ca_type, ca_jkind) -> + (fun (ca_type, ca_sort) -> { ca_type; - ca_jkind; + ca_sort; ca_modalities=Mode.Modality.Value.Const.id; ca_loc=Location.none }) @@ -328,20 +329,20 @@ let mk_add_extension add_extension id args = let variant constrs = let mk_elt { cd_args } = - let jkinds = match cd_args with + let sorts = match cd_args with | Cstr_tuple args -> - Misc.Stdlib.Array.of_list_map (fun { ca_jkind } -> ca_jkind) args + Misc.Stdlib.Array.of_list_map (fun { ca_sort } -> ca_sort) args | Cstr_record lbls -> - Misc.Stdlib.Array.of_list_map (fun { ld_jkind } -> ld_jkind) lbls + Misc.Stdlib.Array.of_list_map (fun { ld_sort } -> ld_sort) lbls in - Constructor_uniform_value, jkinds + Constructor_uniform_value, sorts in Type_variant (constrs, Variant_boxed (Misc.Stdlib.Array.of_list_map mk_elt constrs)) -let unrestricted tvar jkind = +let unrestricted tvar ca_sort = {ca_type=tvar; - ca_jkind=jkind; + ca_sort; ca_modalities=Mode.Modality.Value.Const.id; ca_loc=Location.none} @@ -351,7 +352,6 @@ let build_initial_env add_type add_extension empty_env = let add_type = mk_add_type add_type and add_type1 = mk_add_type1 add_type and add_extension = mk_add_extension add_extension in - let list_jkind = Jkind.Builtin.value ~why:Boxed_variant in empty_env (* Predefined types *) |> add_type1 ident_array @@ -383,8 +383,8 @@ let build_initial_env add_type add_extension empty_env = ~separability:Separability.Ind ~kind:(fun tvar -> variant [cstr 
ident_nil []; - cstr ident_cons [unrestricted tvar list_argument_jkind; - unrestricted (type_list tvar) list_jkind]]) + cstr ident_cons [unrestricted tvar list_argument_sort; + unrestricted (type_list tvar) list_sort]]) ~jkind:list_jkind |> add_type ident_nativeint ~jkind:Jkind.Const.Builtin.immutable_data @@ -393,35 +393,32 @@ let build_initial_env add_type add_extension empty_env = ~separability:Separability.Ind ~kind:(fun tvar -> variant [cstr ident_none []; - cstr ident_some [unrestricted tvar option_argument_jkind]]) + cstr ident_some [unrestricted tvar option_argument_sort]]) ~jkind:(Jkind.Builtin.value ~why:Boxed_variant) |> add_type ident_lexing_position ~kind:( - let lbl (field, field_type, jkind) = + let lbl (field, field_type) = let id = Ident.create_predef field in { ld_id=id; ld_mutable=Immutable; ld_modalities=Mode.Modality.Value.Const.id; ld_type=field_type; - ld_jkind=jkind; + ld_sort=Jkind.Sort.Const.value; ld_loc=Location.none; ld_attributes=[]; ld_uid=Uid.of_predef_id id; } in - let immediate = Jkind.Builtin.immediate ~why:(Primitive ident_int) in let labels = List.map lbl [ - ("pos_fname", type_string, - Jkind.of_builtin ~why:(Primitive ident_string) - Jkind.Const.Builtin.immutable_data); - ("pos_lnum", type_int, immediate); - ("pos_bol", type_int, immediate); - ("pos_cnum", type_int, immediate) ] + ("pos_fname", type_string); + ("pos_lnum", type_int); + ("pos_bol", type_int); + ("pos_cnum", type_int) ] in Type_record ( labels, - (Record_boxed (List.map (fun label -> label.ld_jkind) labels |> Array.of_list)) + (Record_boxed (List.map (fun label -> label.ld_sort) labels |> Array.of_list)) ) ) ~jkind:Jkind.Const.Builtin.immutable_data @@ -437,25 +434,25 @@ let build_initial_env add_type add_extension empty_env = (* Predefined exceptions - alphabetical order *) |> add_extension ident_assert_failure [newgenty (Ttuple[None, type_string; None, type_int; None, type_int]), - Jkind.Builtin.value ~why:Tuple] + Jkind.Sort.Const.value] |> add_extension 
ident_division_by_zero [] |> add_extension ident_end_of_file [] |> add_extension ident_failure [type_string, - Jkind.Builtin.value ~why:(Primitive ident_string)] + Jkind.Sort.Const.value] |> add_extension ident_invalid_argument [type_string, - Jkind.Builtin.value ~why:(Primitive ident_string)] + Jkind.Sort.Const.value] |> add_extension ident_match_failure [newgenty (Ttuple[None, type_string; None, type_int; None, type_int]), - Jkind.Builtin.value ~why:Tuple] + Jkind.Sort.Const.value] |> add_extension ident_not_found [] |> add_extension ident_out_of_memory [] |> add_extension ident_stack_overflow [] |> add_extension ident_sys_blocked_io [] |> add_extension ident_sys_error [type_string, - Jkind.Builtin.value ~why:(Primitive ident_string)] + Jkind.Sort.Const.value] |> add_extension ident_undefined_recursive_module [newgenty (Ttuple[None, type_string; None, type_int; None, type_int]), - Jkind.Builtin.value ~why:Tuple] + Jkind.Sort.Const.value] let add_simd_stable_extension_types add_type env = let add_type = mk_add_type add_type in @@ -489,7 +486,7 @@ let add_small_number_beta_extension_types add_type env = let or_null_kind tvar = variant [cstr ident_null []; - cstr ident_this [unrestricted tvar or_null_argument_jkind]] + cstr ident_this [unrestricted tvar or_null_argument_sort]] let add_or_null add_type env = let add_type1 = mk_add_type1 add_type in diff --git a/typing/printtyped.ml b/typing/printtyped.ml index af6d9915b4e..153dd95a48f 100644 --- a/typing/printtyped.ml +++ b/typing/printtyped.ml @@ -180,9 +180,9 @@ let tuple_component_label i ppf = function let typevars ppf vs = List.iter (typevar_jkind ~print_quote:true ppf) vs -let jkind_array i ppf jkinds = - array (i+1) (fun _ ppf l -> fprintf ppf "%a;@ " Jkind.format l) - ppf jkinds +let sort_array i ppf sorts = + array (i+1) (fun _ ppf l -> fprintf ppf "%a;@ " Jkind.Sort.Const.format l) + ppf sorts let tag ppf = let open Types in function | Ordinary {src_index;runtime_tag} -> @@ -194,8 +194,8 @@ let 
variant_representation i ppf = let open Types in function line i ppf "Variant_unboxed\n" | Variant_boxed cstrs -> line i ppf "Variant_boxed %a\n" - (array (i+1) (fun _ ppf (_cstr, jkinds) -> - jkind_array (i+1) ppf jkinds)) + (array (i+1) (fun _ ppf (_cstr, sorts) -> + sort_array (i+1) ppf sorts)) cstrs | Variant_extensible -> line i ppf "Variant_inlined\n" @@ -205,8 +205,8 @@ let flat_element i ppf flat_element = let record_representation i ppf = let open Types in function | Record_unboxed -> line i ppf "Record_unboxed\n" - | Record_boxed jkinds -> - line i ppf "Record_boxed %a\n" (jkind_array i) jkinds + | Record_boxed sorts -> + line i ppf "Record_boxed %a\n" (sort_array i) sorts | Record_inlined (t, _c, v) -> line i ppf "Record_inlined (%a, %a)\n" tag t (variant_representation i) v | Record_float -> line i ppf "Record_float\n" diff --git a/typing/solver.ml b/typing/solver.ml index fe57c143d40..ff972766eed 100644 --- a/typing/solver.ml +++ b/typing/solver.ml @@ -542,7 +542,7 @@ module Solver_mono (C : Lattices_mono) = struct in loop (C.max obj) VarMap.empty l - let get_conservative_ceil : type a l r. a C.obj -> (a, l * r) mode -> a = + let get_loose_ceil : type a l r. a C.obj -> (a, l * r) mode -> a = fun obj m -> match m with | Amode a -> a @@ -552,7 +552,7 @@ module Solver_mono (C : Lattices_mono) = struct | Amodejoin (a, mvs) -> VarMap.fold (fun _ mv acc -> C.join obj acc (mupper obj mv)) mvs a - let get_conservative_floor : type a l r. a C.obj -> (a, l * r) mode -> a = + let get_loose_floor : type a l r. a C.obj -> (a, l * r) mode -> a = fun obj m -> match m with | Amode a -> a @@ -563,7 +563,7 @@ module Solver_mono (C : Lattices_mono) = struct VarMap.fold (fun _ mv acc -> C.meet obj acc (mlower obj mv)) mvs a (* Due to our biased implementation, the ceil is precise. *) - let get_ceil = get_conservative_ceil + let get_ceil = get_loose_ceil let zap_to_ceil : type a l. 
a C.obj -> (a, l * allowed) mode -> log:_ -> a = fun obj m ~log -> @@ -641,8 +641,8 @@ module Solver_mono (C : Lattices_mono) = struct type a l r. ?verbose:bool -> a C.obj -> Format.formatter -> (a, l * r) mode -> unit = fun ?verbose (obj : a C.obj) ppf m -> - let ceil = get_conservative_ceil obj m in - let floor = get_conservative_floor obj m in + let ceil = get_loose_ceil obj m in + let floor = get_loose_floor obj m in if C.le obj ceil floor then C.print obj ppf ceil else print_raw ?verbose obj ppf m @@ -745,9 +745,9 @@ module Solvers_polarized (C : Lattices_mono) = struct let get_floor = S.get_floor - let get_conservative_ceil = S.get_conservative_ceil + let get_loose_ceil = S.get_loose_ceil - let get_conservative_floor = S.get_conservative_floor + let get_loose_floor = S.get_loose_floor let print ?(verbose = false) = S.print ~verbose @@ -807,9 +807,9 @@ module Solvers_polarized (C : Lattices_mono) = struct let get_floor = S.get_ceil - let get_conservative_ceil = S.get_conservative_floor + let get_loose_ceil = S.get_loose_floor - let get_conservative_floor = S.get_conservative_ceil + let get_loose_floor = S.get_loose_ceil let print ?(verbose = false) = S.print ~verbose diff --git a/typing/solver_intf.mli b/typing/solver_intf.mli index 177693da723..8eed401fab4 100644 --- a/typing/solver_intf.mli +++ b/typing/solver_intf.mli @@ -256,14 +256,14 @@ module type Solver_polarized = sig val get_ceil : 'a obj -> ('a, 'l * allowed) mode -> 'a (** Similar to [get_floor] but does not run the further constraining needed - for a precise bound. As a result, the returned bound is conservative; + for a precise bound. As a result, the returned bound is loose; i.e., it might be lower than the real floor. *) - val get_conservative_floor : 'a obj -> ('a, 'l * 'r) mode -> 'a + val get_loose_floor : 'a obj -> ('a, 'l * 'r) mode -> 'a (** Similar to [get_ceil] but does not run the further constraining needed - for a precise bound. 
As a result, the returned bound is conservative; + for a precise bound. As a result, the returned bound is loose; i.e., it might be higher than the real ceil. *) - val get_conservative_ceil : 'a obj -> ('a, 'l * 'r) mode -> 'a + val get_loose_ceil : 'a obj -> ('a, 'l * 'r) mode -> 'a (** Printing a mode for debugging. *) val print : diff --git a/typing/subst.ml b/typing/subst.ml index aba4f23965c..d063669535e 100644 --- a/typing/subst.ml +++ b/typing/subst.ml @@ -124,11 +124,6 @@ let with_additional_action = in { s with additional_action; last_compose = None } -let apply_prepare_jkind s lay loc = - match s.additional_action with - | Prepare_for_saving { prepare_jkind } -> prepare_jkind loc lay - | Duplicate_variables | No_action -> lay - let change_locs s loc = { s with loc = Some loc; last_compose = None } let loc s x = @@ -467,7 +462,7 @@ let label_declaration copy_scope s l = ld_id = l.ld_id; ld_mutable = l.ld_mutable; ld_modalities = l.ld_modalities; - ld_jkind = apply_prepare_jkind s l.ld_jkind l.ld_loc; + ld_sort = l.ld_sort; ld_type = typexp copy_scope s l.ld_loc l.ld_type; ld_loc = loc s l.ld_loc; ld_attributes = attrs s l.ld_attributes; @@ -477,13 +472,7 @@ let label_declaration copy_scope s l = let constructor_argument copy_scope s ca = { ca_type = typexp copy_scope s ca.ca_loc ca.ca_type; - ca_jkind = begin match s.additional_action with - | Prepare_for_saving { prepare_jkind } -> - prepare_jkind ca.ca_loc ca.ca_jkind - (* CR layouts v2.8: This will have to be copied once we - have with-types. 
*) - | Duplicate_variables | No_action -> ca.ca_jkind - end; + ca_sort = ca.ca_sort; ca_loc = loc s ca.ca_loc; ca_modalities = ca.ca_modalities; } @@ -504,30 +493,6 @@ let constructor_declaration copy_scope s c = cd_uid = c.cd_uid; } -(* called only when additional_action is [Prepare_for_saving] *) -let variant_representation ~prepare_jkind loc = function - | Variant_unboxed -> Variant_unboxed - | Variant_boxed cstrs_and_jkinds -> - Variant_boxed - (Array.map - (fun (cstr, jkinds) -> cstr, Array.map (prepare_jkind loc) jkinds) - cstrs_and_jkinds) - | Variant_extensible -> Variant_extensible - -(* called only when additional_action is [Prepare_for_saving] *) -let record_representation ~prepare_jkind loc = function - | Record_unboxed -> Record_unboxed - | Record_inlined (tag, constructor_rep, variant_rep) -> - Record_inlined (tag, - constructor_rep, - variant_representation ~prepare_jkind loc variant_rep) - | Record_boxed lays -> - Record_boxed (Array.map (prepare_jkind loc) lays) - | (Record_float | Record_ufloat | Record_mixed _) as rep -> rep - -let record_unboxed_product_representation ~prepare_jkind:_ _loc = function - | Record_unboxed_product -> Record_unboxed_product - let type_declaration' copy_scope s decl = { type_params = List.map (typexp copy_scope s decl.type_loc) decl.type_params; type_arity = decl.type_arity; @@ -535,30 +500,11 @@ let type_declaration' copy_scope s decl = begin match decl.type_kind with Type_abstract r -> Type_abstract r | Type_variant (cstrs, rep) -> - let rep = - match s.additional_action with - | No_action | Duplicate_variables -> rep - | Prepare_for_saving { prepare_jkind } -> - variant_representation ~prepare_jkind decl.type_loc rep - in Type_variant (List.map (constructor_declaration copy_scope s) cstrs, rep) | Type_record(lbls, rep) -> - let rep = - match s.additional_action with - | No_action | Duplicate_variables -> rep - | Prepare_for_saving { prepare_jkind } -> - record_representation ~prepare_jkind decl.type_loc rep - in 
Type_record (List.map (label_declaration copy_scope s) lbls, rep) | Type_record_unboxed_product(lbls, rep) -> - let rep = - match s.additional_action with - | No_action | Duplicate_variables -> rep - | Prepare_for_saving { prepare_jkind } -> - record_unboxed_product_representation - ~prepare_jkind decl.type_loc rep - in Type_record_unboxed_product (List.map (label_declaration copy_scope s) lbls, rep) | Type_open -> Type_open diff --git a/typing/typecore.ml b/typing/typecore.ml index 08610a2be8a..c6abb0b1a26 100644 --- a/typing/typecore.ml +++ b/typing/typecore.ml @@ -1456,7 +1456,7 @@ and build_as_type_aux (env : Env.t) p ~mode = else Value.newvar () in let keep = - priv || cstr.cstr_existentials <> [] || + priv || vto <> None (* be lazy and keep the type for node constraints *) in let ty = if keep then p.pat_type else @@ -9634,7 +9634,8 @@ and type_comprehension_expr ~loc ~env ~ty_expected ~attributes cexpr = container_type, (fun tcomp -> Texp_array_comprehension - (mut, Jkind.Sort.for_array_comprehension_element, tcomp)), + (mut, Jkind.Sort.of_const + Jkind.Sort.Const.for_array_comprehension_element, tcomp)), comp, (* CR layouts v4: When this changes from [value], you will also have to update the use of [transl_exp] in transl_array_comprehension.ml. 
See diff --git a/typing/typedecl.ml b/typing/typedecl.ml index 179f6fb15f2..4fa6cec55c9 100644 --- a/typing/typedecl.ml +++ b/typing/typedecl.ml @@ -484,8 +484,8 @@ let transl_labels (type rep) ~(record_form : rep record_form) ~new_var_jkind {Types.ld_id = ld.ld_id; ld_mutable = ld.ld_mutable; ld_modalities = ld.ld_modalities; - ld_jkind = Jkind.Builtin.any ~why:Dummy_jkind; - (* Updated by [update_label_jkinds] *) + ld_sort = Jkind.Sort.Const.void; + (* Updated by [update_label_sorts] *) ld_type = ty; ld_loc = ld.ld_loc; ld_attributes = ld.ld_attributes; @@ -516,8 +516,8 @@ let transl_types_gf ~new_var_jkind ~allow_unboxed Types.ca_modalities = ca.ca_modalities; ca_loc = ca.ca_loc; ca_type = ca.ca_type.ctyp_type; - ca_jkind = Jkind.Builtin.any ~why:Dummy_jkind; - (* Updated by [update_constructor_arguments_jkinds] *) + ca_sort = Jkind.Sort.Const.void; + (* Updated by [update_constructor_arguments_sorts] *) }) tyl_gfl in tyl_gfl, tyl_gfl' @@ -546,7 +546,7 @@ let transl_constructor_arguments ~new_var_jkind ~unboxed (* Note that [make_constructor] does not fill in the [ld_jkind] field of any computed record types, because it's called too early in the translation of a type declaration to compute accurate jkinds in the presence of recursively - defined types. It is updated later by [update_constructor_arguments_jkinds] + defined types. It is updated later by [update_constructor_arguments_sorts] *) let make_constructor env loc ~cstr_path ~type_path ~unboxed type_params svars @@ -797,7 +797,6 @@ let transl_declaration env sdecl (id, uid) = let cty = transl_simple_type ~new_var_jkind:Any env ~closed:no_row Mode.Alloc.Const.legacy sty in Some cty, Some cty.ctyp_type in - let any = Jkind.Builtin.any ~why:Initial_typedecl_env in (* jkind_default is the jkind to use for now as the type_jkind when there is no annotation and no manifest. See Note [Default jkinds in transl_declaration]. 
@@ -891,21 +890,21 @@ let transl_declaration env sdecl (id, uid) = Variant_unboxed, Jkind.of_new_legacy_sort ~why:Old_style_unboxed_type else - (* We mark all arg jkinds "any" here. They are updated later, - after the circular type checks make it safe to check jkinds. + (* We mark all arg sorts "void" here. They are updated later, + after the circular type checks make it safe to check sorts. Likewise, [Constructor_uniform_value] is potentially wrong and will be updated later. *) Variant_boxed ( Array.map (fun cstr -> - let jkinds = + let sorts = match Types.(cstr.cd_args) with | Cstr_tuple args -> - Array.make (List.length args) any - | Cstr_record _ -> [| any |] + Array.make (List.length args) Jkind.Sort.Const.void + | Cstr_record _ -> [| Jkind.Sort.Const.value |] in - Constructor_uniform_value, jkinds) + Constructor_uniform_value, sorts) (Array.of_list cstrs) ), Jkind.Builtin.value ~why:Boxed_variant @@ -927,8 +926,8 @@ let transl_declaration env sdecl (id, uid) = (* Note this is inaccurate, using `Record_boxed` in cases where the correct representation is [Record_float], [Record_ufloat], or [Record_mixed]. Those cases are fixed up after we can get - accurate jkinds for the fields, in [update_decl_jkind]. *) - Record_boxed (Array.make (List.length lbls) any), + accurate sorts for the fields, in [update_decl_jkind]. *) + Record_boxed (Array.make (List.length lbls) Jkind.Sort.Const.void), Jkind.Builtin.value ~why:Boxed_record in Ttype_record lbls, Type_record(lbls', rep), jkind @@ -942,7 +941,9 @@ let transl_declaration env sdecl (id, uid) = (* The jkinds below, and the ones in [lbls], are dummy jkinds which are replaced and made to correspond to each other in [update_decl_jkind]. 
*) - let jkind_ls = List.map (fun _ -> any) lbls in + let jkind_ls = + List.map (fun _ -> Jkind.Builtin.any ~why:Initial_typedecl_env) lbls + in let jkind = Jkind.Builtin.product ~why:Unboxed_record jkind_ls in Ttype_record_unboxed_product lbls, Type_record_unboxed_product(lbls', Record_unboxed_product), jkind @@ -1237,59 +1238,70 @@ let check_coherence env loc dpath decl = let check_abbrev env sdecl (id, decl) = (id, check_coherence env sdecl.ptype_loc (Path.Pident id) decl) -(* The [update_x_jkinds] functions infer more precise jkinds in the type kind, +(* The [update_x_sorts] functions infer more precise jkinds in the type kind, including which fields of a record are void. This would be hard to do during [transl_declaration] due to mutually recursive types. *) -(* [update_label_jkinds] additionally returns whether all the jkinds - were void *) -let update_label_jkinds env loc lbls named = - (* [named] is [Some jkinds] for top-level records (we will update the - jkinds) and [None] for inlined records. *) +(* [update_label_sorts] additionally returns whether all the jkinds + were void, and the jkinds of the labels *) +let update_label_sorts env loc lbls named = + (* [named] is [Some sorts] for top-level records (we will update the + sorts) and [None] for inlined records. *) (* CR layouts v5: it wouldn't be too hard to support records that are all void. 
just needs a bit of refactoring in translcore *) let update = match named with | None -> fun _ _ -> () - | Some jkinds -> fun idx jkind -> jkinds.(idx) <- jkind + | Some sorts -> fun idx sort -> sorts.(idx) <- sort in - let lbls = + let lbls_and_jkinds = List.mapi (fun idx (Types.{ld_type} as lbl) -> - let ld_jkind = Ctype.type_jkind env ld_type in - update idx ld_jkind; - {lbl with ld_jkind} + let jkind = Ctype.type_jkind env ld_type in + (* Next line guaranteed to be safe because of [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in + update idx ld_sort; + {lbl with ld_sort}, jkind ) lbls in - if List.for_all (fun l -> Jkind.is_void_defaulting l.ld_jkind) lbls then + let lbls, jkinds = List.split lbls_and_jkinds in + if List.for_all (fun l -> Jkind.Sort.Const.(equal void l.ld_sort)) lbls then raise (Error (loc, Jkind_empty_record)) - else lbls, false + else lbls, false, jkinds (* CR layouts v5: return true for a record with all voids *) (* In addition to updated constructor arguments, returns whether all arguments are void, useful for detecting enumerations that can be [immediate]. 
*) -let update_constructor_arguments_jkinds env loc cd_args jkinds = +let update_constructor_arguments_sorts env loc cd_args sorts = let update = - match jkinds with + match sorts with | None -> fun _ _ -> () - | Some jkinds -> fun idx jkind -> jkinds.(idx) <- jkind + | Some sorts -> fun idx sort -> sorts.(idx) <- sort in match cd_args with | Types.Cstr_tuple args -> - let args = + let args_and_jkinds = List.mapi (fun idx ({Types.ca_type; _} as arg) -> - let ca_jkind = Ctype.type_jkind env ca_type in - update idx ca_jkind; - {arg with ca_jkind}) args + let jkind = Ctype.type_jkind env ca_type in + (* Next line guaranteed to be safe because of [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ca_sort = Jkind.Sort.default_to_value_and_get sort in + update idx ca_sort; + {arg with ca_sort}, jkind) + args in + let args, jkinds = List.split args_and_jkinds in Types.Cstr_tuple args, - List.for_all (fun { ca_jkind } -> Jkind.is_void_defaulting ca_jkind) args + List.for_all + (fun { ca_sort } -> Jkind_types.Sort.Const.(equal void ca_sort)) args, + jkinds | Types.Cstr_record lbls -> - let lbls, all_void = - update_label_jkinds env loc lbls None + let lbls, all_void, jkinds = + update_label_sorts env loc lbls None in - update 0 (Jkind.Builtin.value ~why:Boxed_record); - Types.Cstr_record lbls, all_void + update 0 Jkind.Sort.Const.value; + Types.Cstr_record lbls, all_void, jkinds let assert_mixed_product_support = let required_reserved_header_bits = 8 in @@ -1449,17 +1461,17 @@ module Element_repr = struct end let update_constructor_representation - env (cd_args : Types.constructor_arguments) ~loc + env (cd_args : Types.constructor_arguments) arg_jkinds ~loc ~is_extension_constructor = let flat_suffix = match cd_args with | Cstr_tuple arg_types_and_modes -> let arg_reprs = - List.map (fun {Types.ca_type=arg_type; ca_jkind=arg_jkind; _} -> + List.map2 (fun {Types.ca_type=arg_type; _} arg_jkind -> let kloc : jkind_sort_loc = Cstr_tuple { unboxed = false } 
in Element_repr.classify env loc kloc arg_type arg_jkind, arg_type) - arg_types_and_modes + arg_types_and_modes arg_jkinds in Element_repr.mixed_product_shape loc arg_reprs Cstr_tuple ~on_flat_field_expected:(fun ~non_value ~boxed -> @@ -1472,11 +1484,11 @@ let update_constructor_representation raise (Error (loc, Illegal_mixed_product violation))) | Cstr_record fields -> let arg_reprs = - List.map (fun ld -> + List.map2 (fun ld arg_jkind -> let kloc = Inlined_record { unboxed = false } in - Element_repr.classify env loc kloc ld.Types.ld_type ld.ld_jkind, + Element_repr.classify env loc kloc ld.Types.ld_type arg_jkind, ld) - fields + fields arg_jkinds in Element_repr.mixed_product_shape loc arg_reprs Cstr_record ~on_flat_field_expected:(fun ~non_value ~boxed -> @@ -1528,20 +1540,24 @@ let update_decl_jkind env dpath decl = let update_record_kind loc lbls rep = match lbls, rep with | [Types.{ld_type} as lbl], Record_unboxed -> - let ld_jkind = Ctype.type_jkind env ld_type in - [{lbl with ld_jkind}], Record_unboxed, ld_jkind - | _, Record_boxed jkinds -> - let lbls, all_void = - update_label_jkinds env loc lbls (Some jkinds) + let jkind = Ctype.type_jkind env ld_type in + (* This next line is guaranteed to be OK because of a call to + [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in + [{lbl with ld_sort}], Record_unboxed, jkind + | _, Record_boxed sorts -> + let lbls, all_void, jkinds = + update_label_sorts env loc lbls (Some sorts) in let jkind = Jkind.for_boxed_record ~all_void in let reprs = - List.mapi - (fun i lbl -> + List.map2 + (fun lbl jkind -> let kloc = Record { unboxed = false } in - Element_repr.classify env loc kloc lbl.Types.ld_type jkinds.(i), + Element_repr.classify env loc kloc lbl.Types.ld_type jkind, lbl) - lbls + lbls jkinds in let repr_summary = { values = false; imms = false; floats = false; float64s = false; @@ -1642,16 +1658,20 @@ let update_decl_jkind env dpath decl 
= | [{Types.cd_args} as cstr], Variant_unboxed -> begin match cd_args with | Cstr_tuple [{ca_type=ty; _} as arg] -> begin - let ca_jkind = Ctype.type_jkind env ty in + let jkind = Ctype.type_jkind env ty in + let sort = Jkind.sort_of_jkind jkind in + let ca_sort = Jkind.Sort.default_to_value_and_get sort in [{ cstr with Types.cd_args = - Cstr_tuple [{ arg with ca_jkind }] }], - Variant_unboxed, ca_jkind + Cstr_tuple [{ arg with ca_sort }] }], + Variant_unboxed, jkind end | Cstr_record [{ld_type} as lbl] -> begin - let ld_jkind = Ctype.type_jkind env ld_type in + let jkind = Ctype.type_jkind env ld_type in + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in [{ cstr with Types.cd_args = - Cstr_record [{ lbl with ld_jkind }] }], - Variant_unboxed, ld_jkind + Cstr_record [{ lbl with ld_sort }] }], + Variant_unboxed, jkind end | (Cstr_tuple ([] | _ :: _ :: _) | Cstr_record ([] | _ :: _ :: _)) -> assert false @@ -1659,27 +1679,27 @@ let update_decl_jkind env dpath decl = | cstrs, Variant_boxed cstr_shapes -> let (_,cstrs,all_voids) = List.fold_left (fun (idx,cstrs,all_voids) cstr -> - let arg_jkinds = + let arg_sorts = match cstr_shapes.(idx) with - | Constructor_uniform_value, arg_jkinds -> arg_jkinds + | Constructor_uniform_value, arg_sorts -> arg_sorts | Constructor_mixed _, _ -> fatal_error "Typedecl.update_variant_kind doesn't expect mixed \ constructor as input" in - let cd_args, all_void = - update_constructor_arguments_jkinds env cstr.Types.cd_loc - cstr.Types.cd_args (Some arg_jkinds) + let cd_args, all_void, jkinds = + update_constructor_arguments_sorts env cstr.Types.cd_loc + cstr.Types.cd_args (Some arg_sorts) in let cstr_repr = - update_constructor_representation env cd_args + update_constructor_representation env cd_args jkinds ~is_extension_constructor:false ~loc:cstr.Types.cd_loc in let () = match cstr_repr with | Constructor_uniform_value -> () - | Constructor_mixed _ -> cstr_shapes.(idx) <- cstr_repr, 
arg_jkinds + | Constructor_mixed _ -> cstr_shapes.(idx) <- cstr_repr, arg_sorts in let cstr = { cstr with Types.cd_args } in (idx+1,cstr::cstrs,all_voids && all_void) @@ -1720,8 +1740,12 @@ let update_decl_jkind env dpath decl = | Record_unboxed_product -> let lbls, jkinds = List.map (fun (Types.{ld_type} as lbl) -> - let ld_jkind = Ctype.type_jkind env ld_type in - {lbl with ld_jkind}, ld_jkind + let jkind = Ctype.type_jkind env ld_type in + (* This next line is guaranteed to be OK because of a call to + [check_representable] *) + let sort = Jkind.sort_of_jkind jkind in + let ld_sort = Jkind.Sort.default_to_value_and_get sort in + {lbl with ld_sort}, jkind ) lbls |> List.split in @@ -2402,11 +2426,11 @@ let transl_extension_constructor_decl ~cstr_path:(Pident id) ~type_path ~unboxed:false typext_params svars sargs sret_type in - let args, constant = - update_constructor_arguments_jkinds env loc args None + let args, constant, jkinds = + update_constructor_arguments_sorts env loc args None in let constructor_shape = - update_constructor_representation env args ~loc + update_constructor_representation env args jkinds ~loc ~is_extension_constructor:true in args, constructor_shape, constant, ret_type, diff --git a/typing/typedtree.ml b/typing/typedtree.ml index 6263321b1b5..c78b97000c9 100644 --- a/typing/typedtree.ml +++ b/typing/typedtree.ml @@ -1082,18 +1082,31 @@ let rec iter_bound_idents { f = fun p -> iter_bound_idents f p } d -type full_bound_ident_action = - Ident.t -> string loc -> type_expr -> Uid.t -> Mode.Value.l -> Jkind.sort -> unit +type 'sort full_bound_ident_action = + Ident.t -> string loc -> type_expr -> Uid.t -> Mode.Value.l -> 'sort -> unit + +let for_transl f = + f ~of_sort:Jkind.Sort.default_for_transl_and_get ~of_const_sort:Fun.id + +let for_typing f = + f ~of_sort:Fun.id ~of_const_sort:Jkind.Sort.of_const (* The intent is that the sort should be the sort of the type of the pattern. It's used to avoid computing jkinds from types. 
`f` then gets passed the sorts of the variables. This is occasionally used in places where we don't actually know - about the sort of the pattern but `f` doesn't care about the sorts. *) -let iter_pattern_full ~both_sides_of_or f sort pat = + about the sort of the pattern but `f` doesn't care about the sorts. + + Because this should work both over [Jkind.Sort.t] and [Jkind.Sort.Const.t], + this takes conversion functions [of_sort : Jkind.Sort.t -> 'sort] and + [of_const_sort : Jkind.Sort.Const.t -> 'sort]. The need for these is somewhat + unfortunate, but it's worth it to allow [Jkind.Sort.Const.t] to be used + throughout the transl process. *) +let iter_pattern_full ~of_sort ~of_const_sort ~both_sides_of_or f sort pat = + let value = of_const_sort Jkind.Sort.Const.value in let rec loop : - type k . full_bound_ident_action -> Jkind.sort -> k general_pattern -> _ = + type k . 'sort full_bound_ident_action -> 'sort -> k general_pattern -> _ = fun f sort pat -> match pat.pat_desc with (* Cases where we push the sort inwards: *) @@ -1112,56 +1125,55 @@ let iter_pattern_full ~both_sides_of_or f sort pat = match cstr.cstr_repr with | Variant_unboxed -> [ sort ] | Variant_boxed _ | Variant_extensible -> - (List.map (fun { ca_jkind } -> Jkind.sort_of_jkind ca_jkind ) + (List.map (fun { ca_sort } -> of_const_sort ca_sort ) cstr.cstr_args) in List.iter2 (loop f) sorts patl | Tpat_record (lbl_pat_list, _) -> List.iter (fun (_, lbl, pat) -> - (loop f) (Jkind.sort_of_jkind lbl.lbl_jkind) pat) + (loop f) (of_const_sort lbl.lbl_sort) pat) lbl_pat_list | Tpat_record_unboxed_product (lbl_pat_list, _) -> List.iter (fun (_, lbl, pat) -> - (loop f) (Jkind.sort_of_jkind lbl.lbl_jkind) pat) + (loop f) (of_const_sort lbl.lbl_sort) pat) lbl_pat_list (* Cases where the inner things must be value: *) - | Tpat_variant (_, pat, _) -> Option.iter (loop f Jkind.Sort.value) pat + | Tpat_variant (_, pat, _) -> Option.iter (loop f value) pat | Tpat_tuple patl -> - List.iter (fun (_, pat) -> loop f 
Jkind.Sort.value pat) patl + List.iter (fun (_, pat) -> loop f value pat) patl (* CR layouts v5: tuple case to change when we allow non-values in tuples *) | Tpat_unboxed_tuple patl -> - List.iter (fun (_, pat, sort) -> loop f sort pat) patl - | Tpat_array (_, arg_sort, patl) -> List.iter (loop f arg_sort) patl - | Tpat_lazy p | Tpat_exception p -> loop f Jkind.Sort.value p + List.iter (fun (_, pat, sort) -> loop f (of_sort sort) pat) patl + | Tpat_array (_, arg_sort, patl) -> + List.iter (loop f (of_sort arg_sort)) patl + | Tpat_lazy p | Tpat_exception p -> loop f value p (* Cases without variables: *) | Tpat_any | Tpat_constant _ -> () in loop f sort pat -let rev_pat_bound_idents_full sort pat = +let rev_pat_bound_idents_full ~of_sort ~of_const_sort sort pat = let idents_full = ref [] in let add id sloc typ uid _ sort = idents_full := (id, sloc, typ, uid, sort) :: !idents_full in - iter_pattern_full ~both_sides_of_or:false add sort pat; + iter_pattern_full + ~both_sides_of_or:false ~of_sort ~of_const_sort + add sort pat; !idents_full let rev_only_idents idents_full = List.rev_map (fun (id,_,_,_,_) -> id) idents_full -let rev_only_idents_and_types idents_full = - List.rev_map (fun (id,_,ty,_,_) -> (id,ty)) idents_full - let pat_bound_idents_full sort pat = - List.rev (rev_pat_bound_idents_full sort pat) + List.rev (for_transl rev_pat_bound_idents_full sort pat) (* In these two, we don't know the sort, but the sort information isn't used so it's fine to lie. 
*) -let pat_bound_idents_with_types pat = - rev_only_idents_and_types (rev_pat_bound_idents_full Jkind.Sort.value pat) let pat_bound_idents pat = - rev_only_idents (rev_pat_bound_idents_full Jkind.Sort.value pat) + rev_only_idents + (for_typing rev_pat_bound_idents_full Jkind.Sort.value pat) let rev_let_bound_idents_full bindings = let idents_full = ref [] in @@ -1176,7 +1188,9 @@ let let_bound_idents_with_modes_sorts_and_checks bindings = in let checks = List.fold_left (fun checks vb -> - iter_pattern_full ~both_sides_of_or:true f vb.vb_sort vb.vb_pat; + for_typing iter_pattern_full + ~both_sides_of_or:true + f vb.vb_sort vb.vb_pat; match vb.vb_pat.pat_desc, vb.vb_expr.exp_desc with | Tpat_var (id, _, _, _), Texp_function fn -> let zero_alloc = diff --git a/typing/typedtree.mli b/typing/typedtree.mli index d95ca7173a0..1b51ff29bd5 100644 --- a/typing/typedtree.mli +++ b/typing/typedtree.mli @@ -1271,11 +1271,9 @@ val mknoloc: 'a -> 'a Asttypes.loc val mkloc: 'a -> Location.t -> 'a Asttypes.loc val pat_bound_idents: 'k general_pattern -> Ident.t list -val pat_bound_idents_with_types: - 'k general_pattern -> (Ident.t * Types.type_expr) list val pat_bound_idents_full: - Jkind.sort -> 'k general_pattern - -> (Ident.t * string loc * Types.type_expr * Types.Uid.t * Jkind.sort) list + Jkind.Sort.Const.t -> 'k general_pattern + -> (Ident.t * string loc * Types.type_expr * Types.Uid.t * Jkind.Sort.Const.t) list (** Splits an or pattern into its value (left) and exception (right) parts. *) val split_pattern: diff --git a/typing/typeopt.ml b/typing/typeopt.ml index 793f3270778..eae41b8c550 100644 --- a/typing/typeopt.ml +++ b/typing/typeopt.ml @@ -136,7 +136,7 @@ type 'a classification = See comment on [classification] above to understand [classify_product]. 
*) let classify ~classify_product env loc ty sort : _ classification = let ty = scrape_ty env ty in - match Jkind.(Sort.default_to_value_and_get sort) with + match (sort : Jkind.Sort.Const.t) with | Base Value -> begin if is_always_gc_ignorable env ty then Int else match get_desc ty with @@ -231,7 +231,8 @@ let array_kind_of_elt ~elt_sort env loc ty = match elt_sort with | Some s -> s | None -> - type_legacy_sort ~why:Array_element env loc ty + Jkind.Sort.default_for_transl_and_get + (type_legacy_sort ~why:Array_element env loc ty) in let classify_product ty sorts = if Language_extension.(is_at_least Layouts Alpha) then @@ -577,7 +578,7 @@ and value_kind_variant env ~loc ~visited ~depth ~num_nodes_visited value_kind env ~loc ~visited ~depth ~num_nodes_visited ty | _ -> assert false end - | Variant_boxed cstrs_and_jkinds -> + | Variant_boxed cstrs_and_sorts -> let depth = depth + 1 in let for_constructor_fields fields ~depth ~num_nodes_visited ~field_to_type = List.fold_left_map @@ -665,7 +666,7 @@ and value_kind_variant env ~loc ~visited ~depth ~num_nodes_visited | None -> None | Some (num_nodes_visited, next_const, consts, next_tag, non_consts) -> - let cstr_shape, _ = cstrs_and_jkinds.(idx) in + let cstr_shape, _ = cstrs_and_sorts.(idx) in let (is_mutable, num_nodes_visited), fields = for_one_constructor constructor ~depth ~num_nodes_visited ~cstr_shape @@ -834,8 +835,7 @@ let[@inline always] rec layout_of_const_sort_generic ~value_kind ~error error const let layout env loc sort ty = - layout_of_const_sort_generic - (Jkind.Sort.default_to_value_and_get sort) + layout_of_const_sort_generic sort ~value_kind:(lazy (value_kind env loc ty)) ~error:(function | Base Value -> assert false @@ -854,9 +854,7 @@ let layout env loc sort ty = ) let layout_of_sort loc sort = - layout_of_const_sort_generic - (Jkind.Sort.default_to_value_and_get sort) - ~value_kind:(lazy Lambda.generic_value) + layout_of_const_sort_generic sort ~value_kind:(lazy Lambda.generic_value) 
~error:(function | Base Value -> assert false | Base Void -> @@ -872,7 +870,7 @@ let layout_of_sort loc sort = (Jkind.Sort.of_const const, Stable, None))) ) -let layout_of_const_sort c = +let layout_of_non_void_sort c = layout_of_const_sort_generic c ~value_kind:(lazy Lambda.generic_value) @@ -898,7 +896,7 @@ let function_arg_layout env loc sort ty = (** Whether a forward block is needed for a lazy thunk on a value, i.e. if the value can be represented as a float/forward/lazy *) let lazy_val_requires_forward env loc ty = - let sort = Jkind.Sort.for_lazy_body in + let sort = Jkind.Sort.Const.for_lazy_body in let classify_product _ sorts = let kind = Jkind.Sort.Const.Product sorts in raise (Error (loc, Unsupported_product_in_lazy kind)) diff --git a/typing/typeopt.mli b/typing/typeopt.mli index a990f654e53..5ee744b6698 100644 --- a/typing/typeopt.mli +++ b/typing/typeopt.mli @@ -26,16 +26,16 @@ val maybe_pointer : Typedtree.expression -> Lambda.immediate_or_pointer (* Supplying [None] for [elt_sort] should be avoided when possible. It will result in a call to [Ctype.type_sort] which can be expensive. *) val array_type_kind : - elt_sort:(Jkind.Sort.t option) + elt_sort:(Jkind.Sort.Const.t option) -> Env.t -> Location.t -> Types.type_expr -> Lambda.array_kind val array_type_mut : Env.t -> Types.type_expr -> Lambda.mutable_flag val array_kind_of_elt : - elt_sort:(Jkind.Sort.t option) + elt_sort:(Jkind.Sort.Const.t option) -> Env.t -> Location.t -> Types.type_expr -> Lambda.array_kind val array_kind : - Typedtree.expression -> Jkind.Sort.t -> Lambda.array_kind + Typedtree.expression -> Jkind.Sort.Const.t -> Lambda.array_kind val array_pattern_kind : - Typedtree.pattern -> Jkind.Sort.t -> Lambda.array_kind + Typedtree.pattern -> Jkind.Sort.Const.t -> Lambda.array_kind (* If [kind] or [layout] is unknown, attempt to specialize it by examining the type parameters of the bigarray. 
If [kind] or [length] is not unknown, returns @@ -50,30 +50,30 @@ val bigarray_specialize_kind_and_layout : take that check out. *) val layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout (* These translate a type system sort to a lambda layout. The function [layout] gives a more precise result---this should only be used when the kind is needed for compilation but the precise Lambda.layout isn't needed for optimization. [layout_of_sort] gracefully errors on void, while - [layout_of_base] loudly fails on void. *) -val layout_of_sort : Location.t -> Jkind.sort -> Lambda.layout -val layout_of_const_sort : Jkind.Sort.Const.t -> Lambda.layout + [layout_of_non_void_sort] loudly fails on void. *) +val layout_of_sort : Location.t -> Jkind.Sort.Const.t -> Lambda.layout +val layout_of_non_void_sort : Jkind.Sort.Const.t -> Lambda.layout (* Given a function type and the sort of its return type, compute the layout of its return type. *) val function_return_layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout (* Given a function type with two arguments and the sort of its return type, compute the layout of its return type. *) val function2_return_layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout (* Given a function type and the sort of its argument, compute the layout of its argument. Fails loudly if the type isn't a function type. 
*) val function_arg_layout : - Env.t -> Location.t -> Jkind.sort -> Types.type_expr -> Lambda.layout + Env.t -> Location.t -> Jkind.Sort.Const.t -> Types.type_expr -> Lambda.layout val value_kind : Env.t -> Location.t -> Types.type_expr -> Lambda.value_kind diff --git a/typing/types.ml b/typing/types.ml index ad08297c013..dde54bbfecc 100644 --- a/typing/types.ml +++ b/typing/types.ml @@ -313,7 +313,7 @@ and mixed_product_shape = and record_representation = | Record_unboxed | Record_inlined of tag * constructor_representation * variant_representation - | Record_boxed of (allowed * disallowed) jkind array + | Record_boxed of Jkind_types.Sort.Const.t array | Record_float | Record_ufloat | Record_mixed of mixed_product_shape @@ -324,7 +324,7 @@ and record_unboxed_product_representation = and variant_representation = | Variant_unboxed | Variant_boxed of (constructor_representation * - (allowed * disallowed) jkind array) array + Jkind_types.Sort.Const.t array) array | Variant_extensible and constructor_representation = @@ -337,7 +337,7 @@ and label_declaration = ld_mutable: mutability; ld_modalities: Mode.Modality.Value.Const.t; ld_type: type_expr; - ld_jkind : jkind_l; + ld_sort: Jkind_types.Sort.Const.t; ld_loc: Location.t; ld_attributes: Parsetree.attributes; ld_uid: Uid.t; @@ -357,7 +357,7 @@ and constructor_argument = { ca_modalities: Mode.Modality.Value.Const.t; ca_type: type_expr; - ca_jkind: jkind_l; + ca_sort: Jkind_types.Sort.Const.t; ca_loc: Location.t; } @@ -637,12 +637,13 @@ let equal_constructor_representation r1 r2 = r1 == r2 || match r1, r2 with let equal_variant_representation r1 r2 = r1 == r2 || match r1, r2 with | Variant_unboxed, Variant_unboxed -> true - | Variant_boxed cstrs_and_jkinds1, Variant_boxed cstrs_and_jkinds2 -> - Misc.Stdlib.Array.equal (fun (cstr1, jkinds1) (cstr2, jkinds2) -> + | Variant_boxed cstrs_and_sorts1, Variant_boxed cstrs_and_sorts2 -> + Misc.Stdlib.Array.equal (fun (cstr1, sorts1) (cstr2, sorts2) -> 
equal_constructor_representation cstr1 cstr2 - && Misc.Stdlib.Array.equal !jkind_equal jkinds1 jkinds2) - cstrs_and_jkinds1 - cstrs_and_jkinds2 + && Misc.Stdlib.Array.equal Jkind_types.Sort.Const.equal + sorts1 sorts2) + cstrs_and_sorts1 + cstrs_and_sorts2 | Variant_extensible, Variant_extensible -> true | (Variant_unboxed | Variant_boxed _ | Variant_extensible), _ -> @@ -657,8 +658,8 @@ let equal_record_representation r1 r2 = match r1, r2 with ignore (cr1 : constructor_representation); ignore (cr2 : constructor_representation); equal_tag tag1 tag2 && equal_variant_representation vr1 vr2 - | Record_boxed lays1, Record_boxed lays2 -> - Misc.Stdlib.Array.equal !jkind_equal lays1 lays2 + | Record_boxed sorts1, Record_boxed sorts2 -> + Misc.Stdlib.Array.equal Jkind_types.Sort.Const.equal sorts1 sorts2 | Record_float, Record_float -> true | Record_ufloat, Record_ufloat -> @@ -686,7 +687,7 @@ type 'a gen_label_description = lbl_arg: type_expr; (* Type of the argument *) lbl_mut: mutability; (* Is this a mutable field? *) lbl_modalities: Mode.Modality.Value.Const.t;(* Modalities on the field *) - lbl_jkind : jkind_l; (* Jkind of the argument *) + lbl_sort: Jkind_types.Sort.Const.t; (* Sort of the argument *) lbl_pos: int; (* Position in block *) lbl_num: int; (* Position in type *) lbl_all: 'a gen_label_description array; (* All the labels in this type *) diff --git a/typing/types.mli b/typing/types.mli index ef573d3293e..60c342a757a 100644 --- a/typing/types.mli +++ b/typing/types.mli @@ -605,7 +605,7 @@ and record_representation = (* For an inlined record, we record the representation of the variant that contains it and the tag/representation of the relevant constructor of that variant. *) - | Record_boxed of jkind_l array + | Record_boxed of Jkind_types.Sort.Const.t array | Record_float (* All fields are floats *) | Record_ufloat (* All fields are [float#]s. 
Same runtime representation as [Record_float], @@ -625,7 +625,7 @@ and record_unboxed_product_representation = and variant_representation = | Variant_unboxed | Variant_boxed of (constructor_representation * - jkind_l array) array + Jkind_types.Sort.Const.t array) array (* The outer array has an element for each constructor. Each inner array has a jkind for each argument of the corresponding constructor. @@ -652,7 +652,7 @@ and label_declaration = ld_mutable: mutability; ld_modalities: Mode.Modality.Value.Const.t; ld_type: type_expr; - ld_jkind : jkind_l; + ld_sort: Jkind_types.Sort.Const.t; ld_loc: Location.t; ld_attributes: Parsetree.attributes; ld_uid: Uid.t; @@ -672,7 +672,7 @@ and constructor_argument = { ca_modalities: Mode.Modality.Value.Const.t; ca_type: type_expr; - ca_jkind: jkind_l; + ca_sort: Jkind_types.Sort.Const.t; ca_loc: Location.t; } @@ -898,7 +898,7 @@ type 'a gen_label_description = lbl_mut: mutability; (* Is this a mutable field? *) lbl_modalities: Mode.Modality.Value.Const.t; (* Modalities on the field *) - lbl_jkind : jkind_l; (* Jkind of the argument *) + lbl_sort: Jkind_types.Sort.Const.t; (* Sort of the argument *) lbl_pos: int; (* Position in block *) lbl_num: int; (* Position in the type *) lbl_all: 'a gen_label_description array; (* All the labels in this type *) @@ -908,9 +908,6 @@ type 'a gen_label_description = lbl_attributes: Parsetree.attributes; lbl_uid: Uid.t; } -(* CR layouts v5: once we allow [any] in record fields, change [lbl_jkind] to - be a [sort option]. This will allow a fast path for representability checks - at record construction, and currently only the sort is used anyway. 
*) type label_description = record_representation gen_label_description diff --git a/typing/value_rec_check.ml b/typing/value_rec_check.ml index 4d9c86904e6..6b76def1e0a 100644 --- a/typing/value_rec_check.ml +++ b/typing/value_rec_check.ml @@ -718,11 +718,13 @@ let rec expression : Typedtree.expression -> term_judg = | Texp_unboxed_tuple exprs -> list expression (List.map (fun (_, e, _) -> e) exprs) << Return | Texp_array (_, elt_sort, exprs, _) -> + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in list expression exprs << array_mode exp elt_sort | Texp_list_comprehension { comp_body; comp_clauses } -> join ((expression comp_body << Guard) :: comprehension_clauses comp_clauses) | Texp_array_comprehension (_, elt_sort, { comp_body; comp_clauses }) -> + let elt_sort = Jkind.Sort.default_for_transl_and_get elt_sort in join ((expression comp_body << array_mode exp elt_sort) :: comprehension_clauses comp_clauses) | Texp_construct (_, desc, exprs, _) ->