Skip to content

Commit

Permalink
doc(engine): ppx_functor_application
Browse files Browse the repository at this point in the history
Fixes #742
  • Loading branch information
Lucas Franceschino committed Jul 1, 2024
1 parent 53aa5d8 commit 420857f
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 58 deletions.
125 changes: 71 additions & 54 deletions engine/utils/ppx_functor_application/README.md
Original file line number Diff line number Diff line change
@@ -1,65 +1,82 @@
# `ppx_inline`
# `ppx_functor_application`

Inlines chunks of OCaml AST in place.
## Motivation
The engine consists of numerous phases, implemented as OCaml functors
parametrized over "AST features" (see the book). Two phases can be
binded (sequenced) via `Phase_utils.BindPhase` functor.

Rewrite `[%%inline_defs L]`, `[%%inline_arms L]`, `[%%inline_body PATH]` inside nodes `[%%inlined_contents NODE]`, where:
- `L` is a (`+`/`-`-separated) list of path specifying which chunk of AST we should inline;
- `PATH` is a `.`-separated list of strings, possibly containing the `*` glob.
Since OCaml define (or let users define) infix notations for functor
application, combining many phases (functors) results in the following
christmas tree looking kinds of code:

## Example:
File `some_module.ml`:
```ocaml
let f x = x + 1
let g x = x + 2
let f' x = x + 3
module M = struct
let w = 0
let x = 1
let y = 2
let z = 3
struct
module ARG0 = (Phases.Reject.RawOrMutPointer)(Features.Rust)
module ARG1 = (Phases.Transform_hax_lib_inline)(ARG0.FB)
module ARG2 = (Phases.Specialize)(ARG1.FB)
module ARG3 = (Phases.Drop_sized_trait)(ARG2.FB)
module ARG4 = (Phases.Simplify_question_marks)(ARG3.FB)
module ARG5 = (Phases.And_mut_defsite)(ARG4.FB)
module ARG6 = (Phases.Reconstruct_for_loops)(ARG5.FB)
module ARG7 = (Phases.Reconstruct_while_loops)(ARG6.FB)
module ARG8 = (Phases.Direct_and_mut)(ARG7.FB)
module ARG9 = (Phases.Reject.Arbitrary_lhs)(ARG8.FB)
module ARG10 = (Phases.Drop_blocks)(ARG9.FB)
module ARG11 = (Phases.Drop_references)(ARG10.FB)
module ARG12 = (Phases.Trivialize_assign_lhs)(ARG11.FB)
module ARG13 = (Side_effect_utils.Hoist)(ARG12.FB)
module ARG14 = (Phases.Simplify_match_return)(ARG13.FB)
module ARG15 = (Phases.Drop_needless_returns)(ARG14.FB)
module ARG16 = (Phases.Local_mutation)(ARG15.FB)
module ARG17 = (Phases.Reject.Continue)(ARG16.FB)
module ARG18 = (Phases.Cf_into_monads)(ARG17.FB)
module ARG19 = (Phases.Reject.EarlyExit)(ARG18.FB)
module ARG20 = (Phases.Functionalize_loops)(ARG19.FB)
module ARG21 = (Phases.Reject.As_pattern)(ARG20.FB)
module ARG22 = (Phases.Traits_specs)(ARG21.FB)
module ARG23 = (Phases.Simplify_hoisting)(ARG22.FB)
module ARG24 = (Phases.Newtype_as_refinement)(ARG23.FB)
module ARG25 = (SubtypeToInputLanguage)(ARG24.FB)
module ARG26 = (Identity)(ARG25.FB)
include
((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(ARG0))(ARG1)))(ARG2)))(ARG3)))(ARG4)))(ARG5)))(ARG6)))(ARG7)))(ARG8)))(ARG9)))(ARG10)))(ARG11)))(ARG12)))(ARG13)))(ARG14)))(ARG15)))(ARG16)))(ARG17)))(ARG18)))(ARG19)))(ARG20)))(ARG21)))(ARG22)))(ARG23)))(ARG24)))(ARG25)))(ARG26)
end
let h x = ""
type foo = | A | B
let i (x: foo) =
match x with
| A -> 0
| B -> 1
```

The module:
```ocaml
module%inlined_contents [@@add "some_module.ml"] Test = struct
[%%inline_defs f + g + foo]
[%%inline_defs "M.*" - z - y]
let h: int -> string = [%%inline_body h]
let i: foo -> int =
match i with
| [%%inline_arms "i.*" - B] -> dummy
| B -> 123
end
```
The system of phases is supposed to let backends opt-in or out easily
for phases. This syntactic limitation was a major issue for that.

## Solution
This PPX defines a small DSL that embeds in the OCaml syntax of
expressions to provide a nice way of binding phases functors via a
`|>` infix operator.

Will be rewritten into:
Example:
```ocaml
module%inlined_contents [@@add "some_module.ml"] Test = struct
(* [%%inline_defs f + g + foo] *)
let f x = x + 1
let g x = x + 2
type foo = | A | B
(* [%%inline_defs "M.*" - z - y] *)
let w = 0
let x = 1
let h: int -> string = (fun x -> "")
let i: foo -> int =
match i with
| A -> 0
| B -> 123
end
module TransformToInputLanguage =
[%functor_application
Phases.Reject.RawOrMutPointer(Features.Rust)
|> Phases.Transform_hax_lib_inline
|> Phases.Specialize
|> Phases.Drop_sized_trait
|> Phases.Simplify_question_marks
|> Phases.And_mut_defsite
|> Phases.Reconstruct_for_loops
|> Phases.Reconstruct_while_loops
|> SubtypeToInputLanguage
|> Identity
]
[@ocamlformat "disable"]
```

Note: the `[@ocamlformat "disable"]` annotation is important,
otherwise `ocamlformat` tries to format those PPX invokations with its
rules for expressions, yielding rather ugly looking code...

### Syntax
- `Name`: a module `Name`
- `Name(X, Y, Z)`: the application of the functor `Name` with three arguments `X`, `Y` and `Z`
- `(module <M>)`: the arbitary OCaml module expression `<M>`
- `<a> <b>`: the application of the module described by `<a>` and the module described by `<b>`
- `(fun X -> <a>)`: a "functor" from `X` to `<a>`
- `<a> |> <b>`: `<a>` binded with `<b>`
23 changes: 19 additions & 4 deletions engine/utils/ppx_functor_application/ppx_functor_application.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ let show_module_expr = string_of_module_expr
let pp_module_expr (fmt : Format.formatter) (s : module_expr) : unit =
Format.pp_print_string fmt @@ string_of_module_expr s

(** Defines a DSL for functor application. *)
type module_dsl =
| Var of longident
| App of module_dsl * module_dsl
Expand All @@ -41,6 +42,7 @@ type module_dsl =

let var_of_string s = Var (Longident.Lident s)

(** Elaborate a OCaml module expression from a `module_dsl` *)
let rec elab ~loc (t : module_dsl) : module_expr =
let (module E) = Ast_builder.make loc in
let h = elab ~loc in
Expand Down Expand Up @@ -92,6 +94,7 @@ let rec collect_pipes (t : module_dsl) : module_dsl list =
| Meta (Pipe l, _) | Pipe l -> List.concat_map ~f:collect_pipes l
| _ -> [ t ]

(** Get rid of extra `Pipe` nodes *)
let rec normalize (t : module_dsl) : module_dsl =
match t with
| App (f, x) -> App (normalize f, normalize x)
Expand All @@ -104,16 +107,28 @@ let rec normalize (t : module_dsl) : module_dsl =
| [ t ] -> t
| l -> Pipe l)

(** Recognize a small language embedded in OCaml syntax for applying
functors in chain. *)
let rec parse expr =
let r =
match expr with
| { pexp_desc = Pexp_construct ({ txt; _ }, None); _ } -> Var txt
| { pexp_desc = Pexp_construct ({ txt; _ }, None); _ } ->
(* Parses variables (module names are uppercase, since we are looking at OCaml expressions, so we match on constructors) *)
Var txt
| { pexp_desc = Pexp_construct ({ txt; _ }, Some arg); _ } ->
(* Parses module applcations (same as above: in expressions, module applications are parsed as constructor applications) *)
App (Var txt, parse arg)
| [%expr [%e? m1] |> [%e? m2]] -> Pipe [ parse m1; parse m2 ]
| [%expr (module [%m? m])] -> ModExpr m
| [%expr [%e? f] [%e? x]] -> App (parse f, parse x)
| [%expr [%e? m1] |> [%e? m2]] ->
(* Parses `... |> ...` infix module application *)
Pipe [ parse m1; parse m2 ]
| [%expr (module [%m? m])] ->
(* Parses module expressions (in this case, that corresponds to OCaml module expression) *)
ModExpr m
| [%expr [%e? f] [%e? x]] ->
(* Parses module applications (e.g. `(fun x -> ...) (module YXZ)`) *)
App (parse f, parse x)
| [%expr fun [%p? x] -> [%e? body]] -> (
(* Parses module abstractions (e.g. `fun X -> Z(X)`) *)
match x with
| { ppat_desc = Ppat_construct ({ txt = Lident x; _ }, None); _ } ->
Abs (x, parse body)
Expand Down

0 comments on commit 420857f

Please sign in to comment.