Shared_ast
module Runtime = Runtime_ocaml.Runtime
module ScopeName : sig ... end
module TopdefName : sig ... end
module StructName : sig ... end
module StructField : sig ... end
module EnumName : sig ... end
module EnumConstructor : sig ... end
Only used by surface
module RuleName : sig ... end
module LabelName : sig ... end
Used for unresolved structs/maps in desugared
module IdentName = Catala_utils.String
Only used by desugared/scopelang
module ScopeVar : sig ... end
module SubScopeName : sig ... end
module StateName : sig ... end
Define a common base type for the expressions in most passes of the compiler
we instantiate them with a polymorphic variant to take advantage of sub-typing. The values aren't actually used.
These types allow to select the features present in any given expression type
'a any
is 'a, but adds the constraint that it should be restricted to valid AST kinds
type ('a, 'b) dcalc_lcalc =
< monomorphic : yes
; polymorphic : yes
; overloaded : no
; resolved : yes
; syntacticNames : no
; resolvedNames : yes
; scopeVarStates : no
; scopeVarSimpl : no
; explicitScopes : no
; assertions : yes
; defaultTerms : 'a
; exceptions : 'b >
This type regroups Dcalc and Lcalc ASTs.
type typ = naked_typ Catala_utils.Mark.pos
and naked_typ =
| TLit of typ_lit |
| TTuple of typ list |
| TStruct of StructName.t |
| TEnum of EnumName.t |
| TOption of typ |
| TArrow of typ list * typ |
| TArray of typ |
| TAny |
type date = Runtime.date
type date_rounding = Runtime.date_rounding
type duration = Runtime.duration
type log_entry =
| VarDef of naked_typ | (* During code generation, we need to know the type of the variable being logged for embedding *) |
| BeginCall | |
| EndCall | |
| PosRecordIfTrueBool |
module Op : sig ... end
Classification of operators on how they should be typed
type 'a operator = 'a Op.t
type _ mark =
| Untyped : untyped -> untyped mark |
| Typed : typed -> typed mark |
| Custom : 'a custom -> 'a custom mark |
The generic type of AST markings. Using a GADT allows functions to be polymorphic in the marking, but still do transformations on types when appropriate. The Custom
case can be used within passes that need to store specific information, e.g. typing
type ('a, 'm) marked = ( 'a, 'm mark ) Catala_utils.Mark.ed
Type of values marked with the above standard mark GADT
Define a common base type for the expressions in most passes of the compiler
type lit =
| LBool of bool |
| LInt of Runtime.integer |
| LRat of Runtime.decimal |
| LMoney of Runtime.money |
| LUnit |
| LDate of date |
| LDuration of duration |
Literals are the same throughout compilation except for the LEmptyError
case which is eliminated midway through.
type 'a glocation =
| DesugaredScopeVar : ScopeVar.t Catala_utils.Mark.pos
* StateName.t option -> < scopeVarStates : yes.. > glocation |
| ScopelangScopeVar : ScopeVar.t Catala_utils.Mark.pos -> < scopeVarSimpl : yes.. >
glocation |
| SubScopeVar : ScopeName.t
* SubScopeName.t Catala_utils.Mark.pos
* ScopeVar.t Catala_utils.Mark.pos -> < explicitScopes : yes.. > glocation |
| ToplevelVar : TopdefName.t Catala_utils.Mark.pos -> < explicitScopes : yes.. >
glocation |
Locations are handled differently in desugared
and scopelang
type ('a, 'm) gexpr = ( ( 'a, 'm ) naked_gexpr, 'm ) marked
and ('a, 'm) naked_gexpr = ( 'a, 'a, 'm ) base_gexpr
General expressions: groups all expression cases of the different ASTs, and uses a GADT to eliminate irrelevant cases for each one. The 't
annotations are also totally unconstrained at this point. The dcalc exprs, for ex ample, are then defined with type naked_expr = dcalc naked_gexpr
plus the annotations.
A few tips on using this GADT:
fun (type a) (x: a naked_gexpr) -> ...
let rec f: type a . a naked_gexpr -> ...
Expr
rather than completely defining your recursion manually.The first argument of the base_gexpr type caracterises the "deep" type of the AST, while the second is the shallow type. They are always equal for well-formed AST types, but differentiating them ephemerally allows us to do well-typed recursive transformations on the AST that change its type
and ('a, 'b, 'm) base_gexpr =
| ELit : lit -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EApp : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EOp : {
} -> ( 'a, < .. > as 'b, 'm ) base_gexpr | ||||
| EArray : ( 'a, 'm ) gexpr list -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EVar : ( 'a, 'm ) naked_gexpr Bindlib.var -> ( 'a, _, 'm ) base_gexpr | ||||
| EAbs : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EIfThenElse : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EStruct : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EInj : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| EMatch : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| ETuple : ( 'a, 'm ) gexpr list -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| ETupleAccess : {
} -> ( 'a, < .. >, 'm ) base_gexpr | ||||
| ELocation : 'b glocation -> ( 'a, < .. > as 'b, 'm ) base_gexpr | ||||
| EScopeCall : {
} -> ( 'a, < explicitScopes : yes.. >, 'm ) base_gexpr | ||||
| EDStructAccess : {
} -> ( 'a, < syntacticNames : yes.. >, 'm ) base_gexpr | (*
| |||
| EStructAccess : {
} -> ( 'a, < resolvedNames : yes.. >, 'm ) base_gexpr | (* Resolved struct/enums, after | |||
| EAssert : ( 'a, 'm ) gexpr -> ( 'a, < assertions : yes.. >, 'm ) base_gexpr | ||||
| EDefault : {
} -> ( 'a, < defaultTerms : yes.. >, 'm ) base_gexpr | ||||
| EEmptyError : ( 'a, < defaultTerms : yes.. >, 'm ) base_gexpr | ||||
| EErrorOnEmpty : ( 'a, 'm ) gexpr -> ( 'a, < defaultTerms : yes.. >, 'm )
base_gexpr | ||||
| ERaise : except -> ( 'a, < exceptions : yes.. >, 'm ) base_gexpr | ||||
| ECatch : {
} -> ( 'a, < exceptions : yes.. >, 'm ) base_gexpr |
type ('a, 'm) boxed_gexpr = ( ( 'a, 'm ) naked_gexpr Bindlib.box, 'm ) marked
The annotation is lifted outside of the box for expressions
type 'e boxed = ( 'a, 'm ) boxed_gexpr constraint 'e = ( 'a, 'm ) gexpr
('a, 'm) gexpr boxed
is ('a, 'm) boxed_gexpr
. The difference with ('a, 'm) gexpr Bindlib.box
is that the annotations is outside of the box, and can therefore be accessed without the need to resolve the box
type ('e, 'b) binder = ( ( 'a, 'm ) naked_gexpr, 'b ) Bindlib.binder constraint 'e = ( 'a, 'm ) gexpr
The expressions use the Bindlib library, based on higher-order abstract syntax
type ('e, 'b) mbinder = ( ( 'a, 'm ) naked_gexpr, 'b ) Bindlib.mbinder constraint 'e = ( 'a, 'm ) gexpr
Constructs scopes and programs on top of expressions. The 'e
type parameter throughout is expected to match instances of the gexpr
type defined above. Markings are constrained to the mark
GADT defined above. Note that this structure is at the moment only relevant for dcalc
and lcalc
, as scopelang
has its own scope structure, as the name implies.
type scope_let_kind =
This kind annotation signals that the let-binding respects a structural invariant. These invariants concern the shape of the expression in the let-binding, and are documented below.
type 'e scope_let = {
scope_let_kind : scope_let_kind; |
scope_let_typ : typ; |
scope_let_expr : 'e; |
scope_let_next : ( 'e, 'e scope_body_expr ) binder; |
scope_let_pos : Catala_utils.Pos.t; |
} constraint 'e = ( 'a any, _ ) gexpr
This type is parametrized by the expression type so it can be reused in later intermediate representations.
and 'e scope_body_expr =
| Result of 'e |
| ScopeLet of 'e scope_let |
constraint 'e = ( 'a any, _ ) gexpr
A scope let-binding has all the information necessary to make a proper let-binding expression, plus an annotation for the kind of the let-binding that comes from the compilation of a Scopelang.Ast
statement.
type 'e scope_body = {
scope_body_input_struct : StructName.t; |
scope_body_output_struct : StructName.t; |
scope_body_expr : ( 'e, 'e scope_body_expr ) binder; |
} constraint 'e = ( 'a any, _ ) gexpr
Instead of being a single expression, we give a little more ad-hoc structure to the scope body by decomposing it in an ordered list of let-bindings, and a result expression that uses the let-binded variables. The first binder is the argument of type scope_body_input_struct
.
type struct_ctx = typ StructField.Map.t StructName.Map.t
type enum_ctx = typ EnumConstructor.Map.t EnumName.Map.t
type scope_out_struct = {
out_struct_name : StructName.t; |
out_struct_fields : StructField.t ScopeVar.Map.t; |
}
type decl_ctx = {
ctx_enums : enum_ctx; | |
ctx_structs : struct_ctx; | |
ctx_struct_fields : StructField.t StructName.Map.t IdentName.Map.t; | (* needed for disambiguation (desugared -> scope) *) |
ctx_scopes : scope_out_struct ScopeName.Map.t; |
}
module Var : sig ... end
module Type : sig ... end
module Operator : sig ... end
module Expr : sig ... end
Functions handling the expressions of shared_ast
module Scope : sig ... end
Functions handling the code item structures of shared_ast
, in particular the scopes
module Program : sig ... end
module Print : sig ... end
Printing functions for the default calculus AST
module Typing : sig ... end
Typing for the default calculus. Because of the error terms, we perform type inference using the classical W algorithm with union-find unification.
module Interpreter : sig ... end
Reference interpreter for the default calculus
module Optimizations : sig ... end
Optimization passes for default calculus and lambda calculus programs and expressions