From 1e7423fffec62c85f56022a8f3ccbbec41b5d7c8 Mon Sep 17 00:00:00 2001 From: rhiannon morris Date: Sat, 17 Sep 2022 20:57:25 +0200 Subject: [PATCH] add ats post --- posts/2022-09-16-ats.md | 436 ++++++++++++++++++++++++++++++++++++++++ style/page.css | 1 + syntax/ats.xml | 210 +++++++++++++++++++ 3 files changed, 647 insertions(+) create mode 100644 posts/2022-09-16-ats.md create mode 100644 syntax/ats.xml diff --git a/posts/2022-09-16-ats.md b/posts/2022-09-16-ats.md new file mode 100644 index 0000000..9de512b --- /dev/null +++ b/posts/2022-09-16-ats.md @@ -0,0 +1,436 @@ +--- +title: a little ats program +date: 2022-09-16 +tags: [computer, ATS, cool languages] +show-toc: true +... + +[ats] is a language that is a little infamous among type system likers, for its +complexity, its weird syntax, and its thin documentation. well i won't be beat, +so here's a little implementation of simply-typed lambda calculus. right now it +doesn't actually use many ats-specific features, since i just didn't end up +needing them. + +[ats]: http://www.ats-lang.org + +it does leak memory without gc though, so maybe i should have. +oh well one thing at a time + +# installation, et cetera + +instead of trying to install ats myself, i just used [nix], since i already +have it installed as the least annoying package manager for idris. i just run +`nix shell nixpkgs#ats2` and i'm in a shell with `patscc` and `patsopt` +available. if you want to try building it yourself, that's, uh, your choice. + +[nix]: https://nixos.org + +to compile the program, i wrote a makefile. just a small one! + +```makefile +ATSCC := patscc +ATSFLAGS := \ + -DATS_MEMALLOC_GCBDW -lgc \ + -atsccomp \ + 'gcc -I$${PATSHOME} -I$${PATSHOME}/ccomp/runtime \ + -L$${PATSHOME}/ccomp/atslib/lib -std=gnu99' + +%: %.dats + $(ATSCC) $< $(ATSFLAGS) -o $@ +``` + +(in some installs the compiler is just called `atscc` for some reason.) + +the first line of `ATSFLAGS` tells it which allocator to use. in this case, +`libgc`. the rest tells it to use `-std=gnu99`, so that it can find `alloca()`; +the rest is the same as the default. + +one last thing. the first two lines of the program have to be this. i didn't +look into it too hard but they seem to define... something... and if you forget +to include them the c output by ats isn't even syntactically correct. ouch. + +```ats +#include "share/atspre_define.hats" +#include "share/atspre_staload.hats" +``` + +# types + +let's start by defining the syntax for types. it's going to be a single base +type, and functions. + +$$ +\newcommand\Typ{\mathit{typ}} +\newcommand\OR{\mathrel|} +\Typ ::= \bullet \OR \Typ_1 \to \Typ_2 +$$ + +so obviously, my first attempt was to just write this: + +```ats +datatype typ = Base | Arr of (typ, typ) +``` + +i'm going to need to check two types for equality, so let's do that next. + +```ats +fun eq_typ (a : typ, b : typ) .. :<> bool = +case (a, b) of +| (Base(), Base()) => true +| (Arr(a1, a2), Arr(b1, b2)) => eq_typ(a1, b1) && eq_typ(a2, b2) +| (_, _) => false +``` + +alas: (it is talking about the `..`) + +``` +/home/niss/ats/lam.dats: 149(line=7, offs=33) -- 150(line=7, offs=34): + error(2): the static identifier [a] is unrecognized. +``` + +surprise! as far as i can tell ats only allows termination checking to be +based on _static_ (compile time) information. so i need to index the +datatype with, uh, something. when we define functions on syntax like this, +they basically always recurse on a subterm. so the height of the syntax tree +will do. what i _actually_ have are these more messy definitions. + +```ats +datatype typ(int) = +| Base(1) +| {h1, h2 : nat} Arr(1 + max(h1, h2)) of (typ(h1), typ(h2)) + +typedef typ = [h : nat] typ(h) +``` + +the base type $\bullet$ has height 1, and an arrow type has a height one more +than the max height of its subterms. `nat` is a version of `int` constrained to +be at least zero, rather than a separate type, so that's why it says `int` one +place and `nat` the other. the typechecker has a linear arithmetic solver for +reasoning about inequalities and expressions like `1 + max(h1, h2)`. + +i don't want to be talking about "types of height $h$" unless i really have to, +though, so there's a definition for just _a_ type too. the syntax `[x : t1] t2` +is an existential type that is automatically wrapped and unwrapped, so in this +case it is a `typ(h)` for some, unknown, `h`. also, names can be overloaded by +their arity so `typ(h)` and `typ` can both be defined in the same scope. + +so now `eq_typ` has to have a more intricate type to talk about height. the +definition is still the same though. + +```ats +fun eq_typ {ha : nat} .. (a : typ(ha), b : typ) :<> bool = +case (a, b) of +| (Base(), Base()) => true +| (Arr(a1, a2), Arr(b1, b2)) => eq_typ(a1, b1) && eq_typ(a2, b2) +| (_, _) => false + +overload = with eq_typ +``` + +since i only actually need to care about one of the arguments' height for the +termination proof, the other can still just be a `typ`. the last line lets me +write `a = b` instead of `eq_typ(a, b)`, which is nice. + +next, i want to be able to print types. ideally this would be a function that +returns a string, but string stuff involves pointer weirdness i haven't worked +out yet. so it just prints. the only two precedence levels in these types are +"left of an arrow" and "not left of an arrow", so that can just be a `bool`. +unfortunately `print` has no information about its termination behaviour so +there's no point bothering with that stuff here either. :confused: + +```ats +fun print_typ (parens : bool, t : typ) : void = +case t of +| Base() => print "•" +| Arr(a, b) => begin + if parens then print("("); + print_typ(true, a); + print(" → "); + print_typ(false, b); + if parens then print(")"); + end + +fn print_typ (t : typ) : void = print_typ(false, t) +overload print with print_typ +``` + +# terms + +types are done, now let's move on to terms. we just want names, lambdas, and +application. + +$$ +\newcommand\Term{\mathit{term}} +\newcommand\Var{\mathit{var}} +\Term ::= \Var \OR \lambda \Var : \Typ. \Term \OR \Term_1 \; \Term_2 +$$ + +for the same reason as before, terms are indexed by their height, but _also_ by +the number of variables in scope, since i want de bruijn indices to be +intrinsically well-scoped. why even have a type system otherwise. so a +`term(n, h)` has $n$ variables and height $h$. + +```ats +datatype term(int, int) = +| {n, i : nat | i < n} Var(n, 1) of int(i) +| {n, h : nat} Lam(n, 1 + h) of (typ, term(1 + n, h)) +| {n, h1, h2 : nat} App(n, 1 + max(h1, h2)) of (term(n, h1), term(n, h2)) + +typedef term(n : int) = [h : nat] term(n, h) +``` + +a variable is [a de bruijn index][db], which is a natural less than `n`. to +express this we need to bridge the static world, where `n` lives, and the +dynamic, where `Var`'s field lives. to do this i use a singleton type +`int(i)`, whose only value is `i` itself. with this type, you can constraint the +static `i`, and in turn the value of the field. + +[db]: https://en.wikipedia.org/wiki/De_Bruijn_index + + +# contexts + +```ats +typedef ctx(n: int) = list(typ, n) +``` + +a typing context is just a list of types of the appropriate length. since +there's no binding in types here, that's all we need. + +looking up into a context is just list lookup, with some constraints to make it +work. + +```ats +fun lookup {n, i : nat | i < n} .. (g : ctx(n), i : nat(i)) :<> typ = +let val+ list_cons(x, g) = g in + if i = 0 then x else lookup(g, i - 1) +end +``` + +from the constraints $n \ge 0, i \ge 0, i < n$, the compiler can see that, as +$n \ge 1$, the list is nonempty and `list_cons` _is_ the only case. it can also +propagate the $i \ne 0$ condition from the `else` to see the recursive call is +ok too, and this function is total. + +:::aside +while writing this post i discovered that this is actually a restatement of +the type of `xs[i]` in the standard library and i could have just used that. +oops! +::: + + +# typechecker + +i'm going to write the type checker using exceptions for error handling, because +using an error monad without `do` notation was just too annoying. but from the +outside it is still going to use a `result` type. + +```ats +datatype result(t@ype, t@ype) = +| {a, e : t@ype} Ok(a, e) of a +| {a, e : t@ype} Err(a, e) of e +``` + +the funky looking `t@ype` refers to a type of unknown size, as opposed to `type` +which can only be instantiated with pointer-sized types. that will not come up +again here but "`t@ype`" is definitely the funniest part of ats's syntax. + +the errors we need here are: + +- a function takes an argument of type $A$, but is given one of type $B$; +- a term of a non-arrow type is being applied. + +```ats +datatype err = +| Clash of (typ, typ) +| NotArr of typ + +fn print_err (e : err) : void = … // incredibly uninteresting +overload print with print_err + + +typedef typ_res = result(typ, err) + +fn print_res (r : typ_res) : void = … +overload print with print_res +``` + +so onto the typechecker. like i said, it's going to use a local exception for +errors, but then catch it on the outside so that it has a pure type. + +```ats +fn typ_of {n : nat} (g : ctx n, t : term(n)) :<> typ_res = +let + exception ERR of err + fun go {n, h : nat} .. (g : ctx(n), t : term(n, h)) : typ = + case t of + | Var(i) => lookup(g, i) + | Lam(a, t) => Arr(a, go(list_cons(a, g), t)) + | App(s, t) => begin + case go(g, s) of + | Arr(a1, b) => + let val a2 = go(g, t) in + if a1 = a2 then b else $raise ERR(Clash(a1, a2)) + end + | b => $raise ERR(NotArr(b)) + end +in + $effmask_exn(try Ok(go(g, t)) with ~ERR(e) => Err(e)) +end + +fn typ_of (t : term(0)) :<> typ_res = typ_of(list_nil, t) +``` + +most of this is fine. a bit noisy i guess. but there are a couple of new things: + +- in the type of `go`, the `:` part means it can throw exceptions, but has + no other effects. +- `$effmask_exn(e)` hides the `exn` effect in `e`, because ats doesn't + distinguish different exceptions in effects so it can't tell that `ERR` is the + only one that is ever raised. +- exceptions are linear, and the pattern `~ERR(e)` is consuming the outermost + constructor. +- exceptions are the only reason we needed `alloca` in this program + :woozy_face: + + +# the end + +ok, a quick `main` function, and that's it! + +```ats +implement main0() = begin + print("type of [λx:•. x] is: "); + print(typ_of(Lam(Base, Var(0)))); + print("\ntype of [λx:•. x x] is: "); + print(typ_of(Lam(Base, App(Var(0), Var(0))))); + print("\n") +end +``` + +and it works, at least on these two examples. + +``` +type of [λx:•. x] is: • → • +type of [λx:•. x x] is: expected an arrow type, but got [•] +``` + +uh. thank's for reading. maybe i'll write another one if i think of another +nice little program that actually uses more of ats's weirdness. + +-------------------------------------------------------------------------------- + +
+full code +```ats +#include "share/atspre_define.hats" +#include "share/atspre_staload.hats" + +datatype typ(int) = +| Base(1) +| {h1, h2 : nat} Arr(1 + max(h1, h2)) of (typ(h1), typ(h2)) + +typedef typ = [h : nat] typ(h) + +fun eq_typ {ha : nat} .. (a : typ(ha), b : typ) :<> bool = +case (a, b) of +| (Base(), Base()) => true +| (Arr(a1, a2), Arr(b1, b2)) => eq_typ(a1, b1) && eq_typ(a2, b2) +| (_, _) => false +overload = with eq_typ + +fun print_typ (parens : bool, t : typ) : void = +case t of +| Base() => print "•" +| Arr(a, b) => begin + if parens then print("("); + print_typ(true, a); + print(" → "); + print_typ(false, b); + if parens then print(")"); + end + +fn print_typ (t : typ) : void = print_typ(false, t) +overload print with print_typ + + +datatype term(int, int) = +| {n, i : nat | i < n} Var(n, 1) of int(i) +| {n, h : nat} Lam(n, 1 + h) of (typ, term(1 + n, h)) +| {n, h1, h2 : nat} App(n, 1 + max(h1, h2)) of (term(n, h1), term(n, h2)) + +typedef term(n : int) = [h : nat] term(n, h) + + +typedef ctx(n: int) = list(typ, n) + +fun lookup {n, i : nat | i < n} .. (g : ctx(n), i : int(i)) :<> typ = +let val+ list_cons(x, g) = g in + if i = 0 then x else lookup(g, i - 1) +end + + +datatype result(t@ype, t@ype) = +| {a, e : t@ype} Ok(a, e) of a +| {a, e : t@ype} Err(a, e) of e + + +datatype err = +| Clash of (typ, typ) +| NotArr of typ + +fn print_err (e : err) : void = +case e of +| Clash(a, b) => begin + print("clash between types ["); + print(a); + print("] and ["); + print(b); + print("]"); + end +| NotArr(t) => begin + print("expected an arrow type, but got ["); + print(t); + print("]"); + end +overload print with print_err + + +typedef typ_res = result(typ, err) + +fn print_res (r : typ_res) : void = +case r of Ok(t) => print(t) | Err(e) => print(e) +overload print with print_res + + +fn typ_of {n : nat} (g : ctx n, t : term(n)) :<> typ_res = +let + exception ERR of err + fun go {n, h : nat} .. (g : ctx(n), t : term(n, h)) : typ = + case t of + | Var(i) => lookup(g, i) + | Lam(a, t) => Arr(a, go(list_cons(a, g), t)) + | App(s, t) => begin + case go(g, s) of + | Arr(a1, b) => + let val a2 = go(g, t) in + if a1 = a2 then b else $raise ERR(Clash(a1, a2)) + end + | b => $raise ERR(NotArr(b)) + end +in + $effmask_exn(try Ok(go(g, t)) with ~ERR(e) => Err(e)) +end + +fn typ_of (t : term(0)) :<> typ_res = typ_of(list_nil, t) + + +implement main0() = begin + print("type of [λx:•. x] is: "); + print(typ_of(Lam(Base, Var(0)))); + print("\ntype of [λx:•. x x] is: "); + print(typ_of(Lam(Base, App(Var(0), Var(0))))); + print("\n") +end +``` +
diff --git a/style/page.css b/style/page.css index b2f2398..55d4530 100644 --- a/style/page.css +++ b/style/page.css @@ -405,3 +405,4 @@ aside { :is(.st, .fl, .dv, .sc) { color: hsl(143deg, 100%, 20%); } .wa { color: hsl(350deg, 80%, 25%); text-decoration: wavy 1.5px underline; } .al { color: hsl(350deg, 80%, 25%); } +.cn { color: hsl(343deg, 100%, 30%); } diff --git a/syntax/ats.xml b/syntax/ats.xml new file mode 100644 index 0000000..632870a --- /dev/null +++ b/syntax/ats.xml @@ -0,0 +1,210 @@ + + + + + + + +]> + + + + + abstype abst@ype absprop + absview absvtype absvt@ype + absviewtype absviewt@ype + + exception + + datatype dataprop dataview + dataviewtype datavtype datasort + + typedef propdef viewdef + vtypedef viewtypedef sortdef + tkindef classdec + macdef macrodef + + withtype withprop withview + withvtype withviewtype + + val fun fn fnx + prfun prfn praxi + castfn + and + + var prvar + + infix infixl infixr + prefix postfix nonfix + + symintr symelim + + sta stacst + stadef static + + + + extern extype extvar + $extern $extype $extkind + $extype_struct + + $myfilename $mylocation $myfunction + + import staload dynload + + + + #include #staload #dynload + #require + + #if #ifdef #ifndef + #then + #elif #elifdef #elifndef + #else #endif + + #error #prerr #print + #assert + + #define #undef + + #pragma #codegen2 #codegen3 + + + + assume reassume + implmnt implement + primplmnt primplement + overload + + + + begin end + op as + + case prcase of when + scase + + let local in rec + if then else + ifcase sif + where + + for do + $break $continue + + lam llam lam@ + fix fix@ + + try with $raise + + $delay $ldelay + + $arrpsz $arrptrsize + $tyrep $d2ctype + + $effmask + $effmask_ntm $effmask_exn + $effmask_ref $effmask_wrt + $effmask_all + + $lst $lst_t $lst_vt + $list $list_t $list_vt + + $rec $rec_t $rec_vt + $record $record_t $record_vt + + $tup $tup_t $tup_vt + $tuple $tuple_t $tuple_vt + + $solver_assert + $solver_verify + + $vararg + $showtype + $tempenver + + + + type t@ype + view prop + viewtype viewt@ype + vtype vt@ype + + + + bool true false + int nat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +