Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
creating:operators [2025/05/14 21:41] – function & operator call syntax parsing ahelwer | creating:operators [2025/06/13 21:25] (current) – Removed PrintStream parameter from Interpreter constructor ahelwer | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ======= Operators | + | ======= |
We now rejoin //Crafting Interpreters// | We now rejoin //Crafting Interpreters// | ||
Line 15: | Line 15: | ||
Functions have not yet been defined in any form, but will be later in this tutorial. | Functions have not yet been defined in any form, but will be later in this tutorial. | ||
- | ====== Section 10.1: Operator | + | ====== Section 10.1: Function |
[[https:// | [[https:// | ||
Line 97: | Line 97: | ||
This '' | This '' | ||
+ | |||
+ | ===== Subsection 10.1.2: Interpreting function calls ===== | ||
+ | |||
+ | In [[https:// | ||
+ | First, add some imports to the top of '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | import java.util.Set; | ||
+ | import java.util.HashSet; | ||
+ | import java.util.ArrayList; | ||
+ | import java.util.List; | ||
+ | import java.util.Map; | ||
+ | import java.util.HashMap; | ||
+ | </ | ||
+ | |||
+ | Implement the '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitFnApplyExpr(Expr.FnApply expr) { | ||
+ | Object callee = evaluate(expr.fn); | ||
+ | Object argument = evaluate(expr.argument); | ||
+ | Map<?, ?> function = (Map<?, ?> | ||
+ | |||
+ | return function.get(argument); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Now we can apply functions, although we can't yet define them. | ||
+ | Next up is operators. | ||
+ | Here we completely rewrite our '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitVariableExpr(Expr.Variable expr) { | ||
+ | Object callee = environment.get(expr.name); | ||
+ | |||
+ | List< | ||
+ | for (Expr argument : expr.arguments) { | ||
+ | arguments.add(evaluate(argument)); | ||
+ | } | ||
+ | |||
+ | TlaCallable operator = (TlaCallable)callee; | ||
+ | return operator.call(this, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | We write our own version of the '' | ||
+ | <code java> | ||
+ | package tla; | ||
+ | |||
+ | import java.util.List; | ||
+ | |||
+ | interface TlaCallable { | ||
+ | Object call(Interpreter interpreter, | ||
+ | } | ||
+ | </ | ||
+ | We can now call operators, although cannot yet define operators to call! | ||
+ | |||
+ | ===== Subsection 10.1.3: Call type errors ===== | ||
+ | |||
+ | In '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | @Override | ||
+ | public Object visitFnApplyExpr(Expr.FnApply expr) { | ||
+ | Object callee = evaluate(expr.fn); | ||
+ | checkFunctionOperand(expr.bracket, | ||
+ | Object argument = evaluate(expr.argument); | ||
+ | Map<?, ?> function = (Map<?, ?> | ||
+ | </ | ||
+ | This requires another validation helper, '' | ||
+ | Put it down with the rest of the validation helpers: | ||
+ | <code java> | ||
+ | private void checkFunctionOperand(Token operator, Object operand) { | ||
+ | if (operand instanceof Map<?,?> | ||
+ | throw new RuntimeError(operator, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | We should also check that the function is actually defined on the given argument, and provide a useful error message if not: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | Object argument = evaluate(expr.argument); | ||
+ | Map<?, ?> function = (Map<?, ?> | ||
+ | if (!function.containsKey(argument)) { | ||
+ | throw new RuntimeError(expr.bracket, | ||
+ | " | ||
+ | + argument.toString()); | ||
+ | } | ||
+ | |||
+ | return function.get(argument); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | In '' | ||
+ | However, unlike the book this is not an error! | ||
+ | It is simply an identifier bound to a concrete value in the current '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | @Override | ||
+ | public Object visitVariableExpr(Expr.Variable expr) { | ||
+ | Object callee = environment.get(expr.name); | ||
+ | |||
+ | if (!(callee instanceof TlaCallable)) { | ||
+ | return callee; | ||
+ | } | ||
+ | |||
+ | List< | ||
+ | </ | ||
+ | This duality of identifiers either being operators to be evaluated or already-bound values is a bit confusing at times, but we can handle it. | ||
+ | If the user tries to provide arguments to one of these concrete values then we should raise an error: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | if (!(callee instanceof TlaCallable)) { | ||
+ | if (!expr.arguments.isEmpty()) { | ||
+ | throw new RuntimeError(expr.name, | ||
+ | " | ||
+ | } | ||
+ | |||
+ | return callee; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | ===== Subsection 10.1.4: Checking arity ===== | ||
+ | |||
+ | If '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | TlaCallable operator = (TlaCallable)callee; | ||
+ | if (arguments.size() != operator.arity()) { | ||
+ | throw new RuntimeError(expr.name, | ||
+ | operator.arity() + " arguments but got " + | ||
+ | arguments.size() + " | ||
+ | } | ||
+ | |||
+ | return operator.call(this, | ||
+ | </ | ||
+ | Which requires adding a new method to the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | interface TlaCallable { | ||
+ | int arity(); | ||
+ | Object call(Interpreter interpreter, | ||
+ | </ | ||
+ | |||
+ | For functions, we don't need to check arity because the single-argument restriction is enforced at the parser level. | ||
+ | |||
+ | ====== Section 10.2: Native Functions ====== | ||
+ | |||
+ | In [[https:// | ||
+ | We've already implemented all our native TLA⁺ functions in the // | ||
+ | Due to the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | class Interpreter implements Expr.Visitor< | ||
+ | | ||
+ | final Environment globals; | ||
+ | private Environment environment; | ||
+ | |||
+ | public Interpreter(boolean replMode) { | ||
+ | this.globals = new Environment(replMode); | ||
+ | this.environment = this.globals; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Note that the '' | ||
+ | The use of this field will become clear when implementing the '' | ||
+ | |||
+ | ====== Section 10.3: Function Declarations ====== | ||
+ | |||
+ | We can call functions & operators, now let's [[https:// | ||
+ | Operators take less work so they come first. | ||
+ | In the '' | ||
+ | We want to parse operator definitions like '' | ||
+ | '' | ||
+ | We will use the now-familiar '' | ||
+ | Recall that '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | private boolean isAtOpDefStart() { | ||
+ | if (!match(IDENTIFIER)) return false; | ||
+ | if (match(LEFT_PAREN)) { | ||
+ | do { | ||
+ | if (!match(IDENTIFIER)) return false; | ||
+ | } while (match(COMMA)); | ||
+ | if (!match(RIGHT_PAREN)) return false; | ||
+ | } | ||
+ | |||
+ | return match(EQUAL_EQUAL); | ||
+ | } | ||
+ | </ | ||
+ | <code java [highlight_lines_extra=" | ||
+ | private Stmt operatorDefinition() { | ||
+ | Token name = consume(IDENTIFIER, | ||
+ | List< | ||
+ | if (match(LEFT_PAREN)) { | ||
+ | do { | ||
+ | params.add(consume(IDENTIFIER, | ||
+ | } while (match(COMMA)); | ||
+ | consume(RIGHT_PAREN, | ||
+ | } | ||
+ | |||
+ | consume(EQUAL_EQUAL, | ||
+ | return new Stmt.OpDef(name, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Our changes to '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | defineAst(outputDir, | ||
+ | " | ||
+ | " | ||
+ | )); | ||
+ | </ | ||
+ | |||
+ | We can now parse operators with parameters! | ||
+ | On to functions. | ||
+ | This requires defining a new expression type in the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | defineAst(outputDir, | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | </ | ||
+ | |||
+ | This looks a bit abstract. | ||
+ | What does '' | ||
+ | And why do we allow multiple parameters when our functions only take one? | ||
+ | This is because we'll be bundling multiple closely-related language constructs into this single AST node type: function constructors, | ||
+ | |||
+ | In TLA⁺, functions are constructed with the '' | ||
+ | So you can write '' | ||
+ | TLA⁺ also has very useful universal & existential quantification syntax. | ||
+ | You can write '' | ||
+ | We can see the commonality between these; they all have: | ||
+ | - A token identifying the operator ('' | ||
+ | - A set to draw elements from ('' | ||
+ | - Some number of identifiers to which to bind values from the set ('' | ||
+ | - An expression body in which the identifiers can be used | ||
+ | So we can contain all of them in the '' | ||
+ | |||
+ | Here's how we parse the function constructor syntax. | ||
+ | Add this to the '' | ||
+ | <code java> | ||
+ | if (match(LEFT_BRACKET)) { | ||
+ | List< | ||
+ | params.add(consume(IDENTIFIER, | ||
+ | consume(IN, "' | ||
+ | Expr set = expression(); | ||
+ | Token op = consume(ALL_MAP_TO, | ||
+ | Expr expr = expression(); | ||
+ | consume(RIGHT_BRACKET, | ||
+ | return new Expr.QuantFn(op, | ||
+ | } | ||
+ | </ | ||
+ | While the '' | ||
+ | |||
+ | The parser code for universal & existential quantification looks similar, but we allow multiple parameters; put this in '' | ||
+ | <code java> | ||
+ | if (match(FOR_ALL, | ||
+ | Token op = previous(); | ||
+ | List< | ||
+ | do { | ||
+ | params.add(consume(IDENTIFIER, | ||
+ | } while (match(COMMA)); | ||
+ | consume(IN, "' | ||
+ | Expr set = expression(); | ||
+ | consume(COLON, | ||
+ | Expr expr = expression(); | ||
+ | return new Expr.QuantFn(op, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | And that concludes function parsing! | ||
+ | We can now define both operators and functions with parameters; on to evaluation. | ||
+ | |||
+ | ====== Section 10.4: Function Objects ====== | ||
+ | |||
+ | In [[https:// | ||
+ | Create a new file called '' | ||
+ | <code java> | ||
+ | package tla; | ||
+ | |||
+ | import java.util.List; | ||
+ | |||
+ | class TlaOperator implements TlaCallable { | ||
+ | private final Stmt.OpDef declaration; | ||
+ | TlaOperator(Stmt.OpDef declaration) { | ||
+ | this.declaration = declaration; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | The implementation of the '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public Object call(Interpreter interpreter, | ||
+ | | ||
+ | Environment environment = new Environment(interpreter.globals); | ||
+ | for (int i = 0; i < declaration.params.size(); | ||
+ | environment.define(declaration.params.get(i).lexeme, | ||
+ | arguments.get(i)); | ||
+ | } | ||
+ | |||
+ | interpreter.executeBlock(declaration.body, | ||
+ | return null; | ||
+ | } | ||
+ | </ | ||
+ | Here we see the usage of '' | ||
+ | This is so nested operator calls do not propagate their argument values to the nested callee. | ||
+ | |||
+ | This method calls a nonexistent '' | ||
+ | <code java> | ||
+ | Object executeBlock(Expr expr, Environment environment) { | ||
+ | Environment previous = this.environment; | ||
+ | try { | ||
+ | this.environment = environment; | ||
+ | return evaluate(expr); | ||
+ | } finally { | ||
+ | this.environment = previous; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | In the book, '' | ||
+ | Since TLA⁺ is not an imperative language with code blocks in the traditional sense, we define '' | ||
+ | Wrapping the evaluation in a '' | ||
+ | |||
+ | Back in '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public int arity() { | ||
+ | return declaration.params.size(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | For ease of use we'll also override '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public String toString() { | ||
+ | return "< | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | There is only one more thing to do and then parameterized operators will be fully functional (operational? | ||
+ | <code java [highlight_lines_extra=" | ||
+ | @Override | ||
+ | public Void visitOpDefStmt(Stmt.OpDef stmt) { | ||
+ | TlaOperator op = new TlaOperator(stmt); | ||
+ | environment.define(stmt.name, | ||
+ | return null; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Additional TLA⁺-specific validation is necessary here. | ||
+ | While we allow redefining operators // | ||
+ | So, if an operator of name '' | ||
+ | We should also disallow operator definitions with duplicate parameter names like '' | ||
+ | Define a new helper for this near the bottom of the '' | ||
+ | <code java> | ||
+ | private void checkNotDefined(List< | ||
+ | for (Token name : names) { | ||
+ | if (environment.isDefined(name)) { | ||
+ | throw new RuntimeError(name, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | for (int i = 0; i < names.size() - 1; i++) { | ||
+ | for (int j = i + 1; j < names.size(); | ||
+ | if (names.get(i).lexeme.equals(names.get(j).lexeme)) { | ||
+ | throw new RuntimeError(names.get(i), | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | This helper requires a new method in the '' | ||
+ | <code java> | ||
+ | boolean isDefined(Token name) { | ||
+ | return values.containsKey(name.lexeme) | ||
+ | || (enclosing != null && enclosing.isDefined(name)); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Add a '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | public Void visitOpDefStmt(Stmt.OpDef stmt) { | ||
+ | checkNotDefined(stmt.params); | ||
+ | for (Token param : stmt.params) { | ||
+ | if (param.lexeme.equals(stmt.name.lexeme)) { | ||
+ | throw new RuntimeError(param, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | TlaOperator op = new TlaOperator(stmt); | ||
+ | </ | ||
+ | |||
+ | And that takes care of operators! | ||
+ | Now to handle functions. | ||
+ | We still need to implement our '' | ||
+ | Similar to other operator evaluation methods like '' | ||
+ | We also ensure the function parameter names are not shadowing an existing identifier: | ||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitQuantFnExpr(Expr.QuantFn expr) { | ||
+ | checkNotDefined(expr.params); | ||
+ | Object set = evaluate(expr.set); | ||
+ | checkSetOperand(expr.op, | ||
+ | switch (expr.op.type) { | ||
+ | case ALL_MAP_TO: { | ||
+ | |||
+ | } case FOR_ALL: { | ||
+ | |||
+ | } case EXISTS: { | ||
+ | |||
+ | } default: { | ||
+ | // Unreachable. | ||
+ | return null; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Thinking about it, the code to implement these operators is fairly tricky. | ||
+ | We want to generate a series of bindings of set elements to identifiers. | ||
+ | Let's fill out the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | } case FOR_ALL: { | ||
+ | for (Environment binding : bindings) { | ||
+ | Object result = executeBlock(expr.body, | ||
+ | checkBooleanOperand(expr.op, | ||
+ | if (!(Boolean)result) return false; | ||
+ | } | ||
+ | return true; | ||
+ | } case EXISTS: { | ||
+ | </ | ||
+ | This is nice, concise code. | ||
+ | We iterate over a series of bindings - in the form of '' | ||
+ | But what is '' | ||
+ | This is one of the more algorithmically tricky parts of this entire tutorial series, but it is also very fun! | ||
+ | We are going to encapsulate the binding generation logic in a new class file, '' | ||
+ | <code java> | ||
+ | package tla; | ||
+ | |||
+ | import java.util.ArrayList; | ||
+ | import java.util.Iterator; | ||
+ | import java.util.List; | ||
+ | import java.util.Set; | ||
+ | |||
+ | class BindingGenerator implements Iterator< | ||
+ | Iterable< | ||
+ | private final List< | ||
+ | private final List< | ||
+ | private final Environment parent; | ||
+ | |||
+ | BindingGenerator(List< | ||
+ | this.vars = vars; | ||
+ | this.set = new ArrayList<> | ||
+ | this.parent = parent; | ||
+ | } | ||
+ | |||
+ | @Override | ||
+ | public Iterator< | ||
+ | return this; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | Implementing the '' | ||
+ | So that's what '' | ||
+ | A '' | ||
+ | |||
+ | Before getting too far into the weeds with the '' | ||
+ | Construct a '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | public Object visitQuantFnExpr(Expr.QuantFn expr) { | ||
+ | Object set = evaluate(expr.set); | ||
+ | checkSetOperand(expr.op, | ||
+ | BindingGenerator bindings = new BindingGenerator(expr.params, | ||
+ | switch (expr.op.type) { | ||
+ | </ | ||
+ | One important thing to note is that unlike in '' | ||
+ | <code haskell> | ||
+ | op(x) == \A y \in 0 .. 2 : y < x | ||
+ | </ | ||
+ | Here's how we implement the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | } case EXISTS: { | ||
+ | boolean result = false; | ||
+ | for (Environment binding : bindings) { | ||
+ | Object junctResult = executeBlock(expr.body, | ||
+ | checkBooleanOperand(expr.op, | ||
+ | result |= (boolean)junctResult; | ||
+ | } | ||
+ | return result; | ||
+ | } default: { | ||
+ | </ | ||
+ | Finally, here's how we implement the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case ALL_MAP_TO: { | ||
+ | Token param = expr.params.get(0); | ||
+ | Map< | ||
+ | for (Environment binding : bindings) { | ||
+ | function.put(binding.get(param), | ||
+ | } | ||
+ | return function; | ||
+ | } case FOR_ALL: { | ||
+ | </ | ||
+ | |||
+ | ====== Set Enumeration ====== | ||
+ | |||
+ | Now that we've finished all that, let's get to the real difficult part: implementing an enumeration algorithm in '' | ||
+ | A moment of consideration is in order. | ||
+ | If you had to bind a set of variables to every combination of values from a set, how would you do it? | ||
+ | To computer scientists, the connection between enumerating every possible combination of values and incrementing numbers in an arbitrary base is readily made. | ||
+ | For example, imagine you had a set of three values '' | ||
+ | This is isomorphic to iterating through all possible values of a two-digit number in base three. | ||
+ | The number of digits corresponds to the number of variables to bind, and the base corresponds to the number of elements in the set. | ||
+ | |||
+ | This is confusing, but we will walk through it step by step. | ||
+ | First we must assign an arbitrary order to the set so we can map single digits in base three to an element of the set: | ||
+ | ^ Digit ^ Element ^ | ||
+ | | 0 | a | | ||
+ | | 1 | b | | ||
+ | | 2 | c | | ||
+ | |||
+ | Using that mapping, here's a table showing how counting up through all two-digit base three numbers decomposes into binding all possible combinations of values to '' | ||
+ | ^ Number (base 3) ^ As set elements ^ x (0th-significant digit) ^ y (1st-significant digit) ^ | ||
+ | | 00 | aa | a | a | | ||
+ | | 01 | ab | b | a | | ||
+ | | 02 | ac | c | a | | ||
+ | | 10 | ba | a | b | | ||
+ | | 11 | bb | b | b | | ||
+ | | 12 | bc | c | b | | ||
+ | | 20 | ca | a | c | | ||
+ | | 21 | cb | b | c | | ||
+ | | 22 | cc | c | c | | ||
+ | |||
+ | As you can see, we generated every possible binding of '' | ||
+ | Each identifier is assigned a particular digit in the number, and their value in a binding is given by that digit' | ||
+ | |||
+ | We're almost there, but not quite; Java doesn' | ||
+ | We will need to increment a regular '' | ||
+ | Actually the well-known [[https:// | ||
+ | It is quite elegant; given a number '' | ||
+ | Then, assign '' | ||
+ | Confused? Here's how it looks as a table for converting the enumeration value 6 to a binding from our previous base 3 example: | ||
+ | ^ Iteration ^ '' | ||
+ | | 0 | 6 | 0 | 2 | | ||
+ | | 1 | 2 | 2 | 0 | | ||
+ | |||
+ | Note that the value of '' | ||
+ | Reading the '' | ||
+ | Consulting our tables, we see " | ||
+ | '' | ||
+ | '' | ||
+ | |||
+ | The iteration number actually corresponds to digit significance. | ||
+ | So, in iteration 0, '' | ||
+ | In iteration 1, '' | ||
+ | If this is still too abstract, try some other values to convince yourself that our algorithm successfully calculates the binding! | ||
+ | |||
+ | Now to implement it. | ||
+ | First, add another field to the '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | class BindingGenerator implements Iterator< | ||
+ | Iterable< | ||
+ | private final List< | ||
+ | private final List< | ||
+ | private final Environment parent; | ||
+ | private int enumerationIndex = 0; | ||
+ | </ | ||
+ | |||
+ | Then, implement '' | ||
+ | This is required by the '' | ||
+ | In our case, we test that '' | ||
+ | For '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public boolean hasNext() { | ||
+ | return enumerationIndex < Math.pow(set.size(), | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Now for the main event. | ||
+ | Here's the strikingly simple code implementing the change-of-base algorithm in the '' | ||
+ | <code java> | ||
+ | @Override | ||
+ | public Environment next() { | ||
+ | int current = enumerationIndex++; | ||
+ | Environment bindings = new Environment(parent); | ||
+ | for (Token var : vars) { | ||
+ | bindings.define(var, | ||
+ | current /= set.size(); | ||
+ | } | ||
+ | |||
+ | return bindings; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Note that '' | ||
+ | A nice little terse code trick. | ||
+ | |||
+ | And we're done! | ||
+ | You can now run your interpreter and enjoy the full power of functions & operators with parameters, along with the new universal & existential quantification expressions! | ||
+ | See the current expected state of your source code [[https:// | ||
+ | On to the next chapter: [[creating: | ||
+ | |||
+ | ====== Challenges ====== | ||
+ | |||
+ | - In the full TLA⁺ language, quantified functions can be even more complicated; | ||
+ | - Modify the change-of-base algorithm to handle enumerating multiple sets of varying cardinality, | ||
+ | - Our enumeration algorithm is a " | ||
+ | - Often, sets in TLA⁺ models are very large. Instead of immediately evaluating sets constructed with the '' | ||
+ | |||
+ | [[creating: | ||