Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
creating:expressions [2025/04/06 19:58] – Summary ahelwer | creating:expressions [2025/04/14 17:57] (current) – Finished first draft of expression interpretation tutorial ahelwer | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== Handling Constant TLA⁺ Expressions ====== | + | ======= Handling Constant TLA⁺ Expressions |
This tutorial page covers the next three chapters in //Crafting Interpreters//: | This tutorial page covers the next three chapters in //Crafting Interpreters//: | ||
Line 11: | Line 11: | ||
Just primitive literal values stuck together, not too much more advanced than a simple calculator app. | Just primitive literal values stuck together, not too much more advanced than a simple calculator app. | ||
This will give us a skeleton on which to hang the rest of the language. | This will give us a skeleton on which to hang the rest of the language. | ||
+ | |||
+ | Each section in this tutorial corresponds to one or more sections of //Crafting Interpreters// | ||
+ | First read the section of the book, then read the corresponding commentary & modifications given by this tutorial. | ||
+ | |||
+ | ====== Chapter 5: Representing Code ====== | ||
+ | |||
+ | [[https:// | ||
+ | However, since this tutorial is intended to be self-contained code-wise, all necessary code is reproduced. | ||
+ | |||
+ | ===== Section 5.1: Context-Free Grammars ===== | ||
+ | |||
+ | Everything before [[https:// | ||
+ | The ambiguous first-draft grammar for Lox expressions can be adapted to TLA⁺: | ||
+ | <code eiffel> | ||
+ | expression | ||
+ | | unary | ||
+ | | binary | ||
+ | | ternary | ||
+ | | variadic | ||
+ | | grouping ; | ||
+ | |||
+ | literal | ||
+ | grouping | ||
+ | unary → (( " | ||
+ | binary | ||
+ | ternary | ||
+ | variadic | ||
+ | operator | ||
+ | </ | ||
+ | There are a few interesting differences. | ||
+ | The '' | ||
+ | The '' | ||
+ | The operators are changed to the set of operators defined in our TLA⁺ implementation. | ||
+ | These are all the expressions we can use without introducing the concept of identifiers referring to something else. | ||
+ | |||
+ | There is also the '' | ||
+ | This one is so named because it's where we'll put operators accepting varying numbers of parameters. | ||
+ | It's kind of weird to think of the finite set literal '' | ||
+ | The only difference between '' | ||
+ | This is the perspective of a language implementer. | ||
+ | Later on we will extend the definition of '' | ||
+ | |||
+ | ===== Section 5.2: Implementing Syntax Trees ===== | ||
+ | |||
+ | While some programmers might have an aversion to generating code, the approach taken in the book is actually very convenient - as you will discover if you take some time to prototype & play around with different class representations of the parse tree! | ||
+ | In [[https:// | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | package tool; | ||
+ | |||
+ | import java.io.IOException; | ||
+ | import java.io.PrintWriter; | ||
+ | import java.util.Arrays; | ||
+ | import java.util.List; | ||
+ | |||
+ | public class GenerateAst { | ||
+ | public static void main(String[] args) throws IOException { | ||
+ | if (args.length != 1) { | ||
+ | System.err.println(" | ||
+ | System.exit(64); | ||
+ | } | ||
+ | String outputDir = args[0]; | ||
+ | defineAst(outputDir, | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | )); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | In the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | private static void defineAst( | ||
+ | String outputDir, String baseName, List< | ||
+ | throws IOException { | ||
+ | String path = outputDir + "/" | ||
+ | PrintWriter writer = new PrintWriter(path, | ||
+ | |||
+ | writer.println(" | ||
+ | writer.println(); | ||
+ | writer.println(" | ||
+ | writer.println(); | ||
+ | writer.println(" | ||
+ | |||
+ | |||
+ | // The AST classes. | ||
+ | for (String type : types) { | ||
+ | String className = type.split(":" | ||
+ | String fields = type.split(":" | ||
+ | defineType(writer, | ||
+ | } | ||
+ | writer.println(" | ||
+ | writer.close(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Finally, the '' | ||
+ | |||
+ | <code java> | ||
+ | private static void defineType( | ||
+ | PrintWriter writer, String baseName, | ||
+ | String className, String fieldList) { | ||
+ | writer.println(" | ||
+ | baseName + " {"); | ||
+ | |||
+ | // Constructor. | ||
+ | writer.println(" | ||
+ | |||
+ | // Store parameters in fields. | ||
+ | String[] fields = fieldList.split(", | ||
+ | for (String field : fields) { | ||
+ | String name = field.split(" | ||
+ | writer.println(" | ||
+ | } | ||
+ | |||
+ | writer.println(" | ||
+ | |||
+ | // Fields. | ||
+ | writer.println(); | ||
+ | for (String field : fields) { | ||
+ | writer.println(" | ||
+ | } | ||
+ | |||
+ | writer.println(" | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | |||
+ | ===== Section 5.3: Working with Trees ===== | ||
+ | |||
+ | [[https:// | ||
+ | No TLA⁺-specific differences are necessary when modifying '' | ||
+ | Insert this line in '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | writer.println(" | ||
+ | |||
+ | defineVisitor(writer, | ||
+ | |||
+ | // The AST classes. | ||
+ | </ | ||
+ | |||
+ | This calls the '' | ||
+ | <code java> | ||
+ | private static void defineVisitor( | ||
+ | PrintWriter writer, String baseName, List< | ||
+ | writer.println(" | ||
+ | |||
+ | for (String type : types) { | ||
+ | String typeName = type.split(":" | ||
+ | writer.println(" | ||
+ | typeName + " " + baseName.toLowerCase() + " | ||
+ | } | ||
+ | |||
+ | writer.println(" | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Again insert some lines in '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | defineType(writer, | ||
+ | } | ||
+ | |||
+ | // The base accept() method. | ||
+ | writer.println(); | ||
+ | writer.println(" | ||
+ | |||
+ | writer.println(" | ||
+ | </ | ||
+ | |||
+ | Finally, insert some lines in '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | writer.println(" | ||
+ | |||
+ | // Visitor pattern. | ||
+ | writer.println(); | ||
+ | writer.println(" | ||
+ | writer.println(" | ||
+ | writer.println(" | ||
+ | className + baseName + " | ||
+ | writer.println(" | ||
+ | |||
+ | // Fields. | ||
+ | </ | ||
+ | Our generated syntax tree node types now support the visitor pattern! | ||
+ | |||
+ | ===== Section 5.4: A (Not Very) Pretty Printer ===== | ||
+ | |||
+ | [[https:// | ||
+ | There are a few TLA⁺-specific modifications, | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | package tla; | ||
+ | |||
+ | class AstPrinter implements Expr.Visitor< | ||
+ | String print(Expr expr) { | ||
+ | return expr.accept(this); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | We also have some modifications and additions to the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | @Override | ||
+ | public String visitBinaryExpr(Expr.Binary expr) { | ||
+ | return parenthesize(expr.operator.lexeme, | ||
+ | expr.left, expr.right); | ||
+ | } | ||
+ | |||
+ | @Override | ||
+ | public String visitGroupingExpr(Expr.Grouping expr) { | ||
+ | return parenthesize(" | ||
+ | } | ||
+ | |||
+ | @Override | ||
+ | public String visitLiteralExpr(Expr.Literal expr) { | ||
+ | if (expr.value == null) return " | ||
+ | return expr.value.toString(); | ||
+ | } | ||
+ | |||
+ | @Override | ||
+ | public String visitUnaryExpr(Expr.Unary expr) { | ||
+ | return parenthesize(expr.operator.lexeme, | ||
+ | } | ||
+ | |||
+ | @Override | ||
+ | public string visitTernaryExpr(Expr.Ternary expr) { | ||
+ | return parenthesize(expr.operator.lexeme, | ||
+ | expr.second, | ||
+ | } | ||
+ | |||
+ | @Override | ||
+ | public string visitVariadicExpr(Expr.Variadic expr) { | ||
+ | return parenthesize(expr.operator.lexeme, | ||
+ | expr.parameters.toArray(Expr[]:: | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | The '' | ||
+ | |||
+ | <code java> | ||
+ | private String parenthesize(String name, Expr... exprs) { | ||
+ | StringBuilder builder = new StringBuilder(); | ||
+ | |||
+ | builder.append(" | ||
+ | for (Expr expr : exprs) { | ||
+ | builder.append(" | ||
+ | builder.append(expr.accept(this)); | ||
+ | } | ||
+ | builder.append(" | ||
+ | |||
+ | return builder.toString(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | It isn't necessary to define a '' | ||
+ | |||
+ | ====== Chapter 6: Parsing Expressions ====== | ||
+ | |||
+ | In [[https:// | ||
+ | |||
+ | ===== Section 6.1: Ambiguity and the Parsing Game ===== | ||
+ | |||
+ | First, as in [[https:// | ||
+ | The way that precedence works in TLA⁺ is different from Lox, and indeed different from most other languages! | ||
+ | One side-quote from this section of the book reads: | ||
+ | |||
+ | >While not common these days, some languages specify that certain pairs of operators have no relative precedence. That makes it a syntax error to mix those operators in an expression without using explicit grouping. | ||
+ | |||
+ | > | ||
+ | |||
+ | TLA⁺ has both of these features! | ||
+ | Instead of operators occupying a slot in some hierarchy of precedence, each operator has a precedence //range//. | ||
+ | When the precedence ranges of two operators overlap it is a parse error. | ||
+ | For example, the '' | ||
+ | < | ||
+ | ENABLED x' | ||
+ | </ | ||
+ | Users must add parentheses to indicate their desired grouping in the expression. | ||
+ | Similarly, some operators like '' | ||
+ | Both of these factors combine to make our operator parsing code quite a bit different from that given in the book. | ||
+ | Worry not, it can still be made terse and understandable! | ||
+ | |||
+ | ===== Section 6.2: Recursive Descent Parsing ===== | ||
+ | |||
+ | [[https:// | ||
+ | Recursive descent can seem a bit magical even if you're used to reasoning about recursive functions! | ||
+ | Unfortunately TLA⁺ requires us to push this magic even further. | ||
+ | We'll take it step by step. | ||
+ | |||
+ | We start with the same basic '' | ||
+ | We also add an import for '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | package tla; | ||
+ | |||
+ | import java.util.List; | ||
+ | import java.util.ArrayList; | ||
+ | |||
+ | import static tla.TokenType.*; | ||
+ | |||
+ | class Parser { | ||
+ | private final List< | ||
+ | private int current = 0; | ||
+ | |||
+ | Parser(List< | ||
+ | this.tokens = tokens; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Now we hit our first major difference. | ||
+ | In Lox, precedence is given by a small hierarchy of named rules like '' | ||
+ | TLA⁺ is more complicated than that. | ||
+ | The full language has around 100 operators spanning precedences 1 to 15! | ||
+ | If we wanted to match the book's style we'd have to write a tower of recursive functions like: | ||
+ | <code java> | ||
+ | private Expr operatorExpressionPrec1() { ... } | ||
+ | private Expr operatorExpressionPrec2() { ... } | ||
+ | private Expr operatorExpressionPrec3() { ... } | ||
+ | ... | ||
+ | private Expr operatorExpressionPrec15() { ... } | ||
+ | </ | ||
+ | where each '' | ||
+ | Life is too short for this. | ||
+ | Instead, we'll adopt a technique alluded to in the text: | ||
+ | >If you wanted to do some clever Java 8, you could create a helper method for parsing a left-associative series of binary operators given a list of token types, and an operand method handle to simplify this redundant code. | ||
+ | |||
+ | Here's the skeleton of our operator parsing function; the trick is to make the precedence a parameter to the function instead of a component of the name. | ||
+ | Add this code after the '' | ||
+ | <code java> | ||
+ | private Expr expression() { | ||
+ | return operatorExpression(1); | ||
+ | } | ||
+ | |||
+ | private Expr operatorExpression(int prec) { | ||
+ | if (prec == 16) return primary(); | ||
+ | |||
+ | Expr expr = operatorExpression(prec + 1); | ||
+ | |||
+ | return expr; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Before filling out '' | ||
+ | |||
+ | <code java> | ||
+ | private boolean match(TokenType... types) { | ||
+ | for (TokenType type : types) { | ||
+ | if (check(type)) { | ||
+ | advance(); | ||
+ | return true; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | return false; | ||
+ | } | ||
+ | |||
+ | private boolean check(TokenType type) { | ||
+ | if (isAtEnd()) return false; | ||
+ | return peek().type == type; | ||
+ | } | ||
+ | |||
+ | private Token advance() { | ||
+ | if (!isAtEnd()) current++; | ||
+ | return previous(); | ||
+ | } | ||
+ | |||
+ | private boolean isAtEnd() { | ||
+ | return peek().type == EOF; | ||
+ | } | ||
+ | |||
+ | private Token peek() { | ||
+ | return tokens.get(current); | ||
+ | } | ||
+ | |||
+ | private Token previous() { | ||
+ | return tokens.get(current - 1); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Now we have to define a table of operators with their details. | ||
+ | For this, create a new file '' | ||
+ | |||
+ | <code java> | ||
+ | package tla; | ||
+ | |||
+ | enum Fix { | ||
+ | PREFIX, INFIX, POSTFIX | ||
+ | } | ||
+ | |||
+ | class Operator { | ||
+ | final Fix fix; | ||
+ | final TokenType token; | ||
+ | final boolean assoc; | ||
+ | final int lowPrec; | ||
+ | final int highPrec; | ||
+ | |||
+ | public Operator(Fix fix, TokenType token, boolean assoc, | ||
+ | int lowPrec, int highPrec) { | ||
+ | this.fix = fix; | ||
+ | this.token = token; | ||
+ | this.assoc = assoc; | ||
+ | this.lowPrec = lowPrec; | ||
+ | this.highPrec = highPrec; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | For convenience, | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | package tla; | ||
+ | |||
+ | import java.util.List; | ||
+ | import java.util.ArrayList; | ||
+ | |||
+ | import static tla.TokenType.*; | ||
+ | import static tla.Fix.*; | ||
+ | |||
+ | class Parser { | ||
+ | </ | ||
+ | |||
+ | |||
+ | You can find operator attributes on page 271 of // | ||
+ | We use a small subset of these operators. | ||
+ | Record their attributes in a table in '' | ||
+ | |||
+ | <code java> | ||
+ | private static final Operator[] operators = new Operator[] { | ||
+ | new Operator(PREFIX, | ||
+ | new Operator(PREFIX, | ||
+ | new Operator(PREFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(INFIX, | ||
+ | new Operator(POSTFIX, | ||
+ | }; | ||
+ | </ | ||
+ | |||
+ | Here's something a bit odd. | ||
+ | In TLA⁺, the infix minus operator (subtraction) has higher precedence at 11-11 than the infix plus operator (addition) at 10-10! | ||
+ | In grade school you probably learned acronyms like PEMDAS or BEDMAS to remember the order of operations in arithmetic. | ||
+ | Really, you learned parsing rules! | ||
+ | Now when writing our own parsing algorithm we come to understand that the order of these operations is not inscribed in the bedrock of mathematical truth but instead is a simple convention of mathematical notation. | ||
+ | TLA⁺ subverts this by parsing the expression '' | ||
+ | While this design decision is unusual, it is [[https:// | ||
+ | |||
+ | We need one more helper method before we can start working on '' | ||
+ | Add this code above '' | ||
+ | <code java> | ||
+ | private Operator matchOp(Fix fix, int prec) { | ||
+ | for (Operator op : operators) { | ||
+ | if (op.fix == fix && op.lowPrec == prec) { | ||
+ | if (match(op.token)) return op; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | return null; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Okay, we're all set for the main event! | ||
+ | Here is how we modify '' | ||
+ | You can see a strong resemblance to the infix operator parsing code given in the book (new lines highlighted): | ||
+ | <code java [highlight_lines_extra=" | ||
+ | private Expr operatorExpression(int prec) { | ||
+ | if (prec == 16) return primary(); | ||
+ | |||
+ | Operator op; | ||
+ | |||
+ | Expr expr = operatorExpression(prec + 1); | ||
+ | while ((op = matchOp(INFIX, | ||
+ | Token operator = previous(); | ||
+ | Expr right = operatorExpression(op.highPrec + 1); | ||
+ | expr = new Expr.Binary(expr, | ||
+ | } | ||
+ | |||
+ | return expr; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | We use Java's combined conditional-assignment atop the '' | ||
+ | The interior of the '' | ||
+ | It takes a bit of thinking to understand why this works for parsing expressions according to the precedence rules we want, but it does. | ||
+ | Take a minute to ponder it. | ||
+ | If you still don't get it, wait until we have a fully-functional expression parser then play around with this line to see how it changes the behavior. | ||
+ | |||
+ | Our code implements precedence ranges, but assumes all infix operators are associative. | ||
+ | We need to modify the loop to return immediately if the infix operator is not associative: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | while ((op = matchOp(INFIX, | ||
+ | Token operator = previous(); | ||
+ | Expr right = operatorExpression(op.highPrec + 1); | ||
+ | expr = new Expr.Binary(expr, | ||
+ | if (!op.assoc) return expr; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | And that's how we parse infix operators! | ||
+ | That was the most complicated case, so let's move on to prefix operators. | ||
+ | Again our code resembles the prefix operator parsing logic given in the book. | ||
+ | Add this snippet near the top of '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | private Expr operatorExpression(int prec) { | ||
+ | if (prec == 16) return primary(); | ||
+ | |||
+ | Operator op; | ||
+ | if ((op = matchOp(PREFIX, | ||
+ | Token opToken = previous(); | ||
+ | Expr expr = operatorExpression( | ||
+ | op.assoc ? op.lowPrec : op.highPrec + 1); | ||
+ | return new Expr.Unary(opToken, | ||
+ | } | ||
+ | |||
+ | Expr expr = operatorExpression(prec + 1); | ||
+ | </ | ||
+ | |||
+ | Of note is the expression '' | ||
+ | To understand this expression, it is helpful to consider an example. | ||
+ | The negative prefix operator '' | ||
+ | In order to do that after consuming the first '' | ||
+ | Then we will consume the second '' | ||
+ | In contrast, the prefix operator '' | ||
+ | In that case we want to reject the expression '' | ||
+ | Thus the second '' | ||
+ | |||
+ | All that remains is to parse postfix operators. | ||
+ | These are not covered in the book, but they pretty much resemble infix operators without matching an expression on the right-hand side of the operator. | ||
+ | Add the highlighted code below the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | } | ||
+ | |||
+ | while ((op = matchOp(POSTFIX, | ||
+ | Token opToken = previous(); | ||
+ | expr = new Expr.Unary(opToken, | ||
+ | if (!op.assoc) break; | ||
+ | } | ||
+ | |||
+ | return expr | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | And that completes our operator parsing logic! | ||
+ | All that remains is our '' | ||
+ | It is fairly similar to that given in the book, with some omissions since our minimal TLA⁺ subset does not contain strings or null values. | ||
+ | Add this code after '' | ||
+ | |||
+ | <code java> | ||
+ | private Expr primary() { | ||
+ | if (match(FALSE)) return new Expr.Literal(false); | ||
+ | if (match(TRUE)) return new Expr.Literal(true); | ||
+ | |||
+ | if (match(NUMBER)) { | ||
+ | return new Expr.Literal(previous().literal); | ||
+ | } | ||
+ | |||
+ | if (match(LEFT_PAREN)) { | ||
+ | Expr expr = expression(); | ||
+ | consume(RIGHT_PAREN, | ||
+ | return new Expr.Grouping(expr); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | We have two final additions to '' | ||
+ | Here's how we parse '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | return new Expr.Grouping(expr); | ||
+ | } | ||
+ | |||
+ | if (match(IF)) { | ||
+ | Token operator = previous(); | ||
+ | Expr condition = expression(); | ||
+ | consume(THEN, | ||
+ | Expr yes = expression(); | ||
+ | consume(ELSE, | ||
+ | Expr no = expression(); | ||
+ | return new Expr.Ternary(operator, | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | And here's how we parse the set construction operator. | ||
+ | Again add the highlighted code to the bottom of '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | return new Expr.Ternary(operator, | ||
+ | } | ||
+ | |||
+ | if (match(LEFT_BRACE)) { | ||
+ | Token operator = previous(); | ||
+ | List< | ||
+ | if (RIGHT_BRACE != peek().type) { | ||
+ | do { | ||
+ | elements.add(expression()); | ||
+ | } while (match(COMMA)); | ||
+ | } | ||
+ | consume(RIGHT_BRACE, | ||
+ | return new Expr.Variadic(operator, | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | This will handle expressions like '' | ||
+ | |||
+ | The '' | ||
+ | |||
+ | ===== Section 6.3: Syntax Errors ===== | ||
+ | |||
+ | We don't need to modify most of the error reporting code given in [[https:// | ||
+ | Here it is; add the '' | ||
+ | |||
+ | <code java> | ||
+ | private Token consume(TokenType type, String message) { | ||
+ | if (check(type)) return advance(); | ||
+ | |||
+ | throw error(peek(), | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Then add the '' | ||
+ | |||
+ | <code java> | ||
+ | private ParseError error(Token token, String message) { | ||
+ | Lox.error(token, | ||
+ | return new ParseError(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | In the '' | ||
+ | |||
+ | <code java> | ||
+ | static void error(Token token, String message) { | ||
+ | if (token.type == TokenType.EOF) { | ||
+ | report(token.line, | ||
+ | } else { | ||
+ | report(token.line, | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Add the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | class Parser { | ||
+ | private static class ParseError extends RuntimeException {} | ||
+ | |||
+ | private final List< | ||
+ | </ | ||
+ | |||
+ | One piece of code we //do// need to modify is the '' | ||
+ | TLA⁺ doesn' | ||
+ | |||
+ | <code java> | ||
+ | private void synchronize() { | ||
+ | advance(); | ||
+ | |||
+ | while(!isAtEnd()) { | ||
+ | if (previous().type == EQUAL_EQUAL) return; | ||
+ | |||
+ | advance(); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Fully-featured TLA⁺ parsers generally look ahead to identify the start of a new operator definition (not trivial!) and insert a synthetic zero-width token for the parser to consume. | ||
+ | For an example, see SANY's very complicated '' | ||
+ | We won't be doing that here, but it's a good technique to know about; looking ahead then inserting synthetic tokens to disambiguate syntax is a common method when parsing complicated & ambiguous languages like TLA⁺. | ||
+ | |||
+ | ===== Section 6.4: Wiring up the Parser ===== | ||
+ | |||
+ | In [[https:// | ||
+ | In the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | List< | ||
+ | |||
+ | Parser parser = new Parser(tokens); | ||
+ | Expr expression = parser.parse(); | ||
+ | |||
+ | // Stop if there was a syntax error. | ||
+ | if (hadError) return; | ||
+ | |||
+ | System.out.println(new AstPrinter().print(expression)); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Fire up the interpreter and parse a TLA⁺ expression! | ||
+ | < | ||
+ | > {1 + 2, IF TRUE THEN 3 ELSE 4, {}} | ||
+ | ({ (+ 1 2) (IF true 3 4) ({)) | ||
+ | </ | ||
+ | |||
+ | ====== Chapter 7: Evaluating Expressions ====== | ||
+ | |||
+ | Now that we can parse expressions, | ||
+ | Here we follow the material in [[https:// | ||
+ | It's an exciting chapter, so let's jump in. | ||
+ | |||
+ | ===== Section 7.1: Representing Values ===== | ||
+ | |||
+ | In [[https:// | ||
+ | Our mapping for TLA⁺ is fairly similar to Lox, although we use '' | ||
+ | TLA⁺ thankfully lacks '' | ||
+ | |||
+ | ^ TLA⁺ type ^ Java representation | ||
+ | | Any TLA⁺ value | '' | ||
+ | | Boolean | ||
+ | | number | ||
+ | | set | '' | ||
+ | |||
+ | Java does a huge amount of work for us with the capabilities of the '' | ||
+ | |||
+ | ===== Section 7.2: Representing Values ===== | ||
+ | |||
+ | [[https:// | ||
+ | Create a new file '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | package tla; | ||
+ | |||
+ | import java.util.Set; | ||
+ | import java.util.HashSet; | ||
+ | |||
+ | class Interpreter implements Expr.Visitor< | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | If your IDE supports it, get it to automatically add all necessary stub methods to implement the '' | ||
+ | |||
+ | Let's start with defining the value of literals. | ||
+ | Our code is completely unchanged from the book; add this '' | ||
+ | |||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitLiteralExpr(Expr.Literal expr) { | ||
+ | return expr.value; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Our code for evaluating grouping (parentheses) is similarly unchanged from the book: | ||
+ | |||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitGroupingExpr(Expr.Grouping expr) { | ||
+ | return evaluate(expr.expression); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | This uses the '' | ||
+ | |||
+ | <code java> | ||
+ | private Object evaluate(Expr expr) { | ||
+ | return expr.accept(this); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | ==== Evaluating Unary Operators ==== | ||
+ | |||
+ | Our first real difference is in the '' | ||
+ | Recall that since our '' | ||
+ | Here's how we define the negative prefix operator, casting the parameter to an '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | @Override | ||
+ | public Object visitUnaryExpr(Expr.Unary expr) { | ||
+ | Object operand = evaluate(expr.expr); | ||
+ | |||
+ | switch (expr.operator.type) { | ||
+ | case MINUS: | ||
+ | return -(int)operand; | ||
+ | } | ||
+ | |||
+ | // Unreachable. | ||
+ | return null; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Note that the negative prefix operator is not actually a built-in operator of TLA⁺. | ||
+ | It is defined in the '' | ||
+ | However, because our minimal TLA⁺ language subset lacks module importing, for convenience we just define some common arithmetic operators as built-in. | ||
+ | You can find more information about the built-in TLA⁺ operators in chapter 16 of // | ||
+ | |||
+ | Our logical-not prefix operator is denoted by the '' | ||
+ | We also have no use for the '' | ||
+ | Add this code to the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | switch (expr.operator.type) { | ||
+ | case NOT: | ||
+ | return !(boolean)operand; | ||
+ | case MINUS: | ||
+ | </ | ||
+ | |||
+ | We still have two more unary operators to define: '' | ||
+ | Both are trivial in this domain but will become much more complicated later on. | ||
+ | For constant expressions, | ||
+ | Add the highlighted code to the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | switch (expr.operator.type) { | ||
+ | case ENABLED: | ||
+ | return (boolean)operand; | ||
+ | case NOT: | ||
+ | </ | ||
+ | |||
+ | Priming a constant expression has no effect on the value of that expression, so just return the operand' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | switch (expr.operator.type) { | ||
+ | case PRIME: | ||
+ | return operand; | ||
+ | case ENABLED: | ||
+ | </ | ||
+ | |||
+ | ==== Evaluating Binary Operators ==== | ||
+ | |||
+ | Unlike Lox, addition in TLA⁺ is only defined between two numbers (at least in its standard '' | ||
+ | Here's how we define our basic binary arithmetic & comparison operators; add a '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | @Override | ||
+ | public Object visitBinaryExpr(Expr.Binary expr) { | ||
+ | Object left = evaluate(expr.left); | ||
+ | Object right = evaluate(expr.right); | ||
+ | |||
+ | switch (expr.operator.type) { | ||
+ | case MINUS: | ||
+ | return (int)left - (int)right; | ||
+ | case PLUS: | ||
+ | return (int)left + (int)right; | ||
+ | case LESS_THAN: | ||
+ | return (int)left < (int)right; | ||
+ | case EQUAL: | ||
+ | return left.equals(right); | ||
+ | } | ||
+ | |||
+ | // Unreachable. | ||
+ | return null; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Note that since we don't have to worry about '' | ||
+ | However, this actually gives us different equality semantics from TLC. | ||
+ | In TLC, evaluating things like '' | ||
+ | In the interest of simplicity we are fine with keeping these looser semantics. | ||
+ | See the challenges at the end of this tutorial page if you're interested in more closely matching the semantics of TLC. | ||
+ | |||
+ | Let's take our first foray into sets by defining the set membership operator in '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | switch (expr.operator.type) { | ||
+ | case IN: | ||
+ | return ((Set<?> | ||
+ | case MINUS: | ||
+ | </ | ||
+ | |||
+ | The question mark is some unusual Java syntax you probably haven' | ||
+ | It has to do with // | ||
+ | Think of it as casting the right operand to //some kind// of set, so we can access the '' | ||
+ | It is tempting to cast it to '' | ||
+ | |||
+ | Similar to equality, we get looser semantics here than with TLC. | ||
+ | TLC will emit a runtime error when evaluating the expression '' | ||
+ | Again this is acceptable. | ||
+ | |||
+ | Now for something more complicated. | ||
+ | Here's the set construction helper operator '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | switch (expr.operator.type) { | ||
+ | case DOT_DOT: | ||
+ | Set< | ||
+ | int lower = (int)left; | ||
+ | int higher = (int)right; | ||
+ | if (lower <= higher) { | ||
+ | for (int i = lower; i <= higher; i++) { | ||
+ | set.add(i); | ||
+ | } | ||
+ | } | ||
+ | return set; | ||
+ | case IN: | ||
+ | </ | ||
+ | |||
+ | So expressions like '' | ||
+ | We use '' | ||
+ | It's incredible how much we get for free here; Java sets even implement value-based de-duplication and equality comparison for us for arbitrarily-nested sets! | ||
+ | |||
+ | The only binary operators remaining are logical '' | ||
+ | We actually want '' | ||
+ | Add the highlighted lines near the top of the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | public Object visitBinaryExpr(Expr.Binary expr) { | ||
+ | Object left = evaluate(expr.left); | ||
+ | switch (expr.operator.type) { | ||
+ | case AND: | ||
+ | if (!(boolean)left) return false; | ||
+ | Object right = evaluate(expr.right); | ||
+ | return (boolean)right; | ||
+ | default: | ||
+ | break; | ||
+ | } | ||
+ | |||
+ | </ | ||
+ | |||
+ | In an odd contrast, our '' | ||
+ | We add it in with the rest of the operators in '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | Object right = evaluate(expr.right); | ||
+ | switch (expr.operator.type) { | ||
+ | case OR: | ||
+ | return (boolean)left || (boolean)right; | ||
+ | case DOT_DOT: | ||
+ | </ | ||
+ | |||
+ | Why are these different? | ||
+ | It has to do with how they are used in TLA⁺ formulas. | ||
+ | Actions are usually defined as a conjunction of expressions serving as guards, where later expressions could result in a runtime error if evaluated in some cases. | ||
+ | For example, consider a variable '' | ||
+ | People often write expressions like '' | ||
+ | It would be a runtime error to evaluate '' | ||
+ | It is thus useful to use conjunction as a guard stopping expression interpretation if the first operand is not true. | ||
+ | In contrast, disjunction is used in TLA⁺ to express nondeterminism and so both branches of the disjunct need to be evaluated to see whether their expressions satisfy alternative next states. | ||
+ | You can read [[https:// | ||
+ | |||
+ | ==== Evaluating Other Operators ==== | ||
+ | |||
+ | Our '' | ||
+ | Add this '' | ||
+ | |||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitTernaryExpr(Ternary expr) { | ||
+ | switch (expr.operator.type) { | ||
+ | case IF: | ||
+ | Object conditional = evaluate(expr.first); | ||
+ | return (boolean)conditional ? | ||
+ | evaluate(expr.second) : evaluate(expr.third); | ||
+ | default: | ||
+ | // Unreachable. | ||
+ | return null; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | '' | ||
+ | Note our definition is also short-circuiting. | ||
+ | |||
+ | Finally, here's how we define the set construction operator; add this '' | ||
+ | |||
+ | <code java> | ||
+ | @Override | ||
+ | public Object visitVariadicExpr(Expr.Variadic expr) { | ||
+ | switch (expr.operator.type) { | ||
+ | case LEFT_BRACE: | ||
+ | Set< | ||
+ | for (Expr parameter : expr.parameters) { | ||
+ | set.add(evaluate(parameter)); | ||
+ | } | ||
+ | return set; | ||
+ | default: | ||
+ | // Unreachable. | ||
+ | return null; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | ===== Section 7.3: Runtime Errors ===== | ||
+ | |||
+ | In [[https:// | ||
+ | |||
+ | Here's our variant of the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | private void checkNumberOperand(Token operator, Object operand) { | ||
+ | if (operand instanceof Integer) return; | ||
+ | throw new RuntimeError(operator, | ||
+ | } | ||
+ | |||
+ | private void checkNumberOperands(Token operator, | ||
+ | | ||
+ | if (left instanceof Integer && right instanceof Integer) return; | ||
+ | |||
+ | throw new RuntimeError(operator, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Create a new '' | ||
+ | Contents are identical to that give in the book except for the package name: | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | package tla; | ||
+ | |||
+ | class RuntimeError extends RuntimeException { | ||
+ | final Token token; | ||
+ | |||
+ | RuntimeError(Token token, String message) { | ||
+ | super(message); | ||
+ | this.token = token; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | We also need some helpers for boolean operator parameters since TLA⁺ is more strict about that; add these to the '' | ||
+ | |||
+ | <code java> | ||
+ | private void checkBooleanOperand(Token operator, Object operand) { | ||
+ | if (operand instanceof Boolean) return; | ||
+ | throw new RuntimeError(operator, | ||
+ | } | ||
+ | |||
+ | private void checkBooleanOperands(Token operator, Object left, Object right) { | ||
+ | if (left instanceof Boolean && right instanceof Boolean) return; | ||
+ | throw new RuntimeError(operator, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Finally, we need a helper for checking whether an operand is a set: | ||
+ | |||
+ | <code java> | ||
+ | private void checkSetOperand(Token operator, Object operand) { | ||
+ | if (operand instanceof Set<?> | ||
+ | throw new RuntimeError(operator, | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Now add these checks to our interpreter code. | ||
+ | Here's negative: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case MINUS: | ||
+ | checkNumberOperand(expr.operator, | ||
+ | return -(int)operand; | ||
+ | </ | ||
+ | Logical not: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case NOT: | ||
+ | checkBooleanOperand(expr.operator, | ||
+ | return !(boolean)operand; | ||
+ | </ | ||
+ | Enabled: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case ENABLED: | ||
+ | checkBooleanOperand(expr.operator, | ||
+ | return (boolean)operand; | ||
+ | </ | ||
+ | Subtraction: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case MINUS: | ||
+ | checkNumberOperands(expr.operator, | ||
+ | return (int)left - (int)right; | ||
+ | </ | ||
+ | Addition: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case PLUS: | ||
+ | checkNumberOperands(expr.operator, | ||
+ | return (int)left + (int)right; | ||
+ | </ | ||
+ | Less than: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case LESS_THAN: | ||
+ | checkNumberOperands(expr.operator, | ||
+ | return (int)left < (int)right; | ||
+ | </ | ||
+ | The '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case DOT_DOT: | ||
+ | checkNumberOperands(expr.operator, | ||
+ | Set< | ||
+ | </ | ||
+ | Set membership: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case IN: | ||
+ | checkSetOperand(expr.operator, | ||
+ | return ((Set<?> | ||
+ | </ | ||
+ | Disjunction: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case OR: | ||
+ | checkBooleanOperands(expr.operator, | ||
+ | return (boolean)left || (boolean)right; | ||
+ | </ | ||
+ | Conjunction: | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case AND: | ||
+ | checkBooleanOperand(expr.operator, | ||
+ | if (!(boolean)left) return false; | ||
+ | Object right = evaluate(expr.right); | ||
+ | checkBooleanOperand(expr.operator, | ||
+ | return (boolean)right; | ||
+ | </ | ||
+ | Finally, '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | case IF: | ||
+ | Object conditional = evaluate(expr.first); | ||
+ | checkBooleanOperand(expr.operator, | ||
+ | return (boolean)conditional ? | ||
+ | evaluate(expr.second) : evaluate(expr.third); | ||
+ | </ | ||
+ | |||
+ | ===== Section 7.4: Hooking Up the Interpreter ===== | ||
+ | |||
+ | We're very close! | ||
+ | In [[https:// | ||
+ | First, add this public '' | ||
+ | <code java [highlight_lines_extra=" | ||
+ | void interpret(Expr expression) { | ||
+ | try { | ||
+ | Object value = evaluate(expression); | ||
+ | System.out.println(stringify(value)); | ||
+ | } catch (RuntimeError error) { | ||
+ | TlaPlus.runtimeError(error); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | In contrast to the book, our '' | ||
+ | |||
+ | <code java> | ||
+ | private String stringify(Object object) { | ||
+ | return object.toString(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Add a '' | ||
+ | |||
+ | <code java> | ||
+ | static void runtimeError(RuntimeError error) { | ||
+ | System.err.println(error.getMessage() + | ||
+ | " | ||
+ | hadRuntimeError = true; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Then add a static '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | static boolean hadError = false; | ||
+ | static boolean hadRuntimeError = false; | ||
+ | |||
+ | public static void main(String[] args) throws IOException { | ||
+ | </ | ||
+ | |||
+ | And exit from the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | run(new String(bytes, | ||
+ | |||
+ | // Indicate an error in the exit code. | ||
+ | if (hadError) System.exit(65); | ||
+ | if (hadRuntimeError) System.exit(70); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Finally, create a static '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | public class TlaPlus { | ||
+ | private static final Interpreter interpreter = new Interpreter(); | ||
+ | static boolean hadError = false; | ||
+ | </ | ||
+ | |||
+ | Then finish it off by actually interpreting the expression given by the user; add this in the '' | ||
+ | |||
+ | <code java [highlight_lines_extra=" | ||
+ | // Stop if there was a syntax error. | ||
+ | if (hadError) return; | ||
+ | |||
+ | interpreter.interpret(expression); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Voila! | ||
+ | Your program will now interpret any constant TLA⁺ expression you provide it! | ||
+ | Try it out: | ||
+ | < | ||
+ | > {0 .. 2, IF TRUE THEN 1 ELSE 2, {}} | ||
+ | [[], 1, [0, 1, 2]] | ||
+ | </ | ||
+ | |||
+ | If you got out of sync, you can find a snapshot of the expected state of the code in [[https:// | ||
+ | Next tutorial: [[https:// | ||
+ | |||
+ | ====== Challenges ====== | ||
+ | |||
+ | Here are some optional challenges to flesh out your TLA⁺ interpreter, | ||
+ | You should save a copy of your code before attempting these. | ||
+ | - TLC evaluates cross-type equality comparison as a runtime error. For example, '' | ||
+ | - TLC requires set elements to all be of the same type. Trying to construct the set '' | ||
+ | |||
+ | [[https:// | ||
+ |