Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
| creating:syntax [2025/03/27 20:18] – Add TLA+ token types ahelwer | creating:syntax [2025/04/06 23:20] (current) – Deleting page ahelwer | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| - | ====== Parsing TLA⁺ Syntax ====== | ||
| - | TLA⁺ is a large, complicated, | ||
| - | This is assisted by [[https:// | ||
| - | This tutorial only uses a minimal subset of TLA⁺ syntax, just enough to handle [[https:// | ||
| - | While that may seem limiting, this tutorial tries to focus on the difficult & interesting parts of parsing the language instead of more mundane drudgework like handling all hundred-some user-definable operator symbols. | ||
| - | You are encouraged to extend this minimal core as you wish; language tooling is best developed incrementally! | ||
| - | Slowly filling in the details of this rough language sketch has a satisfying meditative aspect. | ||
| - | |||
| - | Here is what our minimal language subset includes: | ||
| - | * The '' | ||
| - | * Named parameterized operator definitions like '' | ||
| - | * Comments, both single-line like '' | ||
| - | * Declaration of ''VARIABLES'' | ||
| - | * Finite set literals, like '' | ||
| - | * The '' | ||
| - | * Some infix operators: '' | ||
| - | * The variable-priming suffix operator | ||
| - | * Parentheses to control expression grouping | ||
| - | * Vertically-aligned conjunction & disjunction lists | ||
| - | * The '' | ||
| - | * Natural numbers like '' | ||
| - | * Boolean values ''TRUE'' and ''FALSE'' | ||
| - | |||
| - | Notably, the following will not be covered in this tutorial: | ||
| - | * PlusCal | ||
| - | * Declaring '' | ||
| - | * Nesting modules or importing other modules through '' | ||
| - | * Set map & set filter syntax, like '' | ||
| - | * Universal & existential quantification, | ||
| - | * The '' | ||
| - | * Functions, like '' | ||
| - | * Sequences, like ''<< | ||
| - | * The '' | ||
| - | * Temporal- or action-level operators like '' | ||
| - | * Higher-order operator parameters like '' | ||
| - | * The TLA⁺ proof language | ||
| - | |||
| - | As outlined above, you are free to add these missing features (or others) as you wish. | ||
| - | |||
| - | ===== Preparation ===== | ||
| - | |||
| - | Read part I (the first three chapters) of the free online textbook //Crafting Interpreters// by Robert Nystrom. | ||
| - | We will be closely following the material in this book, modifying it to our uses. | ||
| - | The first two chapters are a nice introduction and overview of language implementation generally. | ||
| - | Chapter three specifies a toy language called Lox, to be used as an object of study for the remainder of the book. | ||
| - | Our minimal TLA⁺ subset has some similarity to Lox with regard to expressions involving integers and booleans, but also differences - you can skip the section on closures (unless you want to implement higher-order operator parameters yourself) and the section on classes. | ||
| - | What's important is that it's similar enough for all the fundamentals to still apply! | ||
| - | |||
| - | [[https:// | ||
| - | Everything before section 4.2 can be left unchanged from what's given, although of course it makes sense to change some of the names from "Lox" to "TlaPlus". | ||
| - | You should thus have followed along and arrived at the following file ''TlaPlus.java'': | ||
| - | |||
| - | <code java> | ||
| - | package com.craftinginterpreters.tla; | ||
| - | |||
| - | import java.io.BufferedReader; | ||
| - | import java.io.IOException; | ||
| - | import java.io.InputStreamReader; | ||
| - | import java.nio.charset.Charset; | ||
| - | import java.nio.file.Files; | ||
| - | import java.nio.file.Paths; | ||
| - | import java.util.List; | ||
| - | |||
| - | public class TlaPlus { | ||
| - | static boolean hadError = false; | ||
| - | |||
| - | public static void main(String[] args) throws IOException { | ||
| - | if (args.length > 1) { | ||
| - | System.out.println(" | ||
| - | System.exit(64); | ||
| - | } else if (args.length == 1) { | ||
| - | runFile(args[0]); | ||
| - | } else { | ||
| - | runPrompt(); | ||
| - | } | ||
| - | } | ||
| - | |||
| - | private static void runFile(String path) throws IOException { | ||
| - | byte[] bytes = Files.readAllBytes(Paths.get(path)); | ||
| - | run(new String(bytes, Charset.defaultCharset())); | ||
| - | |||
| - | // Indicate an error in the exit code. | ||
| - | if (hadError) System.exit(65); | ||
| - | } | ||
| - | |||
| - | private static void runPrompt() throws IOException { | ||
| - | InputStreamReader input = new InputStreamReader(System.in); | ||
| - | BufferedReader reader = new BufferedReader(input); | ||
| - | |||
| - | for (;;) { | ||
| - | System.out.print("> "); | ||
| - | String line = reader.readLine(); | ||
| - | if (line == null) break; | ||
| - | run(line); | ||
| - | hadError = false; | ||
| - | } | ||
| - | } | ||
| - | |||
| - | private static void run(String source) { | ||
| - | Scanner scanner = new Scanner(source); | ||
| - | List<Token> tokens = scanner.scanTokens(); | ||
| - | |||
| - | // For now, just print the tokens. | ||
| - | for (Token token : tokens) { | ||
| - | System.out.println(token); | ||
| - | } | ||
| - | } | ||
| - | |||
| - | static void error(int line, String message) { | ||
| - | report(line, "", message); | ||
| - | } | ||
| - | |||
| - | private static void report(int line, String where, | ||
| - | String message) { | ||
| - | System.err.println( | ||
| - | "[line " + line + "] Error" + where + ": " + message); | ||
| - | hadError = true; | ||
| - | } | ||
| - | } | ||
| - | </code> | ||
| - | |||
| - | The '' | ||
| - | We instead use the atomic components of our minimal TLA⁺ language subset defined above. | ||
| - | Adapting the snippet in [[https:// | ||
| - | |||
| - | <code java> | ||
| - | package com.craftinginterpreters.tla; | ||
| - | |||
| - | enum TokenType { | ||
| - | // Single-character tokens. | ||
| - | LEFT_PAREN, RIGHT_PAREN, | ||
| - | COMMA, MINUS, PLUS, EQUALS, LESS_THAN, NEGATION, PRIME, | ||
| - | |||
| - | // One or two character tokens. | ||
| - | AND, OR, DEF_EQ, IN, | ||
| - | |||
| - | // Literals. | ||
| - | IDENTIFIER, NUMBER, | ||
| - | |||
| - | // Keywords. | ||
| - | VARIABLES, ENABLED, IF, THEN, ELSE, | ||
| - | SINGLE_LINE, | ||
| - | |||
| - | EOF | ||
| - | } | ||
| - | </code> | ||
| - | |||
| - | There is a very minor design decision here, of the type encountered innumerable times when writing a parser. | ||
| - | Our language includes boolean values '' | ||
| - | This is the approach we take here, and is also the approach taken by the existing tools. | ||
| - | This sort of works-either-way design dilemma occurs often, and we will see it again when trying to decide whether to disallow a snippet of invalid TLA⁺ at the syntactic or semantic level. | ||
| - | |||
| - | In [[https:// | ||
| - | This will come in useful when parsing vertically-aligned conjunction & disjunction lists. | ||
| - | |||
| - | <code java> | ||
| - | package com.craftinginterpreters.tla; | ||
| - | |||
| - | class Token { | ||
| - | final TokenType type; | ||
| - | final String lexeme; | ||
| - | final Object literal; | ||
| - | final int line; | ||
| - | final int column; | ||
| - | |||
| - | Token(TokenType type, String lexeme, Object literal, int line, int column) { | ||
| - | this.type = type; | ||
| - | this.lexeme = lexeme; | ||
| - | this.literal = literal; | ||
| - | this.line = line; | ||
| - | this.column = column; | ||
| - | } | ||
| - | |||
| - | public String toString() { | ||
| - | return type + " " + lexeme + " " + literal; | ||
| - | } | ||
| - | } | ||
| - | </code> | ||
| - | |||
| - | We now move on to [[https:// | ||
| - | Here we again make several logical modifications. | ||
| - | The first is to track the column in addition to the line, mirroring our addition to the ''Token'' class: | ||
| - | |||
| - | <code java> | ||
| - | private final List<Token> tokens = new ArrayList<>(); | ||
| - | private int start = 0; | ||
| - | private int current = 0; | ||
| - | private int line = 1; | ||
| - | private int column = 0; | ||
| - | </code> | ||