Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
creating:scanning [2025/03/29 23:25] – Add link to parsing tutorial, minor wording fixes ahelwer | creating:scanning [2025/04/27 17:35] (current) – Fixed link to repo ahelwer | ||
---|---|---|---|
Line 1: | Line 1: | ||
- | ====== Scanning TLA⁺ Tokens ====== | + | ======= Scanning TLA⁺ Tokens |
This page corresponds to [[https:// | This page corresponds to [[https:// | ||
Line 5: | Line 5: | ||
For each section in the chapter, first read the section in the book and then read the corresponding section tutorial on this page to see how to adapt the concepts to TLA⁺. | For each section in the chapter, first read the section in the book and then read the corresponding section tutorial on this page to see how to adapt the concepts to TLA⁺. | ||
- | ==== Section 4.1: The Interpreter Framework ==== | + | ====== Section 4.1: The Interpreter Framework |
- | Everything | + | Almost everything |
- | You should thus have followed along and arrived at something similar | + | We do make one small functional modification: |
+ | Here's our main file, '' | ||
- | <code java [enable_line_numbers=" | + | <code java [enable_line_numbers=" |
- | package | + | package tla; |
import java.io.BufferedReader; | import java.io.BufferedReader; | ||
import java.io.IOException; | import java.io.IOException; | ||
import java.io.InputStreamReader; | import java.io.InputStreamReader; | ||
- | import java.nio.charset.Charset; | + | import java.nio.charset.StandardCharsets; |
import java.nio.file.Files; | import java.nio.file.Files; | ||
import java.nio.file.Paths; | import java.nio.file.Paths; | ||
Line 37: | Line 38: | ||
private static void runFile(String path) throws IOException { | private static void runFile(String path) throws IOException { | ||
byte[] bytes = Files.readAllBytes(Paths.get(path)); | byte[] bytes = Files.readAllBytes(Paths.get(path)); | ||
- | run(new String(bytes, | + | run(new String(bytes, |
// Indicate an error in the exit code. | // Indicate an error in the exit code. | ||
Line 79: | Line 80: | ||
</ | </ | ||
- | ==== Section 4.2: Lexemes and Tokens ==== | + | ====== Section 4.2: Lexemes and Tokens |
The '' | The '' | ||
Line 86: | Line 87: | ||
<code java [enable_line_numbers=" | <code java [enable_line_numbers=" | ||
- | package | + | package tla; |
enum TokenType { | enum TokenType { | ||
Line 92: | Line 93: | ||
LEFT_PAREN, RIGHT_PAREN, | LEFT_PAREN, RIGHT_PAREN, | ||
LEFT_BRACKET, | LEFT_BRACKET, | ||
- | MINUS, PLUS, LESS_THAN, | + | MINUS, PLUS, LESS_THAN, |
// Short fixed-length tokens. | // Short fixed-length tokens. | ||
Line 98: | Line 99: | ||
// Literals. | // Literals. | ||
- | IDENTIFIER, | + | IDENTIFIER, |
// Keywords. | // Keywords. | ||
Line 120: | Line 121: | ||
<code java [enable_line_numbers=" | <code java [enable_line_numbers=" | ||
- | package | + | package tla; |
class Token { | class Token { | ||
Line 143: | Line 144: | ||
</ | </ | ||
- | ==== Section 4.4: The Scanner Class ==== | + | ====== Section 4.4: The Scanner Class ====== |
Nothing in section 4.3 requires modification, | Nothing in section 4.3 requires modification, | ||
Line 149: | Line 150: | ||
<code java [enable_line_numbers=" | <code java [enable_line_numbers=" | ||
- | package | + | package tla; |
import java.util.ArrayList; | import java.util.ArrayList; | ||
Line 156: | Line 157: | ||
import java.util.Map; | import java.util.Map; | ||
- | import static | + | import static tla.TokenType.*; |
class Scanner { | class Scanner { | ||
Line 187: | Line 188: | ||
</ | </ | ||
- | ==== Section 4.5: Recognizing Lexemes ==== | + | ====== Section 4.5: Recognizing Lexemes |
[[https:// | [[https:// | ||
Line 208: | Line 209: | ||
case ' | case ' | ||
case '<': | case '<': | ||
- | case ' | + | case ' |
case ' | case ' | ||
default: | default: | ||
Line 284: | Line 285: | ||
</ | </ | ||
- | ==== Section 4.6: Longer Lexemes ==== | + | ====== Section 4.6: Longer Lexemes |
In [[https:// | In [[https:// | ||
Line 363: | Line 364: | ||
private void number() { | private void number() { | ||
while (isDigit(peek())) advance(); | while (isDigit(peek())) advance(); | ||
- | addToken(NAT_NUMBER, | + | addToken(NUMBER, |
Integer.parseInt(source.substring(start, | Integer.parseInt(source.substring(start, | ||
} | } | ||
</ | </ | ||
- | ==== Section 4.7: Reserved Words and Identifiers ==== | + | ====== Section 4.7: Reserved Words and Identifiers |
In [[https:// | In [[https:// | ||
Line 417: | Line 418: | ||
keywords = new HashMap<> | keywords = new HashMap<> | ||
keywords.put(" | keywords.put(" | ||
- | keywords.put(" | + | keywords.put(" |
keywords.put(" | keywords.put(" | ||
keywords.put(" | keywords.put(" | ||
Line 472: | Line 473: | ||
symbols.put(" | symbols.put(" | ||
symbols.put(" | symbols.put(" | ||
- | symbols.put(" | + | symbols.put(" |
- | symbols.put(" | + | symbols.put(" |
symbols.put(" | symbols.put(" | ||
} | } | ||
Line 507: | Line 508: | ||
Isn't it amazing how quickly this is coming together? | Isn't it amazing how quickly this is coming together? | ||
The simplicity of the required code is one of the great wonders of language implementation. | The simplicity of the required code is one of the great wonders of language implementation. | ||
+ | If you got lost somewhere along the way, you can find a snapshot of the code on this page [[https:// | ||
Next we learn how to collect our tokens into a parse tree! | Next we learn how to collect our tokens into a parse tree! | ||
- | Continue the tutorial at [[creating:syntax|Parsing TLA⁺ | + | Continue the tutorial at [[creating:expressions|Parsing |
- | ===== Challenges ===== | + | ====== Challenges |
Here are some optional challenges to flesh out your TLA⁺ scanner, roughly ranked from simplest to most difficult. | Here are some optional challenges to flesh out your TLA⁺ scanner, roughly ranked from simplest to most difficult. | ||
You should save a copy of your code before attempting these. | You should save a copy of your code before attempting these. | ||
+ | - Our error reporting functionality only reports the line on which the error occurs, even though we now also track the column. Modify the error reporting functions to pipe through and print out the column location of the error. | ||
- Implement token recognition for the '' | - Implement token recognition for the '' | ||
- Modify '' | - Modify '' | ||
- Add support for nestable block comments like '' | - Add support for nestable block comments like '' | ||
- Similar to nested block comments, add support for extramodular text & nested modules. TLA⁺ files are properly supposed to ignore all text outside of modules, treating it the same as comments. Lexing TLA⁺ tokens should only start after reading ahead and detecting a '' | - Similar to nested block comments, add support for extramodular text & nested modules. TLA⁺ files are properly supposed to ignore all text outside of modules, treating it the same as comments. Lexing TLA⁺ tokens should only start after reading ahead and detecting a '' | ||
- | - Add Unicode support. This is surprisingly straightforward. Instead of using the '' | + | - Add Unicode support. Instead of using the '' |
+ | |||
+ | [[creating: | ||