Compare commits

...

10 Commits

Author SHA1 Message Date
f695f8a52e Improve switch's diagnostics 2026-05-02 19:40:37 +07:00
e29ffb2a9c Improve selectors' diagnostics 2026-05-01 18:43:55 +07:00
c58bcd4aac Change new() parser to make use of matching-) functionality 2026-04-30 17:27:27 +07:00
b1f0714483 Improve block body's diagnostics 2026-04-29 20:13:58 +07:00
9d3313995e Add delimiter matching to lexer 2026-04-29 13:50:45 +07:00
150bd2f5cf Clean control flow code 2026-04-28 21:28:34 +07:00
519d0cd3a7 Implement better diagnostics for control flow and primaries 2026-04-28 14:54:36 +07:00
8632ba0a86 Add more diagnostic messages 2026-04-12 17:52:39 +07:00
588790b9b4 Refactor everything
Huge dump of refactored code. Still in the middle of the changes that
are to be squashed later into one huge monster commit, because there is
no value in anything atomic here.
2026-04-05 20:32:11 +07:00
5bd9aadc55 Add DeclarationLiteral parsing
Added method for parsing simple literals for use in top-level class
declarations.

Along with this change we've also moved the methods specific to parsing
low-level literals into the same file as the new method, since that
structure made more sense.
2025-09-23 20:27:12 +07:00
85 changed files with 25126 additions and 3958 deletions

287
Cargo.lock generated
View File

@ -78,6 +78,12 @@ version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "bytes"
version = "1.10.1"
@ -96,6 +102,73 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31"
[[package]]
name = "convert_case"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crossterm"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
dependencies = [
"bitflags 2.9.1",
"crossterm_winapi",
"derive_more",
"document-features",
"mio",
"parking_lot",
"rustix",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
dependencies = [
"winapi",
]
[[package]]
name = "dashmap"
version = "5.5.3"
@ -109,13 +182,38 @@ dependencies = [
"parking_lot_core",
]
[[package]]
name = "derive_more"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
dependencies = [
"derive_more-impl",
]
[[package]]
name = "derive_more-impl"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]]
name = "dev_tests"
version = "0.1.0"
dependencies = [
"chardet",
"encoding_rs",
"is-terminal",
"libc",
"rottlib",
"sysinfo",
"walkdir",
]
@ -130,6 +228,21 @@ dependencies = [
"syn",
]
[[package]]
name = "document-features"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
dependencies = [
"litrs",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "encoding_rs"
version = "0.8.35"
@ -139,6 +252,16 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "errno"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -243,6 +366,12 @@ version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "httparse"
version = "1.10.1"
@ -367,6 +496,17 @@ dependencies = [
"libc",
]
[[package]]
name = "is-terminal"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
"windows-sys",
]
[[package]]
name = "itoa"
version = "1.0.15"
@ -385,12 +525,24 @@ version = "0.2.174"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
[[package]]
name = "litemap"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "litrs"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
[[package]]
name = "lock_api"
version = "0.4.13"
@ -401,6 +553,12 @@ dependencies = [
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]]
name = "logos"
version = "0.15.0"
@ -470,10 +628,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
dependencies = [
"libc",
"log",
"wasi",
"windows-sys",
]
[[package]]
name = "ntapi"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
dependencies = [
"winapi",
]
[[package]]
name = "object"
version = "0.36.7"
@ -577,6 +745,26 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.17"
@ -596,6 +784,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
name = "rottlib"
version = "0.1.0"
dependencies = [
"backtrace",
"bumpalo",
"crossterm",
"logos",
]
@ -623,6 +814,19 @@ dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
"bitflags 2.9.1",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "ryu"
version = "1.0.20"
@ -693,6 +897,27 @@ dependencies = [
"syn",
]
[[package]]
name = "signal-hook"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.5"
@ -752,6 +977,21 @@ dependencies = [
"syn",
]
[[package]]
name = "sysinfo"
version = "0.30.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
dependencies = [
"cfg-if",
"core-foundation-sys",
"libc",
"ntapi",
"once_cell",
"rayon",
"windows",
]
[[package]]
name = "tinystr"
version = "0.8.1"
@ -903,6 +1143,12 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-segmentation"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
[[package]]
name = "url"
version = "2.5.4"
@ -937,6 +1183,22 @@ version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.9"
@ -946,6 +1208,31 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
"windows-core",
"windows-targets",
]
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-sys"
version = "0.59.0"

View File

@ -20,7 +20,7 @@ codegen-units = 1 # Reduce number of codegen units to increase optimizations
debug = false # strip all debug info
[profile.flamegraph]
inherits = "release" # start from release
inherits = "dev" # start from dev
strip = false
debug = true # full DWARF info for unwinding
split-debuginfo = "unpacked" # keep symbols inside the binary

View File

@ -3,22 +3,21 @@ name = "dev_tests"
version = "0.1.0"
edition = "2024"
[[bin]]
name = "dump_tokens"
path = "src/dump_tokens.rs"
[[bin]]
name = "uc_lexer_verify"
path = "src/uc_lexer_verify.rs"
[[bin]]
name = "temp"
path = "src/temp.rs"
name = "verify_expr"
path = "src/verify_expr.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rottlib = { version = "0", path = "../rottlib", features = ["debug"] }
is-terminal = "0.4"
libc = "0.2"
sysinfo = "0.30"
walkdir="2.5"
encoding_rs="0.8"
chardet="0.2"

View File

@ -1,76 +0,0 @@
use std::{
fs,
path::{Path, PathBuf},
};
use encoding_rs::{Encoding, UTF_8};
use rottlib::lexer::{DebugTools, TokenizedFile};
/// Walks the directory tree under `root` and looks for the first regular file
/// whose *basename* is exactly `needle` (the comparison is case-sensitive).
///
/// Directory entries that cannot be read are silently skipped.
///
/// Returns the canonicalized path of the first match, or `None` when nothing
/// matches or when canonicalizing that first match fails.
fn find_file(root: &Path, needle: &str) -> Option<PathBuf> {
    let first_match = walkdir::WalkDir::new(root)
        .into_iter()
        .filter_map(Result::ok)
        .find(|entry| {
            let candidate = entry.path();
            let base_name = candidate.file_name().and_then(|name| name.to_str());
            candidate.is_file() && base_name == Some(needle)
        })?;
    // Only the first match is considered: a canonicalization failure yields
    // `None` rather than continuing the search.
    fs::canonicalize(first_match.path()).ok()
}
/// CLI: `dump_tokens <root_dir> <file_name>` - searches for `<file_name>`
/// recursively inside `<root_dir>`, tokenizes it and calls
/// `dump_debug_layout` on the result.
///
/// This utility takes a *root directory* and a *file name* instead of a full
/// path so that we do not have to type whole paths out:
///
/// - We know where all the sources are;
/// - We usually just know the name of the file that is being problematic.
///
/// Prints a message to stderr and exits with status 1 when an argument is
/// missing, the root directory does not exist, the file is not found, or the
/// file cannot be read.
fn main() {
    // The usage line names this binary; it used to say `inspect_uc`.
    const USAGE: &str = "Usage: dump_tokens <root_dir> <file_name>";

    let mut args = std::env::args().skip(1);
    let (Some(root_dir), Some(file_name)) = (args.next(), args.next()) else {
        eprintln!("{USAGE}");
        std::process::exit(1);
    };

    let root = PathBuf::from(&root_dir);
    if !root.exists() {
        eprintln!("Root directory '{root_dir}' does not exist.");
        std::process::exit(1);
    }

    let found_path = find_file(&root, &file_name).unwrap_or_else(|| {
        eprintln!("File '{file_name}' not found under '{root_dir}'.");
        std::process::exit(1);
    });

    // Read & decode
    let raw_bytes = match fs::read(&found_path) {
        Ok(sources) => sources,
        Err(error) => {
            eprintln!("Could not read {}: {error}", found_path.display());
            std::process::exit(1);
        }
    };
    // Guess the encoding from the raw bytes; fall back to UTF-8 when the
    // detected label is not one `encoding_rs` knows.
    let (encoding_label, _, _) = chardet::detect(&raw_bytes);
    let encoding = Encoding::for_label(encoding_label.as_bytes()).unwrap_or(UTF_8);
    let (decoded_str, _, _) = encoding.decode(&raw_bytes);
    let source_text = decoded_str.to_string();

    let tokenized_file = TokenizedFile::from_str(&source_text);
    tokenized_file.dump_debug_layout();
}

View File

@ -1,129 +0,0 @@
//! src/main.rs
//! --------------------------------------------
//! Build & run:
//! cargo run
//! --------------------------------------------
use std::env;
use std::fs;
use std::io::{self, Read, Write};
use std::path::Path;
use rottlib::arena::Arena;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::{ParseError, Parser, pretty::ExprTree};
/*
- Convenient array definitions: [1, 3, 5, 2, 4]
- Boolean dynamic arrays
- Structures in default properties
- Auto conversion of arrays into strings
- Making 'var' and 'local' unnecessary
- Allowing variable creation in 'for' loops
- Allowing variable creation at any place inside a function
- Default parameters for functions
- Function overloading?
- repeat/until
- The syntax of the default properties block is pretty strict for an arcane reason. Particularly adding spaces before or after the "=" will lead to errors in pre-UT2003 versions.
- Scopes
- different names for variables and in config file
- anonymous pairs (objects?) and value destruction
>>> AST > HIR > MIR > byte code
*/
// Closest plan:
// - Add top-level declaration parsing
// - Sort out the pretty.rs mess somehow
// - COMMITS
// ---------------------------------------
// - Add fancy error reporting
// - Make a fancy REPL
// - Add evaluation
//
// WARNINGS:
// - Empty code/switch blocks

/// Tokenizes `src`, parses one expression from it and prints that expression
/// to stdout wrapped in [`ExprTree`].
///
/// As written this always returns `Ok(())`; the `Result` return type only
/// matches the signature the callers expect.
fn parse_and_print(src: &str) -> Result<(), ParseError> {
    let token_file = TokenizedFile::from_str(src);
    let node_arena = Arena::new();
    let mut expr_parser = Parser::new(&token_file, &node_arena);
    let parsed = expr_parser.parse_expression(); // ArenaNode<Expression>
    // `&*parsed` relies on the node implementing `Deref`; if it does not,
    // `parsed.as_ref()` is the alternative.
    println!("{}", ExprTree(&*parsed));
    Ok(())
}
/// Runs a single read-parse-print round: prompts on stdout, reads one line
/// from stdin and hands it to `parse_and_print`.
///
/// Returns `Ok(())` without parsing when the line cannot be read (a message
/// is printed to stderr) or when the input is blank; otherwise returns
/// whatever `parse_and_print` returns.
///
/// Panics if stdout cannot be flushed.
fn repl_once() -> Result<(), ParseError> {
    // `print!` does not emit a newline, so flush to make the prompt visible
    // before blocking on stdin.
    print!("Enter a statement > ");
    io::stdout().flush().unwrap();

    let mut input = String::new();
    if io::stdin().read_line(&mut input).is_err() {
        eprintln!("failed to read input");
        return Ok(());
    }

    if input.trim().is_empty() {
        return Ok(());
    }

    parse_and_print(&input)
}
/// Reads standard input until end-of-file and returns it as one `String`.
///
/// Fails with the underlying I/O error, including when the input is not
/// valid UTF-8.
fn read_stdin_all() -> io::Result<String> {
    io::read_to_string(io::stdin())
}
/// Reads the whole file at `path` into a `String`.
///
/// Fails with the underlying I/O error when the file cannot be opened or
/// read, or when its contents are not valid UTF-8.
fn read_file_to_string(path: &Path) -> io::Result<String> {
    let mut file = fs::File::open(path)?;
    let mut contents = String::new();
    file.read_to_string(&mut contents)?;
    Ok(contents)
}
/// Entry point. Accepts at most one positional argument:
///
/// - no argument: run one REPL round;
/// - `-h` / `--help`: print usage and return;
/// - `-`: read the whole source from stdin and parse it;
/// - anything else: treat it as a file path, read the file and parse it.
///
/// Read failures are reported on stderr and still return `Ok(())`.
fn main() -> Result<(), ParseError> {
    let Some(first_arg) = env::args().skip(1).next() else {
        // No filename provided -> keep REPL behavior
        return repl_once();
    };

    match first_arg.as_str() {
        "-h" | "--help" => {
            let program: String = env::args().next().unwrap_or_else(|| "prog".into());
            println!("Usage:");
            println!(" {} # REPL", program);
            println!(" {} <file> # parse file", program);
            println!(" {} - # read source from stdin", program);
            Ok(())
        }
        // "-" means read all of stdin.
        "-" => match read_stdin_all() {
            Ok(source) => parse_and_print(&source),
            Err(error) => {
                eprintln!("stdin read error: {}", error);
                Ok(())
            }
        },
        file_arg => {
            let path = Path::new(file_arg);
            match read_file_to_string(path) {
                Ok(source) => parse_and_print(&source),
                Err(error) => {
                    eprintln!("file read error ({}): {}", path.display(), error);
                    Ok(())
                }
            }
        }
    }
}

View File

@ -1,122 +1,356 @@
use std::{collections::HashSet, fs, path::PathBuf};
#![allow(
clippy::all,
clippy::pedantic,
clippy::nursery,
clippy::cargo,
clippy::restriction
)]
use rottlib::lexer::{DebugTools, TokenizedFile};
use std::{
collections::HashSet,
fs,
io::{self, Write},
path::PathBuf,
time::Instant,
};
/// Read `ignore.txt` (one path per line, `#` for comments) from root directory
/// and turn it into a canonicalized [`HashSet<PathBuf>`].
use encoding_rs::Encoding;
use rottlib::diagnostics::Diagnostic as Diag;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::Parser;
// ---------- CONFIG ----------
const FILE_LIMIT: usize = 10000; // cap on files scanned
const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics
const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics
/// If true, print the old debug struct dump after each pretty diagnostic.
const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;
// Cargo.toml additions:
// is-terminal = "0.4"
// sysinfo = { version = "0.30", features = ["multithread"] }
// walkdir = "2"
// chardet = "0.2"
// encoding_rs = "0.8"
/// Renders one diagnostic by forwarding to its `render` method.
///
/// `file` is the tokenized file the diagnostic belongs to and `file_name` is
/// the name passed along for display. Where the rendered output goes is
/// decided by `render` itself and is not visible from here.
fn render_diagnostic(diag: &Diag, file: &TokenizedFile<'_>, file_name: &str) {
diag.render(file, file_name);
}
/// Renders `diags`, showing at most the first `DIAG_SHOW_FIRST` and the last
/// `DIAG_SHOW_LAST` entries.
///
/// When more diagnostics exist than fit in those two windows, a
/// `... N diagnostics omitted ...` line is printed to stderr between them.
/// If `ALSO_PRINT_DEBUG_AFTER_PRETTY` is set, each rendered diagnostic is
/// followed by its 1-based position in `diags` and its `{:#?}` dump.
fn render_diagnostics_window(diags: &[Diag], tf: &TokenizedFile<'_>, file_name: &str) {
    // Renders one diagnostic; `ordinal` is its 1-based position in `diags`.
    let emit = |ordinal: usize, diag: &Diag| {
        render_diagnostic(diag, tf, file_name);
        if ALSO_PRINT_DEBUG_AFTER_PRETTY {
            eprintln!("#{}: {:#?}", ordinal, diag);
        }
    };

    let total = diags.len();
    let head_len = DIAG_SHOW_FIRST.min(total);
    let tail_len = DIAG_SHOW_LAST.min(total.saturating_sub(head_len));
    let shown = head_len + tail_len;

    if total <= shown {
        // Everything fits: no window, no omission marker.
        for (position, diag) in diags.iter().enumerate() {
            emit(position + 1, diag);
        }
        return;
    }

    for (position, diag) in diags[..head_len].iter().enumerate() {
        emit(position + 1, diag);
    }
    eprintln!("... {} diagnostics omitted ...", total - shown);
    let tail_start = total - tail_len;
    for (offset, diag) in diags[tail_start..].iter().enumerate() {
        emit(tail_start + offset + 1, diag);
    }
}
/// Returns the resident set size of the current process in MB
/// (1 MB = 1024 * 1024 bytes here).
///
/// On Linux the value is computed from the second field of
/// `/proc/self/statm` (resident pages) multiplied by the page size reported
/// by `sysconf(_SC_PAGESIZE)`. If any of that fails, or on other targets,
/// the value comes from `sysinfo`.
///
/// Returns 0 when the current process cannot be identified or found.
fn rss_mb() -> u64 {
    const BYTES_PER_MB: u64 = 1024 * 1024;

    #[cfg(target_os = "linux")]
    {
        use std::io::Read;
        let mut statm = String::new();
        if let Ok(mut f) = std::fs::File::open("/proc/self/statm")
            && f.read_to_string(&mut statm).is_ok()
            && let Some(rss_pages) = statm
                .split_whitespace()
                .nth(1)
                .and_then(|x| x.parse::<u64>().ok())
        {
            let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
            // `sysconf` reports failure as -1; casting that to `u64` would
            // produce an absurd page size, so fall through to sysinfo instead.
            if page > 0 {
                return rss_pages.saturating_mul(page as u64) / BYTES_PER_MB;
            }
        }
    }

    use sysinfo::{System, get_current_pid};
    let mut sys = System::new();
    sys.refresh_processes();
    let Ok(pid) = get_current_pid() else { return 0 };
    // sysinfo 0.30 reports `Process::memory()` in bytes, so dividing by 1024
    // (as before) produced KiB rather than MB.
    sys.process(pid).map_or(0, |p| p.memory() / BYTES_PER_MB)
}
/// Prints one progress line to stdout: the right-aligned `label`, the time
/// elapsed since `t0`, and the current value of `rss_mb()`.
fn mark(label: &str, t0: Instant) {
    // Take the timestamp before sampling memory, matching the order in which
    // the two values appear in the line.
    let since_start = t0.elapsed();
    let resident_mb = rss_mb();
    println!(
        "[{:>14}] t={:>8.2?} rss={} MB",
        label, since_start, resident_mb
    );
}
/// Read `ignore.txt` next to `root` and build a canonicalized set.
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
let ignore_file = root.join("ignore.txt");
if !ignore_file.exists() {
return HashSet::new();
}
let content = match fs::read_to_string(&ignore_file) {
Ok(content) => content,
Err(error) => {
eprintln!("Could not read {}: {error}", ignore_file.display());
Ok(s) => s,
Err(e) => {
eprintln!("Could not read {}: {e}", ignore_file.display());
return HashSet::new();
}
};
content
.lines()
.map(str::trim)
.filter(|line| !line.is_empty() && !line.starts_with('#'))
.filter(|l| !l.is_empty() && !l.starts_with('#'))
.filter_map(|line| {
let next_path = PathBuf::from(line);
let absolute_path = if next_path.is_absolute() {
next_path
} else {
root.join(next_path)
};
fs::canonicalize(absolute_path).ok()
let p = PathBuf::from(line);
let abs = if p.is_absolute() { p } else { root.join(p) };
fs::canonicalize(abs).ok()
})
.collect()
}
/// CLI: `verify_uc <root_dir>` - find all `.uc` files in the provided directory
/// (except those listed in `ignore.txt` in the root) and test them all.
///
/// Reported execution time is the tokenization time, without considering time
/// it takes to read files from disk.
///
/// `ignore.txt` is for listing specific files, not directories.
fn main() {
let root_dir = std::env::args().nth(1).unwrap(); // it is fine to crash debug utility
let root = PathBuf::from(&root_dir);
/// Pauses until the user presses Enter, but only when stdin is a terminal.
///
/// Meant to be called right before error output is printed. Stdout is
/// flushed first in every case; when stdin is a terminal, `msg` is written to
/// stderr as a prompt and one line is read from stdin and discarded. All I/O
/// errors are ignored.
fn wait_before_errors(msg: &str) {
    // The standard library provides terminal detection, so the external
    // `is_terminal` crate is not needed for this check.
    use std::io::IsTerminal;

    let _ = io::stdout().flush();
    if !io::stdin().is_terminal() {
        // Non-interactive run (piped or redirected stdin): never block.
        return;
    }
    eprint!("{msg}");
    let _ = io::stderr().flush();
    let mut discarded = String::new();
    let _ = io::stdin().read_line(&mut discarded);
}
/// CLI: `verify_uc <root_dir> [file_name]`
///
fn main() {
let mut args = std::env::args().skip(1);
let root_dir = args.next().unwrap_or_else(|| {
eprintln!("Usage: verify_uc <root_dir> [file_name]");
std::process::exit(1);
});
let target_raw = args.next(); // optional file name hint
let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase());
let single_mode = target_ci.is_some();
let root = PathBuf::from(&root_dir);
if !root.exists() {
eprintln!("Root directory '{root_dir}' does not exist.");
std::process::exit(1);
}
// Load files
let ignored_paths = load_ignore_set(&root);
let t0 = Instant::now();
mark("baseline", t0);
// Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode
let ignored = load_ignore_set(&root);
let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
let mut seen = 0usize;
let mut picked_any = false;
for entry in walkdir::WalkDir::new(&root)
.into_iter()
.filter_map(Result::ok) // for debug tool this is ok
.filter(|entry| {
let path = entry.path();
// Skip anything explicitly ignored
if let Ok(absolute_path) = fs::canonicalize(path) {
if ignored_paths.contains(&absolute_path) {
return false;
}
.filter_map(Result::ok)
.filter(|e| {
let path = e.path();
if let Ok(abs) = fs::canonicalize(path)
&& ignored.contains(&abs)
{
return false;
}
// Must be *.uc
path.is_file()
&& path
.extension()
.and_then(|extension| extension.to_str())
.is_some_and(|extension| extension.eq_ignore_ascii_case("uc"))
.and_then(|e| e.to_str())
.is_some_and(|e| e.eq_ignore_ascii_case("uc"))
})
{
if !single_mode && seen >= FILE_LIMIT {
break;
}
// If in single-file mode, keep only the first whose file name matches.
if let Some(needle) = target_ci.as_deref() {
let fname = entry
.path()
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("");
let fname_lc = fname.to_ascii_lowercase();
if !(fname_lc == needle || fname_lc.contains(needle)) {
continue;
}
}
seen += 1;
let path = entry.path();
match fs::read(path) {
Ok(raw_bytes) => {
// Auto-detect encoding for old Unreal script sources
let (encoding_label, _, _) = chardet::detect(&raw_bytes);
let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes())
.unwrap_or(encoding_rs::UTF_8);
let (decoded_text, _, _) = encoding.decode(&raw_bytes);
uc_files.push((path.to_path_buf(), decoded_text.into_owned()));
Ok(raw) => {
let (label, _, _) = chardet::detect(&raw);
let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8);
let (txt, _, _) = enc.decode(&raw);
uc_files.push((path.to_path_buf(), txt.into_owned()));
picked_any = true;
if single_mode {
// Only the first match.
break;
}
}
Err(error) => {
eprintln!("Failed to read `{}`: {error}", path.display());
Err(e) => {
wait_before_errors("Read error detected. Press Enter to print details...");
eprintln!("Failed to read `{}`: {e}", path.display());
std::process::exit(1);
}
}
}
println!("Loaded {} .uc files into memory.", uc_files.len());
// Tokenize and measure performance
let start_time = std::time::Instant::now();
let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files
.iter()
.map(|(path, source_code)| {
let tokenized_file = TokenizedFile::from_str(source_code);
if tokenized_file.has_errors() {
println!("TK: {}", path.display());
}
(path.clone(), tokenized_file)
})
.collect();
let elapsed_time = start_time.elapsed();
if single_mode && !picked_any {
let needle = target_raw.as_deref().unwrap();
eprintln!(
"No .uc file matching '{needle}' found under '{}'.",
root.display()
);
std::process::exit(1);
}
println!(
"Loaded {} .uc files into memory (cap={}, reached={}).",
uc_files.len(),
FILE_LIMIT,
if !single_mode && uc_files.len() >= FILE_LIMIT {
"yes"
} else {
"no"
}
);
mark("after_read", t0);
// Stage 1: tokenize all
let t_tok = Instant::now();
let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len());
let mut tk_error_idx: Option<usize> = None;
for (i, (path, source)) in uc_files.iter().enumerate() {
let tf = TokenizedFile::tokenize(source);
if tk_error_idx.is_none() && tf.has_errors() {
tk_error_idx = Some(i);
}
tokenized.push((path.clone(), tf));
}
println!(
"Tokenized {} files in {:.2?}",
tokenized_files.len(),
elapsed_time
tokenized.len(),
t_tok.elapsed()
);
mark("after_tokenize", t0);
// Round-trip check
for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) {
let reconstructed = tokenized_file.reconstruct_source();
if original != &reconstructed {
eprintln!("Reconstruction mismatch in `{}`!", path.display());
std::process::exit(1);
// If tokenization error: wait, print lexer diagnostics for the first failing file, then exit.
if let Some(idx) = tk_error_idx {
let (bad_path, bad_tf) = &tokenized[idx];
wait_before_errors("Tokenization issues detected. Press Enter to print diagnostics...");
let fname = bad_path.display().to_string();
eprintln!("--- Tokenization issues in first failing file ---");
eprintln!("File: {fname}");
let diags = bad_tf.diagnostics();
if diags.is_empty() {
eprintln!("(no diagnostics captured)");
} else {
render_diagnostics_window(diags, bad_tf, &fname);
}
std::process::exit(1);
}
// Stage 2: parse all with ONE arena kept alive
let arena = rottlib::arena::Arena::new();
let t_parse = Instant::now();
// First failing parse: (tokenized_index, diagnostics, fatal)
let mut first_fail: Option<(usize, Vec<Diag>, Option<String>)> = None;
for (i, (path, tk)) in tokenized.iter().enumerate() {
// --- progress line BEFORE parsing this file ---
{
use std::io::Write;
eprint!(
"Parsing [{}/{}] {} | rss={} MB\r\n",
i + 1,
tokenized.len(),
path.display(),
rss_mb()
);
let _ = io::stderr().flush();
}
let mut parser = Parser::new(tk, &arena);
match parser.parse_source_file() {
Ok(_) => {
if !parser.diagnostics.is_empty() && first_fail.is_none() {
first_fail = Some((i, parser.diagnostics.clone(), None));
}
}
Err(e) => {
if first_fail.is_none() {
first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}"))));
}
}
}
}
println!("All .uc files matched successfully.");
println!(
"Parsed {} files in {:.2?}",
tokenized.len(),
t_parse.elapsed()
);
mark("after_parse", t0);
// Summary
println!("--- Summary ---");
println!("Files processed: {}", tokenized.len());
println!("File cap: {FILE_LIMIT}");
if let Some((idx, diags, fatal)) = first_fail {
wait_before_errors("Parse issues detected. Press Enter to print diagnostics...");
let (path, tf) = &tokenized[idx];
eprintln!("--- Parse issues in first failing file ---");
eprintln!("File: {}", path.display());
if let Some(f) = &fatal {
eprintln!("Fatal parse error: {f}");
}
if diags.is_empty() && fatal.is_none() {
eprintln!("(no diagnostics captured)");
} else {
let fname = path.display().to_string();
render_diagnostics_window(&diags, tf, &fname);
}
std::process::exit(1);
}
println!("All files parsed without diagnostics.");
}

View File

@ -0,0 +1,261 @@
#![allow(
clippy::all,
clippy::pedantic,
clippy::nursery,
clippy::cargo,
clippy::restriction
)]
use rottlib::arena::Arena;
use rottlib::diagnostics::Diagnostic;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::Parser;
/// Fixtures for the `switch` diagnostics (P0034-P0042), one `(label, source)`
/// pair per case.
///
/// Keep these small: the goal is to inspect a single diagnostic and the
/// recovery behavior around it, not full source-file parsing.
const TEST_CASES: &[(&str, &str)] = &[
// P0034 - SwitchMissingBody
("files/P0034_01.uc", "switch(A) local\n"),
("files/P0034_02.uc", "switch\n(A)\nvar"),
("files/P0034_03.uc", "switch(\n A\n)\n"),
("files/P0034_04.uc", "switch\n(\n A\n)\n"),
("files/P0034_05.uc", "switch(A)\ncase 1:\n"),
// P0035 - SwitchTopLevelItemNotCase
("files/P0035_01.uc", "switch(A) {\n Log(\"bad\");\n}\n"),
(
"files/P0035_02.uc",
"switch\n(A)\n{\n Log(\"bad\");\n Log(\"worse\");\n case 1:\n}\n",
),
("files/P0035_03.uc", "switch(A) {\n 123;\n default:\n}\n"),
(
"files/P0035_04.uc",
"switch\n(\n A\n)\n{\n if (A) {}\n case 1:\n}\n",
),
(
"files/P0035_05.uc",
"switch(A) {\n {\n Log(\"nested\");\n }\n case 1:\n}\n",
),
// P0036 - SwitchCaseMissingColon
(
"files/P0036_01.uc",
"switch(A) {\n case 1\n case 2:\n}\n",
),
(
"files/P0036_02.uc",
"switch\n(A)\n{\n case\n 1\n default:\n}\n",
),
(
"files/P0036_03.uc",
"switch(A) {\n case (A)\n case B:\n}\n",
),
(
"files/P0036_04.uc",
"switch\n(\n A\n)\n{\n case\n A + B\n default\n :\n}\n",
),
(
"files/P0036_05.uc",
"switch(A) {\n case Foo.Bar(Baz)\n case Other:\n}\n",
),
// P0037 - SwitchDefaultMissingColon
(
"files/P0037_01.uc",
"switch(A) {\n default\n if (A) {}\n}\n",
),
(
"files/P0037_02.uc",
"switch\n(A)\n{\n default\n while (A) {}\n}\n",
),
(
"files/P0037_03.uc",
"switch(A) {\n default\n for (;;) {}\n}\n",
),
(
"files/P0037_04.uc",
"switch\n(\n A\n)\n{\n default\n switch(B) {\n case 1:\n }\n}\n",
),
(
"files/P0037_05.uc",
"switch(A) {\n default\n case 1:\n}\n",
),
// P0038 - SwitchDuplicateDefault
(
"files/P0038_01.uc",
"switch(A) {\n default:\n default:\n}\n",
),
(
"files/P0038_02.uc",
"switch\n(A)\n{\n default\n :\n default\n :\n}\n",
),
(
"files/P0038_03.uc",
"switch(A) {\n default:\n default:\n default:\n}\n",
),
(
"files/P0038_04.uc",
"switch\n(\n A\n)\n{\n default:\n Log(\"first\");\n default:\n Log(\"second\");\n}\n",
),
// P0039 - SwitchCasesAfterDefault
(
"files/P0039_01.uc",
"switch(A) {\n default:\n case 1:\n}\n",
),
(
"files/P0039_02.uc",
"switch\n(A)\n{\n default\n :\n case\n 1\n :\n}\n",
),
(
"files/P0039_03.uc",
"switch(A) {\n default:\n case 1:\n case 2:\n}\n",
),
(
"files/P0039_04.uc",
"switch\n(\n A\n)\n{\n default:\n case 1:\n Log(\"one\");\n case 2:\n Log(\"two\");\n}\n",
),
(
"files/P0039_05.uc",
"switch(A) {\n default:\n Log(\"done\");\n case 1:\n case 2:\n Log(\"stacked\");\n}\n",
),
// P0040 - SwitchMissingClosingBrace
("files/P0040_01.uc", "switch(A) {\n"),
("files/P0040_02.uc", "switch(A) {\n case 1:\n"),
("files/P0040_03.uc", "switch\n(A)\n{\n default:\n"),
(
"files/P0040_04.uc",
"switch\n(\n A\n)\n{\n case 1:\n case 2:\n",
),
(
"files/P0040_05.uc",
"switch(A) {\n case 1:\n Log(\"body\");\n",
),
// P0041 - SwitchCaseMissingExpression
("files/P0041_01.uc", "switch(A) {\n case:\n}\n"),
("files/P0041_02.uc", "switch\n(A)\n{\n case\n :\n}\n"),
("files/P0041_03.uc", "switch(A) {\n case:\n default:\n}\n"),
(
"files/P0041_04.uc",
"switch\n(\n A\n)\n{\n case\n :\n case 1:\n}\n",
),
(
"files/P0041_05.uc",
"switch(A) {\n case\n :\n default:\n}\n",
),
// P0042 - SwitchCaseExpressionInvalidStart
("files/P0042_01.uc", "switch(A) {\n case *:\n}\n"),
(
"files/P0042_02.uc",
"switch\n(A)\n{\n case\n =\n :\n}\n",
),
("files/P0042_03.uc", "switch(A) {\n case &&:\n default:\n}\n"),
(
"files/P0042_04.uc",
"switch\n(\n A\n)\n{\n case\n .\n :\n case 1:\n}\n",
),
(
"files/P0042_05.uc",
"switch(A) {\n case ]:\n}\n",
),
// Mixed / intentional cascades
(
"files/P0042_cascade_01.uc",
"switch(A) {\n case *\n case 1:\n}\n",
),
(
"files/P0042_cascade_02.uc",
"switch\n(\n A\n)\n{\n case\n =\n default:\n}\n",
),
];
/// If true, also run the parser after tokenization.
///
/// For lexer-focused fixtures this is usually noisy, so keep it off unless you
/// want to inspect how parser recovery behaves after lexer diagnostics.
const RUN_PARSER: bool = true;
/// If true, print the parsed expression using Debug formatting.
const PRINT_PARSED_EXPR: bool = false;
/// If true, render every lexer diagnostic of a case.
///
/// When false, lexer diagnostics are not printed, but their presence still
/// marks the run as having had a problem.
const PRINT_LEXER_DIAGNOSTICS: bool = false;
/// If true, render every parser diagnostic of a case.
///
/// When false, parser diagnostics are not printed, but their presence still
/// marks the run as having had a problem. Only consulted when `RUN_PARSER`
/// is true.
const ALWAYS_PRINT_DIAGNOSTICS: bool = true;
/// Runs every `TEST_CASES` fixture through the lexer and, when `RUN_PARSER`
/// is enabled, through the expression parser, printing a report per case.
///
/// The process exits with status 1 if any fixture produced at least one lexer
/// or parser diagnostic, whether or not those diagnostics were printed.
fn main() {
    const CASE_RULE: &str = "============================================================";
    const HEADER_RULE: &str = "------------------------------------------------------------";

    let arena = Arena::new();
    let mut saw_diagnostics = false;

    for (case_index, (label, source)) in TEST_CASES.iter().enumerate() {
        println!("{CASE_RULE}");
        println!("Case {}: {}", case_index + 1, label);
        println!("{HEADER_RULE}");

        // Lexer pass.
        let tokenized = TokenizedFile::tokenize(source);
        let lexer_diagnostics = tokenized.diagnostics();
        if lexer_diagnostics.is_empty() {
            println!("Lexer diagnostics: none");
        } else {
            saw_diagnostics = true;
            if PRINT_LEXER_DIAGNOSTICS {
                println!("Lexer diagnostics:");
                for diagnostic in lexer_diagnostics {
                    render_diagnostic(diagnostic, &tokenized, Some(label), false);
                }
            }
        }

        // Optional parser pass.
        if RUN_PARSER {
            let mut parser = Parser::new(&tokenized, &arena);
            let parsed = parser.parse_expression();
            if PRINT_PARSED_EXPR {
                println!("Parsed expression:");
                println!("{parsed:#?}");
            }
            if parser.diagnostics.is_empty() {
                println!("Parser diagnostics: none");
            } else {
                saw_diagnostics = true;
                if ALWAYS_PRINT_DIAGNOSTICS {
                    println!("Parser diagnostics:");
                    for diagnostic in &parser.diagnostics {
                        render_diagnostic(diagnostic, &tokenized, Some(label), false);
                    }
                }
            }
        }
        println!();
    }

    println!("{CASE_RULE}");
    if !saw_diagnostics {
        println!("Done. All cases completed without diagnostics.");
        return;
    }
    println!("Done. At least one case had lexer or parser diagnostics.");
    std::process::exit(1);
}
/// Renders `diag` against `file`, labelling the output with `file_name`.
///
/// When `file_name` is `None`, the label `"<default>"` is used instead.
/// `_colors` is accepted but currently ignored.
fn render_diagnostic(
    diag: &Diagnostic,
    file: &TokenizedFile<'_>,
    file_name: Option<&str>,
    _colors: bool,
) {
    let shown_name = file_name.unwrap_or("<default>");
    diag.render(file, shown_name);
}

BIN
perf.data.old Normal file

Binary file not shown.

View File

@ -7,6 +7,11 @@ edition = "2024"
default = []
debug = []
[lints]
workspace = true
[dependencies]
logos = "0.15"
bumpalo = { version = "3", features = ["boxed", "collections"] }
bumpalo = { version = "3", features = ["boxed", "collections"] }
backtrace = "0.3"
crossterm = "0.*"

View File

@ -1,21 +1,25 @@
//! Arena submodule defining types that exist in their own memory space and
//! allow multiple cheap allocations (both performance- and fragmentation-wise).
//! Arena submodule defining types allocated from a dedicated bump arena,
//! allowing many cheap allocations with fast bulk reclamation.
//!
//! ## Memory safety
//! ## Destruction and resource management
//!
//! Dropping the [`Arena`] frees all its memory at once and does not run
//! [`Drop`] for values allocated within it. Avoid storing types that implement
//! [`Drop`] or own external resources inside [`ArenaNode`], [`ArenaVec`], or
//! [`ArenaString`]. If you must, arrange an explicit "drain/drop" pass before
//! the arena is dropped.
//! Dropping the [`Arena`] reclaims the arena's memory in bulk. Destructors are
//! not run for arena allocations that are still live at that point. Therefore,
//! avoid storing types whose cleanup must reliably happen at arena release,
//! especially types that own memory allocations or external resources outside
//! the arena.
//!
//! [`ArenaNode`], [`ArenaVec`], and [`ArenaString`] are provided so commonly
//! used owned data can keep their storage inside the arena rather than in
//! separate global-heap allocations.
use core::borrow::Borrow;
use core::fmt::{Debug, Display, Formatter, Result};
use core::ops::{Deref, DerefMut};
use bumpalo::{Bump, boxed, collections};
use crate::ast::AstSpan;
use crate::lexer::TokenLocation;
use crate::lexer::{TokenPosition, TokenSpan};
/// Object that manages a separate memory space, which can be deallocated all
/// at once after use.
@ -23,11 +27,8 @@ use crate::lexer::TokenLocation;
/// All allocations borrow the arena immutably.
///
/// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it
/// (including values contained in [`ArenaNode`], [`ArenaVec`]
/// and [`ArenaString`]).
///
/// This arena is not thread-safe (`!Send`, `!Sync`). Values borrow the arena
/// and therefore cannot be sent across threads independently.
/// (including values contained in [`ArenaNode`], [`ArenaVec`] and
/// [`ArenaString`]).
#[derive(Debug)]
pub struct Arena {
bump: Bump,
@ -48,38 +49,47 @@ impl Arena {
ArenaVec(collections::Vec::new_in(&self.bump))
}
///Allocates a copy of `string` in this arena and returns
/// Allocates a copy of `string` in this arena and returns
/// an [`ArenaString`].
#[must_use]
pub fn string(&self, string: &str) -> ArenaString<'_> {
ArenaString(collections::String::from_str_in(string, &self.bump))
}
/// Allocates `value` in this arena with the given `span`,
/// returning an [`ArenaNode`].
/// Allocates `value` in this arena and attaches `span`.
///
/// The node's storage borrows this arena and cannot outlive it.
///
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
/// The returned node borrows the arena and cannot outlive it.
/// If it is still live when the arena is dropped, its destructor is not run.
#[must_use]
pub fn alloc<T>(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> {
pub fn alloc_node<T>(&self, value: T, span: TokenSpan) -> ArenaNode<'_, T> {
ArenaNode {
inner: boxed::Box::new_in(value, &self.bump),
value: boxed::Box::new_in(value, &self.bump),
span,
}
}
pub fn alloc_between<T>(
/// Allocates `value` in this arena and attaches the span from `start` to
/// `end`.
///
/// The returned node borrows the arena and cannot outlive it.
/// If it is still live when the arena is dropped, its destructor is not run.
#[must_use]
pub fn alloc_node_between<T>(
&self,
value: T,
from: TokenLocation,
to: TokenLocation,
start: TokenPosition,
end: TokenPosition,
) -> ArenaNode<'_, T> {
self.alloc(value, AstSpan { from, to })
self.alloc_node(value, TokenSpan::range(start, end))
}
pub fn alloc_at<T>(&self, value: T, at: TokenLocation) -> ArenaNode<'_, T> {
self.alloc(value, AstSpan { from: at, to: at })
/// Allocates `value` in this arena and attaches a span covering `at`.
///
/// The returned node borrows the arena and cannot outlive it.
/// If it is still live when the arena is dropped, its destructor is not run.
#[must_use]
pub fn alloc_node_at<T>(&self, value: T, at: TokenPosition) -> ArenaNode<'_, T> {
self.alloc_node(value, TokenSpan::new(at))
}
}
@ -91,91 +101,66 @@ impl Default for Arena {
/// An arena-allocated box with an attached source span.
///
/// Equality and hashing take into account both the contained `T` and the `span`
/// (when `T: Eq + Hash`).
///
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
/// Dropping the node normally runs `Drop` for the inner value.
/// Dropping the arena does not itself perform a separate destructor pass.
#[derive(Hash, PartialEq, Eq)]
pub struct ArenaNode<'arena, T> {
/// Value allocated in the arena; this node owns it.
inner: boxed::Box<'arena, T>,
/// Token range covered by the value.
span: AstSpan,
value: boxed::Box<'arena, T>,
span: TokenSpan,
}
impl<'arena, T> ArenaNode<'arena, T> {
/// Creates a new [`ArenaNode`] by allocating `value` in `arena`.
#[must_use]
pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self {
pub fn new_in(value: T, span: TokenSpan, arena: &'arena Arena) -> Self {
Self {
inner: boxed::Box::new_in(value, &arena.bump),
value: boxed::Box::new_in(value, &arena.bump),
span,
}
}
/// Creates a new [`ArenaNode`] for an AST node that spans a single token.
pub fn from_token_location(
value: T,
token_location: crate::lexer::TokenLocation,
arena: &'arena Arena,
) -> Self {
Self {
inner: boxed::Box::new_in(value, &arena.bump),
span: AstSpan {
from: token_location,
to: token_location,
},
}
}
pub fn span_mut(&mut self) -> &mut AstSpan {
/// Returns a mutable reference to the token span covered by this node.
#[must_use]
pub const fn span_mut(&mut self) -> &mut TokenSpan {
&mut self.span
}
pub fn extend_to(&mut self, to: TokenLocation) {
self.span.to = to;
}
pub fn extend_from(&mut self, from: TokenLocation) {
self.span.from = from;
}
/// Returns the token span covered by this node.
pub fn span(&self) -> &AstSpan {
#[must_use]
pub const fn span(&self) -> &TokenSpan {
&self.span
}
}
impl<'arena, T> Deref for ArenaNode<'arena, T> {
impl<T> Deref for ArenaNode<'_, T> {
type Target = T;
fn deref(&self) -> &T {
&self.inner
&self.value
}
}
impl<'arena, T> DerefMut for ArenaNode<'arena, T> {
impl<T> DerefMut for ArenaNode<'_, T> {
fn deref_mut(&mut self) -> &mut T {
&mut self.inner
&mut self.value
}
}
impl<'arena, T: Debug> Debug for ArenaNode<'arena, T> {
impl<T: Debug> Debug for ArenaNode<'_, T> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.debug_struct("ArenaNode")
.field("inner", &**self)
.field("span", &self.span())
.field("span", self.span())
.finish()
}
}
/// Version of [`Vec`] that can be safely used inside a memory arena.
/// Version of [`Vec`] whose backing storage lives in the arena.
///
/// Elements do not have their destructors run when the arena is dropped.
///
/// This type dereferences to `[T]` and supports iteration by reference
/// (`&ArenaVec` and `&mut ArenaVec` implement [`IntoIterator`]).
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
/// Elements are dropped when the `ArenaVec` itself is dropped normally.
/// Capacity growth may leave old buffers in the arena until the whole arena
/// is reclaimed.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>);
impl<'arena, T> ArenaVec<'arena, T> {
@ -190,18 +175,28 @@ impl<'arena, T> ArenaVec<'arena, T> {
/// Growth is backed by the arena; increasing capacity allocates new space
/// in the arena and never frees previous blocks.
pub fn push(&mut self, value: T) {
self.0.push(value)
self.0.push(value);
}
/// Reserves capacity for at least `additional` more elements.
///
/// The collection may reserve more space to avoid frequent reallocations.
/// If growth requires a new allocation, the previous buffer remains in the
/// arena until the arena is reclaimed.
pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional)
self.0.reserve(additional);
}
pub fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
self.0.extend(it)
/// Extends the vector with the contents of `items`.
///
/// Growth may allocate a new buffer in the arena and leave the previous
/// buffer in place until the arena is reclaimed.
pub fn extend<I: IntoIterator<Item = T>>(&mut self, items: I) {
self.0.extend(items);
}
}
impl<'arena, T> Deref for ArenaVec<'arena, T> {
impl<T> Deref for ArenaVec<'_, T> {
type Target = [T];
fn deref(&self) -> &Self::Target {
@ -209,48 +204,41 @@ impl<'arena, T> Deref for ArenaVec<'arena, T> {
}
}
impl<'arena, T> DerefMut for ArenaVec<'arena, T> {
impl<T> DerefMut for ArenaVec<'_, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl<'arena, 's, T> IntoIterator for &'s ArenaVec<'arena, T> {
type Item = &'s T;
type IntoIter = core::slice::Iter<'s, T>;
impl<'iter, T> IntoIterator for &'iter ArenaVec<'_, T> {
type Item = &'iter T;
type IntoIter = core::slice::Iter<'iter, T>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
impl<'arena, 's, T> IntoIterator for &'s mut ArenaVec<'arena, T> {
type Item = &'s mut T;
type IntoIter = core::slice::IterMut<'s, T>;
impl<'iter, T> IntoIterator for &'iter mut ArenaVec<'_, T> {
type Item = &'iter mut T;
type IntoIter = core::slice::IterMut<'iter, T>;
fn into_iter(self) -> Self::IntoIter {
self.0.iter_mut()
}
}
/// Version of [`String`] that can be safely used inside a memory arena.
///
/// This type dereferences to [`str`] and implements [`AsRef<str>`] and
/// [`core::borrow::Borrow<str>`] for ergonomic use with APIs expecting string
/// slices.
///
/// The string borrows the arena and cannot outlive it. Dropping the arena
/// frees its memory without running `Drop` for the string contents.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
/// Version of [`String`] whose backing storage lives in the arena.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArenaString<'arena>(collections::String<'arena>);
impl<'arena> ArenaString<'arena> {
/// Allocates a copy of `string` in `arena` and returns an [`ArenaString`].
#[must_use]
pub fn from_str_in(string: &str, arena: &'arena Arena) -> Self {
Self(collections::String::from_str_in(string, &arena.bump))
pub fn from_str_in(text: &str, arena: &'arena Arena) -> Self {
Self(collections::String::from_str_in(text, &arena.bump))
}
}
impl<'arena> Deref for ArenaString<'arena> {
impl Deref for ArenaString<'_> {
type Target = str;
fn deref(&self) -> &Self::Target {
@ -258,19 +246,19 @@ impl<'arena> Deref for ArenaString<'arena> {
}
}
impl<'arena> AsRef<str> for ArenaString<'arena> {
impl AsRef<str> for ArenaString<'_> {
fn as_ref(&self) -> &str {
&self.0
}
}
impl<'arena> core::borrow::Borrow<str> for ArenaString<'arena> {
impl Borrow<str> for ArenaString<'_> {
fn borrow(&self) -> &str {
&self.0
}
}
impl<'arena> Display for ArenaString<'arena> {
impl Display for ArenaString<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
Display::fmt(&self.0, f)
}

View File

@ -1,376 +0,0 @@
use crate::arena::ArenaVec;
use super::lexer::TokenLocation;
use core::fmt;
use crate::arena::{Arena, ArenaNode, ArenaString};
/// Range of tokens covered by an AST node.
///
/// Both ends are inclusive: `from` and `to` are themselves part of the span.
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AstSpan {
    /// First token of the span (inclusive).
    pub from: TokenLocation,
    /// Last token of the span (inclusive).
    pub to: TokenLocation,
}

impl AstSpan {
    /// Builds the span that starts where `left_span` starts and ends where
    /// `right_span` ends.
    pub fn merge(left_span: &AstSpan, right_span: &AstSpan) -> AstSpan {
        Self::range(left_span.from, right_span.to)
    }

    /// Builds a span that covers exactly one token.
    pub fn new(single_location: TokenLocation) -> AstSpan {
        Self::range(single_location, single_location)
    }

    /// Builds the span running from `from` to `to`, both inclusive.
    pub fn range(from: TokenLocation, to: TokenLocation) -> AstSpan {
        Self { from, to }
    }

    /// Moves the end of the span to `right_most_location` if that location
    /// lies beyond the current end; otherwise leaves the span untouched.
    pub fn extend_to(&mut self, right_most_location: TokenLocation) {
        let reaches_further = right_most_location > self.to;
        if reaches_further {
            self.to = right_most_location;
        }
    }
}
/// Unary operators stored in `Expression::LeftUnary`.
///
/// The textual form of each variant is defined by this type's `Display`
/// implementation.
#[derive(Clone, Copy, Debug)]
pub enum PrefixOperator {
/// Displayed as `!`.
Not,
/// Displayed as `-`.
Minus,
/// Displayed as `~`.
BitwiseNot,
/// Displayed as `++.`.
Increment,
/// Displayed as `--.`.
Decrement,
}
/// Unary operators stored in `Expression::RightUnary`.
///
/// The textual form of each variant is defined by this type's `Display`
/// implementation.
#[derive(Clone, Copy, Debug)]
pub enum PostfixOperator {
/// Displayed as `.++`.
Increment,
/// Displayed as `.--`.
Decrement,
}
/// Binary operators stored in `Expression::Binary`.
///
/// Variants are grouped by category (see the section comments below). The
/// textual form of each variant is defined by this type's `Display`
/// implementation.
#[derive(Clone, Copy, Debug)]
pub enum InfixOperator {
// Assignments
Assign,
MultiplyAssign,
DivideAssign,
ModuloAssign,
PlusAssign,
MinusAssign,
ConcatAssign,
ConcatSpaceAssign,
// String operations
ConcatSpace,
Concat,
// Logical
And,
Xor,
Or,
// Bit-wise
BitwiseAnd,
BitwiseOr,
BitwiseXor,
// Not-equal
NotEqual,
// Comparison
Equal,
ApproximatelyEqual,
Less,
LessEqual,
Greater,
GreaterEqual,
ClockwiseFrom,
// Shifts
LeftShift,
LogicalRightShift,
RightShift,
// Terms
Plus,
Minus,
// Modulo
Modulo,
// Factor
Multiply,
Divide,
Dot,
Cross,
// Exponentiation
Exponentiation,
}
/// Expression AST node.
///
/// Besides operators and literals, this enum also carries block and
/// control-flow forms (`Block`, `If`, `While`, `DoUntil`, `ForEach`, `For`,
/// `Switch`), jump forms (`Goto`, `Continue`, `Break`, `Return`) and an
/// `Error` placeholder.
///
/// `'src` is the lifetime of string slices stored directly in the node (see
/// `Identifier`); `'arena` is the lifetime of the arena that owns child nodes,
/// vectors and strings.
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
pub enum Expression<'src, 'arena> {
Binary(
ExpressionRef<'src, 'arena>,
InfixOperator,
ExpressionRef<'src, 'arena>,
),
LeftUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
RightUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
Identifier(&'src str),
String(ArenaString<'arena>),
Integer(i128),
Float(f64),
Bool(bool),
None,
Parentheses(ExpressionRef<'src, 'arena>),
Block {
// All these end with `;`
statements: ArenaVec<'arena, StatementRef<'src, 'arena>>,
// Last statement, but only if it doesn't end with `;`
tail: Option<ExpressionRef<'src, 'arena>>,
},
If {
condition: ExpressionRef<'src, 'arena>,
body: ExpressionRef<'src, 'arena>,
else_body: Option<ExpressionRef<'src, 'arena>>,
},
While {
condition: ExpressionRef<'src, 'arena>,
body: ExpressionRef<'src, 'arena>,
},
DoUntil {
condition: ExpressionRef<'src, 'arena>,
body: ExpressionRef<'src, 'arena>,
},
ForEach {
iterator: ExpressionRef<'src, 'arena>,
body: ExpressionRef<'src, 'arena>,
},
For {
init: Option<ExpressionRef<'src, 'arena>>,
condition: Option<ExpressionRef<'src, 'arena>>,
step: Option<ExpressionRef<'src, 'arena>>,
body: ExpressionRef<'src, 'arena>,
},
Switch {
selector: ExpressionRef<'src, 'arena>,
cases: ArenaVec<'arena, CaseRef<'src, 'arena>>,
// default case
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
// last statement of the case block
tail: Option<ExpressionRef<'src, 'arena>>,
},
Goto(ArenaString<'arena>),
Continue,
Break(Option<ExpressionRef<'src, 'arena>>),
Return(Option<ExpressionRef<'src, 'arena>>),
// For injecting in place of parts that couldn't be parsed
// (along with text that wasn't able to be parsed)
Error,
}
/// Arena-allocated [`Expression`] node.
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
/// One declared variable inside a `Statement::VariableDeclaration`: a name
/// with an optional initializer expression.
#[derive(Debug)]
pub struct VariableDeclarator<'src, 'arena> {
/// Name of the declared variable.
pub name: ArenaString<'arena>,
/// Initializer expression, if one was given.
pub initializer: Option<ExpressionRef<'src, 'arena>>,
}
/// One non-default arm of an `Expression::Switch`: a list of label
/// expressions together with the statements that make up the arm's body.
#[derive(Debug)]
pub struct SwitchCase<'src, 'arena> {
pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>, // UScript allows expressions; multiple labels ok
pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, // allow fallthrough unless a Break/Goto ends it
}
/// Arena-allocated [`SwitchCase`] node.
pub type CaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
/// Statement AST node: an empty statement, an expression used as a statement,
/// a variable declaration, a label, or an `Error` placeholder.
#[derive(Debug)]
pub enum Statement<'src, 'arena> {
// For the cases where user just used too many semi-colons `;;;;`
Empty,
Expression(ExpressionRef<'src, 'arena>),
// Just declarations without assignment:
// `local int i, j, k`
LocalVariableDeclaration {
type_name: ArenaString<'arena>,
identifiers: ArenaVec<'arena, ArenaString<'arena>>,
},
// Just `int i, j = 3, k = 0`
VariableDeclaration {
type_name: ArenaString<'arena>,
declarations: ArenaVec<'arena, VariableDeclarator<'src, 'arena>>,
},
Label(ArenaString<'arena>),
// For injecting in place of parts that couldn't be parsed
// (along with text that wasn't able to be parsed)
Error,
}
/// Arena-allocated [`Statement`] node.
pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;
impl<'src, 'arena> Expression<'src, 'arena> {
    /// Allocates a `LeftUnary` node in `arena`.
    ///
    /// The node's span runs from the operator token at `op_position` to the
    /// end of `rhs`.
    pub fn new_prefix(
        arena: &'arena Arena,
        op_position: TokenLocation,
        op: PrefixOperator,
        rhs: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        let operand_end = rhs.span().to;
        let covered = AstSpan {
            from: op_position,
            to: operand_end,
        };
        ArenaNode::new_in(Self::LeftUnary(op, rhs), covered, arena)
    }

    /// Allocates a `RightUnary` node in `arena`.
    ///
    /// The node's span runs from the start of `lhs` to the operator token at
    /// `op_position`.
    pub fn new_postfix(
        arena: &'arena Arena,
        lhs: ArenaNode<'arena, Self>,
        op: PostfixOperator,
        op_position: TokenLocation,
    ) -> ArenaNode<'arena, Self> {
        let operand_start = lhs.span().from;
        let covered = AstSpan {
            from: operand_start,
            to: op_position,
        };
        ArenaNode::new_in(Self::RightUnary(lhs, op), covered, arena)
    }

    /// Allocates a `Binary` node in `arena`.
    ///
    /// The node's span runs from the start of `lhs` to the end of `rhs`.
    pub fn new_binary(
        arena: &'arena Arena,
        lhs: ArenaNode<'arena, Self>,
        op: InfixOperator,
        rhs: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        let covered = AstSpan::merge(lhs.span(), rhs.span());
        ArenaNode::new_in(Self::Binary(lhs, op, rhs), covered, arena)
    }
}
/// Implemented by AST nodes that can report whether they must be followed by
/// `;` when used as a statement.
pub trait NeedsSemi {
/// Returns `true` for nodes that require `;` when used as a statement
/// (i.e., everything except blocky control-flow forms).
fn needs_semicolon(&self) -> bool;
}
impl<'src, 'arena> NeedsSemi for Expression<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
match self {
Expression::Block { .. }
| Expression::If { .. }
| Expression::While { .. }
| Expression::DoUntil { .. }
| Expression::ForEach { .. }
| Expression::For { .. }
| Expression::Error => false,
// All other expressions require `;` when used as a statement.
_ => true,
}
}
}
// Forwards to the `NeedsSemi` implementation of the wrapped `Expression`.
// `(**self)` first removes the `&` and then goes through the node's `Deref`
// implementation to reach the inner value.
impl<'src, 'arena> NeedsSemi for ExpressionRef<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
(**self).needs_semicolon()
}
}
impl<'src, 'arena> NeedsSemi for Statement<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
match self {
Statement::Empty | Statement::Label { .. } | Statement::Error { .. } => false,
// All other expressions require `;` when used as a statement.
_ => true,
}
}
}
// Forwards to the `NeedsSemi` implementation of the wrapped `Statement`.
// `(**self)` first removes the `&` and then goes through the node's `Deref`
// implementation to reach the inner value.
impl<'src, 'arena> NeedsSemi for StatementRef<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
(**self).needs_semicolon()
}
}
impl fmt::Display for PrefixOperator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
PrefixOperator::Not => "!",
PrefixOperator::Minus => "-",
PrefixOperator::BitwiseNot => "~",
PrefixOperator::Increment => "++.",
PrefixOperator::Decrement => "--.",
};
write!(f, "{s}")
}
}
impl fmt::Display for PostfixOperator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
PostfixOperator::Increment => ".++",
PostfixOperator::Decrement => ".--",
};
write!(f, "{s}")
}
}
impl fmt::Display for InfixOperator {
    /// Writes the operator's source spelling.
    ///
    /// Symbolic operators are written as their symbol; the word operators
    /// `ClockwiseFrom`, `Dot` and `Cross` are written by name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use InfixOperator::*;
        let symbol = match self {
            // Assignments
            Assign => "=",
            MultiplyAssign => "*=",
            DivideAssign => "/=",
            ModuloAssign => "%=",
            PlusAssign => "+=",
            MinusAssign => "-=",
            ConcatAssign => "$=",
            ConcatSpaceAssign => "@=",
            // String operations
            ConcatSpace => "@",
            Concat => "$",
            // Logical
            And => "&&",
            Xor => "^^",
            Or => "||",
            // Bitwise
            BitwiseAnd => "&",
            BitwiseOr => "|",
            BitwiseXor => "^",
            // Not equal
            NotEqual => "!=",
            // Comparison
            Equal => "==",
            // UnrealScript spells approximate equality `~=`; the previous
            // `~+` was a typo and is not an operator of the language.
            ApproximatelyEqual => "~=",
            Less => "<",
            LessEqual => "<=",
            Greater => ">",
            GreaterEqual => ">=",
            ClockwiseFrom => "ClockwiseFrom",
            // Shift
            LeftShift => "<<",
            LogicalRightShift => ">>>",
            RightShift => ">>",
            // Term
            Plus => "+",
            Minus => "-",
            // Modulo
            Modulo => "%",
            // Factor
            Multiply => "*",
            Divide => "/",
            Dot => "Dot",
            Cross => "Cross",
            // Exp
            Exponentiation => "**",
        };
        f.write_str(symbol)
    }
}

View File

@ -0,0 +1,234 @@
//! Callable-declaration AST nodes.
//!
//! This module defines function-like declarations together with their
//! parameter lists and callable modifiers.
//!
//! The language groups several callable forms under a largely shared header
//! structure, including ordinary functions, events, delegates, and operator
//! declarations. This module preserves those forms as AST nodes together with
//! source-relevant modifier and parameter information.
use super::{BlockBody, ExpressionRef, IdentifierToken, InfixOperatorName, PostfixOperatorName,
PrefixOperatorName, TypeSpecifierRef,
};
use crate::arena::ArenaVec;
use crate::lexer::{TokenSpan, Keyword, TokenPosition};
use crate::arena::ArenaNode;
use core::convert::TryFrom;
/// Parameter modifier kind.
///
/// These modifiers apply to a single callable parameter and are preserved in
/// source order on the parameter node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParameterModifierKind {
Optional,
Out,
Skip,
Coerce,
}
/// Parameter modifier together with the source position of its token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ParameterModifier {
pub kind: ParameterModifierKind,
pub position: TokenPosition,
}
/// One callable parameter declaration.
#[derive(Debug, PartialEq)]
pub struct Parameter<'src, 'arena> {
/// Parameter modifiers in source order.
pub modifiers: ArenaVec<'arena, ParameterModifier>,
/// Declared parameter type.
pub type_specifier: TypeSpecifierRef<'src, 'arena>,
/// Declared parameter name.
pub name: IdentifierToken,
/// Optional array-size expression from `[expr]`.
pub array_size: Option<ExpressionRef<'src, 'arena>>,
/// Optional default-value expression after `=`.
pub default_value: Option<ExpressionRef<'src, 'arena>>,
}
/// Stable arena reference to a parameter node.
pub type ParameterRef<'src, 'arena> = ArenaNode<'arena, Parameter<'src, 'arena>>;
/// Syntactic callable declaration kind.
///
/// This enum distinguishes ordinary callable declarations from operator
/// declarations and preserves operator fixity / precedence where applicable.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CallableKind {
/// Ordinary function declaration: `function`.
Function,
/// Event declaration: `event`.
Event,
/// Delegate declaration: `delegate`.
Delegate,
/// Prefix operator declaration: `preoperator`.
PrefixOperator,
/// Infix operator declaration: `operator(<precedence>)`.
///
/// The precedence is optional. Every supported operator already has
/// a built-in precedence that cannot actually be changed in
/// `UnrealScript`, so omitting the precedence when redefining an operator
/// is the preferred form.
InfixOperator(Option<u128>),
/// Postfix operator declaration: `postoperator`.
PostfixOperator,
}
impl TryFrom<Keyword> for CallableKind {
    type Error = ();

    /// Maps a keyword to the [`CallableKind`] it fully determines on its own.
    ///
    /// Yields `Err(())` when the keyword is not a callable-declaration
    /// keyword, or when more syntax is needed to settle the kind
    /// (for example `operator(<precedence>)`).
    fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
        match keyword {
            Keyword::Function => Ok(Self::Function),
            Keyword::Event => Ok(Self::Event),
            Keyword::Delegate => Ok(Self::Delegate),
            Keyword::PreOperator => Ok(Self::PrefixOperator),
            Keyword::PostOperator => Ok(Self::PostfixOperator),
            _ => Err(()),
        }
    }
}
/// Declared name of a callable.
///
/// A callable is named either by an identifier token or by a prefix, infix or
/// postfix operator name.
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum CallableName {
/// Name given as an identifier token.
Identifier(IdentifierToken),
/// Name given as a prefix operator.
PrefixOperator(PrefixOperatorName),
/// Name given as an infix operator.
InfixOperator(InfixOperatorName),
/// Name given as a postfix operator.
PostfixOperator(PostfixOperatorName),
}
/// Callable definition.
///
/// This node represents the common syntactic shape shared by function-like
/// declarations, including ordinary functions, events, delegates, and
/// operator forms.
#[derive(Debug, PartialEq)]
pub struct CallableDefinition<'src, 'arena> {
/// Declared callable name.
pub name: CallableName,
/// Callable declaration form.
pub kind: CallableKind,
/// Optional return type.
///
/// Some callable forms may omit a return type entirely.
pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
/// Declaration modifiers attached to the callable header.
pub modifiers: ArenaVec<'arena, CallableModifier>,
/// Formal parameters in source order.
pub parameters: ArenaVec<'arena, ParameterRef<'src, 'arena>>,
/// Optional callable body.
///
/// `None` represents a header-only declaration terminated by `;`.
/// `Some(...)` stores the parsed block statements belonging to the body.
pub body: Option<BlockBody<'src, 'arena>>,
}
/// Stable arena reference to a callable definition node.
pub type CallableDefinitionRef<'src, 'arena> = ArenaNode<'arena, CallableDefinition<'src, 'arena>>;
/// Callable declaration modifier kind.
///
/// These modifiers apply to the callable declaration itself rather than to an
/// individual parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CallableModifierKind {
Final,
/// `native` or `native(<index>)`
Native(Option<u128>),
Abstract,
Transient,
Public,
Protected,
Private,
Static,
/// `config(<name>)`
Config(IdentifierToken),
Const,
Deprecated,
NoExport,
Export,
Simulated,
Latent,
Iterator,
Singular,
Exec,
Reliable,
Unreliable,
NativeReplication,
}
impl TryFrom<Keyword> for CallableModifierKind {
type Error = ();
/// Converts a keyword into a [`CallableModifierKind`] when the modifier
/// is fully determined by the keyword alone.
///
/// Returns `Err(())` for keywords that either do not represent callable
/// modifiers or require additional syntax
/// (e.g. `native(...)`, `config(...)`).
#[allow(clippy::enum_glob_use)]
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
use CallableModifierKind::*;
let kind = match keyword {
Keyword::Final => Final,
Keyword::Abstract => Abstract,
Keyword::Transient => Transient,
Keyword::Public => Public,
Keyword::Protected => Protected,
Keyword::Private => Private,
Keyword::Static => Static,
Keyword::Const => Const,
Keyword::Deprecated => Deprecated,
Keyword::NoExport => NoExport,
Keyword::Export => Export,
Keyword::Simulated => Simulated,
Keyword::Latent => Latent,
Keyword::Iterator => Iterator,
Keyword::Singular => Singular,
Keyword::Exec => Exec,
Keyword::Reliable => Reliable,
Keyword::Unreliable => Unreliable,
Keyword::NativeReplication => NativeReplication,
_ => return Err(()),
};
Ok(kind)
}
}
/// Callable modifier together with its full source span.
///
/// A modifier may occupy more than one token in source, for example when it
/// carries an argument like `native(12)` or `config(System)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CallableModifier {
/// Modifier kind.
pub kind: CallableModifierKind,
/// Span covering the full modifier syntax.
pub span: TokenSpan,
}
impl Keyword {
    /// Returns `true` if this keyword can start a callable modifier: either
    /// `native` / `config`, or any keyword that converts directly into
    /// a [`CallableModifierKind`].
    #[must_use]
    pub fn is_callable_modifier(self) -> bool {
        match self {
            Self::Native | Self::Config => true,
            other => CallableModifierKind::try_from(other).is_ok(),
        }
    }

    /// Returns `true` if this keyword introduces a callable declaration:
    /// either `operator`, or any keyword that converts directly into
    /// a [`CallableKind`].
    #[must_use]
    pub fn is_callable_kind_keyword(self) -> bool {
        match self {
            Self::Operator => true,
            other => CallableKind::try_from(other).is_ok(),
        }
    }
}

View File

@ -0,0 +1,294 @@
//! Expression AST nodes.
//!
//! This module defines ordinary expressions together with expression-shaped
//! control-flow and block forms parsed by the language.
use super::{
IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator, QualifiedIdentifierRef,
StatementRef,
};
use crate::arena::ArenaVec;
use crate::lexer::TokenSpan;
use super::super::lexer::TokenPosition;
use crate::arena::{Arena, ArenaNode, ArenaString};
/// Expression node used for both ordinary expressions and expression-shaped
/// statement/control-flow forms.
///
/// This AST is intentionally broad: besides operators and literals, it also
/// includes blocks and control-flow constructs that syntactically occupy
/// expression parsing positions in the language.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, PartialEq)]
pub enum Expression<'src, 'arena> {
/// Plain identifier expression.
Identifier(IdentifierToken),
/// String literal.
///
/// The contents stored in arena memory are a transformed (unescaped) version
/// of the raw string from the source.
String(ArenaString<'arena>),
/// Integer literal, stored as an unsigned value.
Integer(u128),
/// Floating-point literal.
Float(f64),
/// Boolean literal.
Bool(bool),
/// `None` literal / null-like language value.
None,
/// Explicit parenthesized subexpression: `(expr)`.
///
/// Parentheses are preserved as a node instead of being discarded so later
/// stages can retain grouping information for diagnostics, formatting, or
/// source-faithful reconstruction.
Parentheses(ExpressionRef<'src, 'arena>),
/// Class-type reference parsed as a qualified identifier path.
///
/// This is used for class-like type mentions that are not represented as a
/// tagged name literal.
ClassType(QualifiedIdentifierRef<'arena>),
/// Tagged or untagged quoted name literal.
///
/// Examples:
/// - `class'Foo'`
/// - `Texture'Pkg.Group.Name'`
/// - `'Pkg.Group.Name'` if the grammar permits an untagged form
///
/// `tag` stores the leading identifier token when present. `name` is the
/// raw content between quotes and is preserved exactly as written.
NameLiteral {
/// Leading identifier token (e.g. `class`, `Texture`), if any.
tag: Option<IdentifierToken>,
/// Raw text between the quotes, borrowed from the source.
name: &'src str,
},
/// Indexing operation: `target[index]`.
///
/// This is produced after postfix parsing and binds tighter than any infix
/// operator.
Index {
/// Expression being indexed.
target: ExpressionRef<'src, 'arena>,
/// Expression inside the square brackets.
index: ExpressionRef<'src, 'arena>,
},
/// Member access: `target.name`.
///
/// The member name is stored as a token reference rather than an owned
/// string so later stages can resolve exact spelling and source location
/// from the lexer/token stream.
Member {
/// Expression whose member is accessed.
target: ExpressionRef<'src, 'arena>,
/// Token of the accessed member's name.
name: IdentifierToken,
},
/// Call expression: `callee(arg1, arg2, ...)`.
///
/// Arguments are stored as `Option<ExpressionRef>` to preserve omitted
/// arguments in syntaxes that allow empty slots.
Call {
/// Expression being called.
callee: ExpressionRef<'src, 'arena>,
/// Argument slots in source order; `None` marks an omitted argument.
arguments: ArgumentList<'src, 'arena>,
},
/// Prefix unary operator application: `op rhs`.
PrefixUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
/// Postfix unary operator application: `lhs op`.
PostfixUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
/// Binary operator application: `lhs op rhs`.
Binary(
ExpressionRef<'src, 'arena>,
InfixOperator,
ExpressionRef<'src, 'arena>,
),
/// Block expression / statement block: `{ ... }`.
///
/// The contained statements are preserved in source order.
Block(StatementList<'src, 'arena>),
/// Conditional expression / statement.
///
/// Both arms use `BranchBody` so the parser can preserve legacy one-line
/// bodies, optional trailing semicolons, and recovery anchors.
If {
/// Condition that selects between the two arms.
condition: ExpressionRef<'src, 'arena>,
/// Body of the `then` arm.
then_body: BranchBody<'src, 'arena>,
/// Body of the `else` arm, if one was written.
else_body: Option<BranchBody<'src, 'arena>>,
},
/// `while (condition) body`
While {
/// Loop condition.
condition: ExpressionRef<'src, 'arena>,
/// Loop body.
body: BranchBody<'src, 'arena>,
},
/// `do body until (condition)`
///
/// Note that the fields are declared as `condition`, then `body`, which is
/// the reverse of their order in source.
DoUntil {
/// Condition written after `until`.
condition: ExpressionRef<'src, 'arena>,
/// Loop body written after `do`.
body: BranchBody<'src, 'arena>,
},
/// `foreach iterator body`
///
/// The iteration source / iterator expression is stored as a normal
/// expression node because the language permits nontrivial syntax there.
ForEach {
/// Expression following the `foreach` keyword.
iterator_expression: ExpressionRef<'src, 'arena>,
/// Loop body.
body: BranchBody<'src, 'arena>,
},
/// Traditional three-part `for` loop.
///
/// Each header component is optional to support forms such as:
/// - `for (;;)`
/// - `for (init;;)`
/// - `for (;cond;)`
/// - `for (;;step)`
For {
/// First header component, if present.
initializer: Option<ExpressionRef<'src, 'arena>>,
/// Second header component, if present.
condition: Option<ExpressionRef<'src, 'arena>>,
/// Third header component, if present.
step: Option<ExpressionRef<'src, 'arena>>,
/// Loop body.
body: BranchBody<'src, 'arena>,
},
/// `switch` construct.
///
/// `cases` contains all explicit case arms in source order.
/// `default_arm` stores the statements of the default branch, if present.
Switch {
/// Expression whose value is switched on.
selector: ExpressionRef<'src, 'arena>,
/// Explicit `case` arms in source order.
cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
/// Statements of the default branch; `None` when there is no default.
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
},
/// `goto` statement.
///
/// Stores the token position of the target token rather than duplicating
/// its textual representation in the AST. After successful parsing the
/// position refers to either an identifier or a name literal.
Goto(TokenPosition),
/// `continue` statement.
Continue,
/// `break` statement, optionally with an attached expression if the
/// language form allows one.
Break(Option<ExpressionRef<'src, 'arena>>),
/// `return` statement, optionally carrying a returned expression.
Return(Option<ExpressionRef<'src, 'arena>>),
/// Object construction / allocation form using the language's `new` syntax.
///
/// The first three arguments (`outer_argument`, `name_argument`,
/// `flags_argument`) are optional positional control arguments.
/// `class_specifier` is the required class expression that identifies what
/// should be constructed.
New {
/// First optional positional argument.
outer_argument: Option<ExpressionRef<'src, 'arena>>,
/// Second optional positional argument.
name_argument: Option<ExpressionRef<'src, 'arena>>,
/// Third optional positional argument.
flags_argument: Option<ExpressionRef<'src, 'arena>>,
/// Required expression naming the class to construct.
class_specifier: ExpressionRef<'src, 'arena>,
},
/// Recovery placeholder inserted when an expression could not be parsed.
///
/// This allows the parser to continue building a larger AST and report more
/// than one error in a single pass.
Error,
}
/// Arguments in any comma-separated list.
///
/// Each element is an [`OptionalExpression`], so an omitted argument slot is
/// stored as `None` instead of being dropped from the list.
pub type ArgumentList<'src, 'arena> = ArenaVec<'arena, OptionalExpression<'src, 'arena>>;
/// Statements contained in a `{ ... }` block.
///
/// Used as the payload of [`Expression::Block`] and of [`BlockBody`].
pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'arena>>;
/// Statements contained in a `{ ... }` block with a span.
#[derive(Debug, PartialEq)]
pub struct BlockBody<'src, 'arena> {
/// Statements of the block.
pub statements: StatementList<'src, 'arena>,
/// Token span attached to the block.
///
/// NOTE(review): presumably covers the braces as well - confirm against the
/// block parser.
pub span: TokenSpan,
}
/// Stable arena reference to an expression node.
///
/// The node also exposes the expression's token span through `span()`.
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
/// Optional expression payload used in grammar positions where an expression
/// may be omitted entirely.
///
/// `None` means the expression was left out.
pub type OptionalExpression<'src, 'arena> = Option<ExpressionRef<'src, 'arena>>;
/// Body of a control-flow branch.
///
/// Branch bodies are stored separately so constructs such as `if`, `while`,
/// and `for` can preserve both the parsed body and branch-specific source
/// details.
///
/// Used by [`Expression::If`], [`Expression::While`], [`Expression::DoUntil`],
/// [`Expression::ForEach`] and [`Expression::For`].
#[derive(Debug, PartialEq)]
pub struct BranchBody<'src, 'arena> {
/// Parsed branch payload.
///
/// This is `None` when the body is absent or could not be parsed in a
/// recoverable way.
pub expression: Option<ExpressionRef<'src, 'arena>>,
/// Optional semicolon that appears immediately after a non-block branch
/// body in legacy constructs such as `if`, `for`, `while`, etc.
///
/// This is intentionally preserved rather than normalized away so later
/// stages can diagnose or reproduce source structure more precisely.
pub semicolon_position: Option<TokenPosition>,
/// Token position that can be used as a fallback end anchor for spans and
/// diagnostics when the body itself is missing.
///
/// In malformed constructs this may be the only reliable location attached
/// to the branch.
pub end_anchor_token_position: TokenPosition,
}
/// One `case` arm inside a `switch`.
///
/// UnrealScript-style syntax allows each arm to have multiple labels and uses
/// statement lists as bodies, with fallthrough being possible unless control
/// flow terminates explicitly.
///
/// Arms are stored in the `cases` field of [`Expression::Switch`]; the default
/// branch is kept separately in `default_arm`.
#[derive(Debug, PartialEq)]
pub struct SwitchCase<'src, 'arena> {
/// Case labels associated with this arm.
///
/// Labels are stored as expressions because the language allows
/// expression-valued labels rather than only simple constants.
pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>,
/// Statements belonging to the arm body.
pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
}
/// Stable arena reference to a `switch` case arm.
pub type SwitchCaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
impl<'arena> Expression<'_, 'arena> {
    /// Allocates an [`Expression::Binary`] node in `arena`.
    ///
    /// The node's span is produced by merging the spans of `left_hand_side`
    /// and `right_hand_side`.
    #[must_use]
    pub fn new_binary(
        arena: &'arena Arena,
        left_hand_side: ArenaNode<'arena, Self>,
        op: InfixOperator,
        right_hand_side: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        // The span must be computed first: both operands are moved into the
        // node right after.
        let covering_span = TokenSpan::merge(left_hand_side.span(), right_hand_side.span());
        let node = Self::Binary(left_hand_side, op, right_hand_side);
        ArenaNode::new_in(node, covering_span, arena)
    }

    /// Allocates an [`Expression::PrefixUnary`] node in `arena`.
    ///
    /// The node's span starts at `operation_position` (the operator token) and
    /// ends where the span of `right_hand_side` ends.
    #[must_use]
    pub fn new_prefix(
        arena: &'arena Arena,
        operation_position: TokenPosition,
        operation: PrefixOperator,
        right_hand_side: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        // Read the operand's end before the operand is moved into the node.
        let operand_end = right_hand_side.span().end;
        let node = Self::PrefixUnary(operation, right_hand_side);
        ArenaNode::new_in(node, TokenSpan::range(operation_position, operand_end), arena)
    }

    /// Allocates an [`Expression::PostfixUnary`] node in `arena`.
    ///
    /// The node's span starts where the span of `left_hand_side` starts and
    /// ends at `operation_position` (the operator token).
    #[must_use]
    pub fn new_postfix(
        arena: &'arena Arena,
        left_hand_side: ArenaNode<'arena, Self>,
        operation: PostfixOperator,
        operation_position: TokenPosition,
    ) -> ArenaNode<'arena, Self> {
        // Read the operand's start before the operand is moved into the node.
        let operand_start = left_hand_side.span().start;
        let node = Self::PostfixUnary(left_hand_side, operation);
        ArenaNode::new_in(node, TokenSpan::range(operand_start, operation_position), arena)
    }
}

283
rottlib/src/ast/mod.rs Normal file
View File

@ -0,0 +1,283 @@
// A statement's trailing `;` is encoded in the span of its statement node as the very last token.
// TODO: do a proper check to figure out what should and shouldn't be a node.
use crate::arena::ArenaVec;
use super::lexer::{TokenPosition, TokenSpan};
use crate::arena::{Arena, ArenaNode, ArenaString};
pub mod callables;
pub mod expressions;
pub mod operators;
pub mod types;
pub use callables::*;
pub use expressions::*;
pub use operators::*;
pub use types::*;
// TODO: Get rid of identifier field
/// Identifier stored as the position of its token in the token stream.
///
/// The AST keeps the position rather than the text, so the spelling and source
/// location are looked up through the lexer/token stream when needed.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct IdentifierToken(pub TokenPosition);
/// Operator stored as the position of its token in the token stream.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct OperatorToken(pub TokenPosition);
/// Identifier path made of one or more segments.
///
/// The first segment is always present in `head`; any further segments are
/// stored in `tail`, so a single-segment path needs no vector.
#[derive(Debug, Hash, PartialEq, Eq)]
pub struct QualifiedIdentifier<'arena> {
/// First segment of the path.
pub head: IdentifierToken,
/// Remaining segments in order; `None` means the path has a single segment.
pub tail: Option<ArenaVec<'arena, IdentifierToken>>, // None => single segment
}
/// Arena reference to a [`QualifiedIdentifier`] node.
pub type QualifiedIdentifierRef<'arena> = ArenaNode<'arena, QualifiedIdentifier<'arena>>;
impl<'arena> QualifiedIdentifier<'arena> {
    /// Returns `true` when the path has no `tail`, i.e. it consists of the
    /// head segment only.
    ///
    /// Only the presence of `tail` is checked: a `Some` tail is reported as
    /// multi-segment even if the vector inside it is empty.
    #[inline]
    #[must_use]
    pub const fn is_single(&self) -> bool {
        self.tail.is_none()
    }

    /// Number of segments in the path: the head plus every tail segment.
    ///
    /// The result is always at least `1`, which is why no `is_empty()` is
    /// provided.
    #[inline]
    #[allow(clippy::len_without_is_empty)] // Suppress useless suggestion for `is_empty()`
    #[must_use]
    pub fn len(&self) -> usize {
        1 + self.tail.as_ref().map_or(0, |v| v.len())
    }

    /// Returns the first segment of the path.
    #[inline]
    #[must_use]
    pub const fn head(&self) -> IdentifierToken {
        self.head
    }

    /// Iterates all identifier segments in order without allocating.
    ///
    /// Yields `head` first, followed by the segments of `tail` (if any).
    pub fn iter(&self) -> impl Iterator<Item = IdentifierToken> + '_ {
        core::iter::once(self.head).chain(self.tail.iter().flat_map(|v| v.iter().copied()))
    }

    /// Cheap constructor from a single identifier. No Vec allocated.
    ///
    /// The resulting node has no `tail` and its span covers only the token
    /// of `id`.
    #[must_use]
    pub fn from_ident(arena: &'arena Arena, id: IdentifierToken) -> QualifiedIdentifierRef<'arena> {
        let span = TokenSpan::new(id.0);
        ArenaNode::new_in(
            Self {
                head: id,
                tail: None,
            },
            span,
            arena,
        )
    }

    /// Cheap constructor from the position of a single token. No Vec allocated.
    ///
    /// Equivalent to wrapping `position` in an [`IdentifierToken`] and calling
    /// [`Self::from_ident`].
    #[must_use]
    pub fn from_position(
        arena: &'arena Arena,
        position: TokenPosition,
    ) -> QualifiedIdentifierRef<'arena> {
        // Delegate so that both constructors cannot drift apart.
        Self::from_ident(arena, IdentifierToken(position))
    }
}
/// Statement node.
#[derive(Debug, PartialEq)]
pub enum Statement<'src, 'arena> {
/// Empty statement, for the cases where the user just wrote too many
/// semicolons: `;;;;`.
Empty,
/// Expression used in statement position.
Expression(ExpressionRef<'src, 'arena>),
/// Local variable declaration without assignment:
/// `local int i, j, k`
LocalVariableDeclaration {
/// Type shared by all declarators.
type_spec: TypeSpecifierRef<'src, 'arena>,
/// Declared variables in source order.
declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
},
/// Label statement.
///
/// NOTE(review): the string is presumably the label's name - confirm
/// against the statement parser.
Label(ArenaString<'arena>),
/// Nested function definitions inside blocks or states.
Function(CallableDefinitionRef<'src, 'arena>),
/// Placeholder injected in place of parts that couldn't be parsed.
Error,
}
/// Arena reference to a [`Statement`] node.
pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;
/// Simple literal for use in top-level class declarations.
///
/// Unlike [`Expression::Integer`], the integer variant here is signed.
#[derive(Debug)]
pub enum DeclarationLiteral<'src, 'arena> {
/// `None` literal.
None,
/// Boolean literal.
Bool(bool),
/// Signed integer literal.
Integer(i128),
/// Floating-point literal.
Float(f64),
/// String literal stored in arena memory.
String(ArenaString<'arena>),
/// Identifier, borrowed from the source text.
Identifier(&'src str),
/// Name literal with a leading tag identifier.
TaggedName {
/// Identifier token written before the quoted part.
tag: IdentifierToken,
/// Quoted part of the literal, stored in arena memory.
quoted: ArenaString<'arena>,
},
}
/// [`DeclarationLiteral`] paired with a token position.
///
/// Despite the `Ref` suffix this is a plain struct, not an `ArenaNode` alias.
#[derive(Debug)]
pub struct DeclarationLiteralRef<'src, 'arena> {
/// The literal value.
pub literal: DeclarationLiteral<'src, 'arena>,
/// Token position associated with the literal.
pub position: TokenPosition,
}
impl IdentifierToken {
#[must_use]
pub const fn span(self) -> TokenSpan {
TokenSpan::new(self.0)
}
}
/// Modifier that can be attached to a class declaration
/// (see `ClassDeclaration::modifiers`).
///
/// Most variants are bare keywords; a few carry identifier arguments.
pub enum ClassModifier<'arena> {
Final,
Native,
Abstract,
Transient,
Public,
Protected,
Private,
Static,
/// `Config` with an optional identifier argument.
Config(Option<IdentifierToken>),
NativeReplication,
ExportStructs,
SafeReplace,
Const,
Deprecated,
NoExport,
Export,
Localized,
Placeable,
NotPlaceable,
Instanced,
EditConst,
EditInline,
EditInlineNew,
NotEditInlineNew,
CollapseCategories,
DontCollapseCategories,
/// `HideCategories` with its list of identifiers.
HideCategories(ArenaVec<'arena, IdentifierToken>),
/// `ShowCategories` with its list of identifiers.
ShowCategories(ArenaVec<'arena, IdentifierToken>),
/// `Within` with its single identifier argument.
Within(IdentifierToken),
/// `DependsOn` with its single identifier argument.
DependsOn(IdentifierToken),
GlobalConfig,
PerObjectConfig,
DynamicRecompile,
HideDropdown,
ParseConfig,
CacheExempt,
}
/// Arena reference to a [`ClassModifier`] node.
pub type ClassModifierRef<'arena> = ArenaNode<'arena, ClassModifier<'arena>>;
/// Class header: name, optional parent and modifiers.
///
/// Used as the `header` of `ClassDefinition`.
pub struct ClassDeclaration<'arena> {
/// Name of the declared class.
pub name: IdentifierToken,
/// Parent class, if one is specified.
pub parent: Option<IdentifierToken>,
/// Modifiers attached to the class.
///
/// NOTE(review): this is a std `Vec`, while other AST lists in this module
/// use `ArenaVec` - confirm whether that is intentional.
pub modifiers: Vec<ClassModifierRef<'arena>>,
}
/// Class-level `var` declaration.
#[derive(Debug)]
pub struct ClassVarDecl<'src, 'arena> {
/// var(<...>) e.g. var(Display, "Advanced")
/// Each item is an `ArenaNode`, so token locations are preserved.
pub paren_specs: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
/// Variable modifiers like public/protected/private/static/const/...
/// Each modifier is a plain `VarModifier` value that records its own token
/// position.
pub modifiers: ArenaVec<'arena, VarModifier>,
/// Declared type; any `TypeSpecifier` variant can appear here as far as the
/// type system is concerned.
pub type_spec: TypeSpecifierRef<'src, 'arena>,
/// Declared variables, e.g. `a, b=expr`.
pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
/// Token span attached to the declaration.
pub span: TokenSpan,
}
/// Arena reference to a [`ClassVarDecl`] node.
pub type ClassVarDeclRef<'src, 'arena> = ArenaNode<'arena, ClassVarDecl<'src, 'arena>>;
/// Class-level constant declaration: a name bound to a declaration literal.
#[derive(Debug)]
pub struct ClassConstDecl<'src, 'arena> {
/// Name of the constant.
pub name: IdentifierToken,
/// Literal value assigned to the constant.
pub value: DeclarationLiteralRef<'src, 'arena>,
/// Token span attached to the declaration.
pub span: TokenSpan,
}
/// Arena reference to a [`ClassConstDecl`] node.
pub type ClassConstDeclRef<'src, 'arena> = ArenaNode<'arena, ClassConstDecl<'src, 'arena>>;
/// One member of a class body (see `ClassDefinition::members`).
pub enum ClassMember<'src, 'arena>
where
'src: 'arena,
{
/// Function (callable) definition.
Function(CallableDefinitionRef<'src, 'arena>),
/// Enum type definition.
TypeDefEnum(EnumDefRef<'src, 'arena>),
/// Struct type definition.
TypeDefStruct(StructDefRef<'src, 'arena>),
/// Variable declaration.
Var(ClassVarDeclRef<'src, 'arena>),
/// Replication block.
Replication(ReplicationBlockRef<'src, 'arena>),
/// State declaration.
State(StateDeclRef<'src, 'arena>),
/// Constant declaration.
Const(ClassConstDeclRef<'src, 'arena>),
/// Exec directive.
Exec(ExecDirectiveRef<'arena>),
}
/// Arena reference to a [`ClassMember`] node.
pub type ClassMemberRef<'src, 'arena> = ArenaNode<'arena, ClassMember<'src, 'arena>>;
/// Reliability keyword of a replication rule (see `ReplicationRule::reliability`).
#[derive(Clone, Copy, Debug)]
pub enum Reliability {
/// `reliable`
Reliable,
/// `unreliable`
Unreliable,
}
/// One rule inside a replication block.
#[derive(Debug)]
pub struct ReplicationRule<'src, 'arena> {
/// `reliable` or `unreliable`.
pub reliability: Reliability, // reliable|unreliable
/// Condition from `if (<expr>)`, or `None` when the rule has no condition.
pub condition: Option<ExpressionRef<'src, 'arena>>, // if (<expr>) or None
/// Names listed by the rule, e.g. `a, b, Foo()`; only the identifier tokens
/// are stored.
pub members: ArenaVec<'arena, IdentifierToken>, // a, b, Foo()
/// Token span attached to the rule.
pub span: TokenSpan,
}
/// Arena reference to a [`ReplicationRule`] node.
pub type ReplicationRuleRef<'src, 'arena> = ArenaNode<'arena, ReplicationRule<'src, 'arena>>;
/// Replication block: a list of replication rules.
#[derive(Debug)]
pub struct ReplicationBlock<'src, 'arena> {
/// Rules contained in the block.
pub rules: ArenaVec<'arena, ReplicationRuleRef<'src, 'arena>>,
/// Token span attached to the block.
pub span: TokenSpan,
}
/// Arena reference to a [`ReplicationBlock`] node.
pub type ReplicationBlockRef<'src, 'arena> = ArenaNode<'arena, ReplicationBlock<'src, 'arena>>;
// ---------- States ----------
/// Modifier keyword of a state declaration.
#[derive(Clone, Copy, Debug)]
pub enum StateModifier {
/// `auto`
Auto, // 'auto'
/// `simulated`
Simulated, // 'simulated'
}
/// State declaration.
#[derive(Debug)]
pub struct StateDecl<'src, 'arena> {
/// Name of the state.
pub name: IdentifierToken,
/// Parent state from `extends BaseState`, if present.
pub parent: Option<IdentifierToken>, // 'extends BaseState'
/// State modifiers (`auto`, `simulated`).
pub modifiers: ArenaVec<'arena, StateModifier>, // auto, simulated
/// Names from an `ignores Foo, Bar;` clause, if present.
pub ignores: Option<ArenaVec<'arena, IdentifierToken>>, // 'ignores Foo, Bar;'
/// Body: ordinary statements plus nested function definitions (see `Statement::Function`).
pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
/// Token span attached to the declaration.
pub span: TokenSpan,
}
/// Arena reference to a [`StateDecl`] node.
pub type StateDeclRef<'src, 'arena> = ArenaNode<'arena, StateDecl<'src, 'arena>>;
/// Exec directive node.
#[derive(Debug)]
pub struct ExecDirective<'arena> {
/// Full line of the directive without trailing newline(s).
pub text: ArenaString<'arena>, // full line without trailing newline(s)
/// Token span attached to the directive.
pub span: TokenSpan,
}
/// Arena reference to an [`ExecDirective`] node.
pub type ExecDirectiveRef<'arena> = ArenaNode<'arena, ExecDirective<'arena>>;
/// Whole class: the `ClassDeclaration` header together with the class members.
pub struct ClassDefinition<'src, 'arena>
where
'src: 'arena,
{
/// Class header (name, parent, modifiers).
pub header: ClassDeclaration<'arena>,
/// Members of the class body.
pub members: ArenaVec<'arena, ClassMemberRef<'src, 'arena>>,
}

View File

@ -0,0 +1,268 @@
//! Operator AST nodes.
//!
//! This module defines the prefix, postfix, and infix operator kinds used by
//! expression AST nodes.
//!
//! The enums here represent only the *syntactic operator category* recorded in
//! the AST. They do not encode precedence, associativity, overload behavior,
//! or token spelling details beyond the normalized operator kind itself.
//! Those concerns are handled by the expression parser and precedence tables.
use crate::lexer::{Keyword, Token, TokenPosition};
use core::convert::TryFrom;
/// Prefix unary operators.
///
/// Can be obtained from a lexer token through `TryFrom<Token>`, which fails
/// for tokens that are not prefix operators.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum PrefixOperator {
/// Logical negation: `!expr`.
Not,
/// Arithmetic negation: `-expr`.
Minus,
/// Unary plus: `+expr`.
Plus,
/// Bitwise negation: `~expr`.
BitwiseNot,
/// Prefix increment: `++expr`.
Increment,
/// Prefix decrement: `--expr`.
Decrement,
}
/// Postfix unary operators.
///
/// Can be obtained from a lexer token through `TryFrom<Token>`, which fails
/// for tokens that are not postfix operators.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum PostfixOperator {
/// Postfix increment: `expr++`.
Increment,
/// Postfix decrement: `expr--`.
Decrement,
}
/// Binary / infix operators.
///
/// These operators appear between left-hand side and right-hand side operands.
/// This enum stores only the normalized AST-level operator kind.
///
/// The parser assigns precedence and associativity separately; the token to
/// operator mapping and the precedence ranks live in `infix_operator_info`.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum InfixOperator {
/// Simple assignment: `left_hand_side = right_hand_side`.
Assign,
/// Multiplicative assignment: `left_hand_side *= right_hand_side`.
MultiplyAssign,
/// Division assignment: `left_hand_side /= right_hand_side`.
DivideAssign,
/// Modulo assignment: `left_hand_side %= right_hand_side`.
ModuloAssign,
/// Additive assignment: `left_hand_side += right_hand_side`.
PlusAssign,
/// Subtractive assignment: `left_hand_side -= right_hand_side`.
MinusAssign,
/// String concatenation assignment: `left_hand_side $= right_hand_side`.
ConcatAssign,
/// Space-concatenation assignment: `left_hand_side @= right_hand_side`.
ConcatSpaceAssign,
/// String concatenation without inserted whitespace:
/// `left_hand_side $ right_hand_side`.
Concat,
/// String concatenation with an inserted space:
/// `left_hand_side @ right_hand_side`.
ConcatSpace,
/// Logical conjunction: `left_hand_side && right_hand_side`.
And,
/// Logical exclusive-or: `left_hand_side ^^ right_hand_side`.
Xor,
/// Logical disjunction: `left_hand_side || right_hand_side`.
Or,
/// Bitwise AND: `left_hand_side & right_hand_side`.
BitwiseAnd,
/// Bitwise OR: `left_hand_side | right_hand_side`.
BitwiseOr,
/// Bitwise XOR: `left_hand_side ^ right_hand_side`.
BitwiseXor,
/// Inequality test: `left_hand_side != right_hand_side`.
NotEqual,
/// Equality test: `left_hand_side == right_hand_side`.
Equal,
/// Approximate equality test: `left_hand_side ~= right_hand_side`.
ApproximatelyEqual,
/// Less-than comparison: `left_hand_side < right_hand_side`.
Less,
/// Less-than-or-equal comparison: `left_hand_side <= right_hand_side`.
LessEqual,
/// Greater-than comparison: `left_hand_side > right_hand_side`.
Greater,
/// Greater-than-or-equal comparison: `left_hand_side >= right_hand_side`.
GreaterEqual,
/// UnrealScript-specific directional comparison:
/// `left_hand_side ClockwiseFrom right_hand_side`.
ClockwiseFrom,
/// Left shift: `left_hand_side << right_hand_side`.
LeftShift,
/// Logical right shift: `left_hand_side >>> right_hand_side`.
LogicalRightShift,
/// Arithmetic / ordinary right shift: `left_hand_side >> right_hand_side`.
RightShift,
/// Addition: `left_hand_side + right_hand_side`.
Plus,
/// Subtraction: `left_hand_side - right_hand_side`.
Minus,
/// Remainder / modulo: `left_hand_side % right_hand_side`.
Modulo,
/// Multiplication: `left_hand_side * right_hand_side`.
Multiply,
/// Division: `left_hand_side / right_hand_side`.
Divide,
/// Dot product: `left_hand_side Dot right_hand_side`.
///
/// This is spelled as a keyword-level operator in source.
Dot,
/// Cross product: `left_hand_side Cross right_hand_side`.
///
/// This is spelled as a keyword-level operator in source.
Cross,
/// Exponentiation: `left_hand_side ** right_hand_side`.
Exponentiation,
}
/// Prefix operator kind paired with a token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PrefixOperatorName {
/// Which prefix operator this is.
pub kind: PrefixOperator,
/// Token position associated with the operator (presumably the operator
/// token itself - TODO confirm at construction sites).
pub position: TokenPosition,
}
/// Infix operator kind paired with a token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct InfixOperatorName {
/// Which infix operator this is.
pub kind: InfixOperator,
/// Token position associated with the operator (presumably the operator
/// token itself - TODO confirm at construction sites).
pub position: TokenPosition,
}
/// Postfix operator kind paired with a token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PostfixOperatorName {
/// Which postfix operator this is.
pub kind: PostfixOperator,
/// Token position associated with the operator (presumably the operator
/// token itself - TODO confirm at construction sites).
pub position: TokenPosition,
}
impl TryFrom<Token> for PostfixOperator {
    type Error = ();

    /// Maps `Token::Increment` / `Token::Decrement` to the matching postfix
    /// operator; every other token yields `Err(())`.
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        let operator = match token {
            Token::Increment => Self::Increment,
            Token::Decrement => Self::Decrement,
            _ => return Err(()),
        };
        Ok(operator)
    }
}
impl TryFrom<Token> for PrefixOperator {
    type Error = ();

    /// Maps a token to the prefix operator it spells; tokens that are not
    /// prefix operators yield `Err(())`.
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        let operator = match token {
            Token::Not => Self::Not,
            Token::Minus => Self::Minus,
            Token::Plus => Self::Plus,
            Token::BitwiseNot => Self::BitwiseNot,
            Token::Increment => Self::Increment,
            Token::Decrement => Self::Decrement,
            _ => return Err(()),
        };
        Ok(operator)
    }
}
/// Result of classifying a token as an infix operator
/// (returned by `infix_operator_info`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InfixOperatorInfo {
/// AST-level operator kind for the token.
pub operator: InfixOperator,
/// Precedence rank taken from the table in `infix_operator_info`.
///
/// NOTE(review): how ranks translate into binding strength is decided by
/// the expression parser and is not visible here.
pub right_precedence_rank: u8,
}
/// Looks up the infix operator kind and precedence rank for `token`.
///
/// Returns `None` when `token` is not an infix operator.
pub(crate) const fn infix_operator_info(token: Token) -> Option<InfixOperatorInfo> {
    use InfixOperator as Op;

    // Table rows are grouped by rank, in ascending order.
    let (operator, right_precedence_rank) = match token {
        Token::Exponentiation => (Op::Exponentiation, 12),

        Token::Multiply => (Op::Multiply, 16),
        Token::Divide => (Op::Divide, 16),
        Token::Keyword(Keyword::Cross) => (Op::Cross, 16),
        Token::Keyword(Keyword::Dot) => (Op::Dot, 16),

        Token::Modulo => (Op::Modulo, 18),

        Token::Plus => (Op::Plus, 20),
        Token::Minus => (Op::Minus, 20),

        Token::LeftShift => (Op::LeftShift, 22),
        Token::RightShift => (Op::RightShift, 22),
        Token::LogicalRightShift => (Op::LogicalRightShift, 22),

        Token::Less => (Op::Less, 24),
        Token::LessEqual => (Op::LessEqual, 24),
        Token::Greater => (Op::Greater, 24),
        Token::GreaterEqual => (Op::GreaterEqual, 24),
        Token::Equal => (Op::Equal, 24),
        Token::ApproximatelyEqual => (Op::ApproximatelyEqual, 24),
        Token::Keyword(Keyword::ClockwiseFrom) => (Op::ClockwiseFrom, 24),

        Token::NotEqual => (Op::NotEqual, 26),

        Token::BitwiseAnd => (Op::BitwiseAnd, 28),
        Token::BitwiseXor => (Op::BitwiseXor, 28),
        Token::BitwiseOr => (Op::BitwiseOr, 28),

        Token::LogicalAnd => (Op::And, 30),
        Token::LogicalXor => (Op::Xor, 30),

        Token::LogicalOr => (Op::Or, 32),

        Token::MultiplyAssign => (Op::MultiplyAssign, 34),
        Token::DivideAssign => (Op::DivideAssign, 34),
        Token::PlusAssign => (Op::PlusAssign, 34),
        Token::MinusAssign => (Op::MinusAssign, 34),
        Token::Assign => (Op::Assign, 34),
        Token::ModuloAssign => (Op::ModuloAssign, 34),

        Token::Concat => (Op::Concat, 40),
        Token::ConcatSpace => (Op::ConcatSpace, 40),

        Token::ConcatAssign => (Op::ConcatAssign, 44),
        Token::ConcatSpaceAssign => (Op::ConcatSpaceAssign, 44),

        _ => return None,
    };
    Some(InfixOperatorInfo {
        operator,
        right_precedence_rank,
    })
}
impl TryFrom<Token> for InfixOperator {
    type Error = ();

    /// Maps a token to its infix operator kind using `infix_operator_info`,
    /// discarding the precedence rank; non-operator tokens yield `Err(())`.
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        match infix_operator_info(token) {
            Some(info) => Ok(info.operator),
            None => Err(()),
        }
    }
}

277
rottlib/src/ast/types.rs Normal file
View File

@ -0,0 +1,277 @@
//! Type-specifier and declaration AST nodes.
//!
//! This module defines syntactic forms used to represent type names, inline
//! type declarations, variable declarators, and declaration modifiers.
use super::{TokenSpan, ExpressionRef, IdentifierToken, QualifiedIdentifierRef};
use crate::arena::{ArenaNode, ArenaString, ArenaVec};
use crate::lexer::{Keyword, Token, TokenPosition};
use core::convert::TryFrom;
/// Type syntax used in declarations, fields, and other type-annotated grammar
/// positions.
///
/// This enum covers both named types and inline type-definition forms supported
/// by the language.
#[derive(Debug, PartialEq)]
pub enum TypeSpecifier<'src, 'arena> {
/// Named type reference such as `EDrawType` or `Pkg.Group.Type`.
Named(QualifiedIdentifierRef<'arena>),
/// Inline enum definition used directly in type position.
///
/// Example:
/// `enum EMyKind { A, B, C }`
InlineEnum(EnumDefRef<'src, 'arena>),
/// Inline struct definition used directly in type position.
///
/// Example:
/// `struct SMyData { var int X; }`
InlineStruct(StructDefRef<'src, 'arena>),
/// Generic array type: `array<...>`.
///
/// The parser currently allows a sequence of variable-style modifiers to
/// appear before the inner type and preserves them here.
Array {
/// Modifiers parsed before the inner type inside `array<...>`.
element_modifiers: ArenaVec<'arena, VarModifier>,
/// Element / inner type.
element_type: TypeSpecifierRef<'src, 'arena>,
},
/// `class` or `class<SomeType>`.
///
/// `None` represents a bare `class` with no type argument; `Some` holds the
/// qualified name written between the angle brackets.
Class(Option<QualifiedIdentifierRef<'arena>>),
}
/// Stable arena reference to a type-specifier node.
pub type TypeSpecifierRef<'src, 'arena> = ArenaNode<'arena, TypeSpecifier<'src, 'arena>>;
/// Enum definition used either inline in a type position or elsewhere in the
/// declaration grammar.
#[derive(Debug, PartialEq, Eq)]
pub struct EnumDefinition<'arena> {
/// Declared enum name.
pub name: IdentifierToken,
/// Enum variants in source order.
pub variants: ArenaVec<'arena, IdentifierToken>,
}
/// Stable arena reference to an enum definition.
///
/// The `'src` parameter is not used by the aliased type (presumably kept so
/// the alias has the same shape as the other `*Ref` aliases).
pub type EnumDefRef<'src, 'arena> = ArenaNode<'arena, EnumDefinition<'arena>>;
/// Struct-level modifier kind.
///
/// These are modifiers that apply to the struct declaration itself rather than
/// to an individual field.
///
/// Each variant stands for one modifier keyword; the token position is kept
/// separately in `StructModifier`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StructModifierKind {
Native,
Export,
NoExport,
Transient,
Deprecated,
Init,
Long,
}
/// Struct declaration modifier together with its source token position.
///
/// Instances are stored in `StructDefinition::modifiers`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StructModifier {
/// Modifier kind.
pub kind: StructModifierKind,
/// Position of the modifier token in the source stream.
pub position: TokenPosition,
}
impl StructModifier {
    /// Single-token span built from this modifier's token position.
    #[must_use]
    pub const fn span(self) -> TokenSpan {
        let Self { position, .. } = self;
        TokenSpan::new(position)
    }

    /// Creates a struct modifier of the given `kind`, recording `token` as its
    /// position.
    #[must_use]
    pub const fn new(kind: StructModifierKind, token: TokenPosition) -> Self {
        let position = token;
        Self { kind, position }
    }
}
/// Struct field declaration.
///
/// A field stores the declared type together with one or more declarators
/// sharing that type, plus optional `var(...)` editor specifiers and ordinary
/// declaration modifiers.
///
/// Field declarations are collected in `StructDefinition::fields`.
#[derive(Debug, PartialEq)]
pub struct StructField<'src, 'arena> {
/// Field type.
pub type_specifier: TypeSpecifierRef<'src, 'arena>,
/// One or more declarators declared with the same type.
///
/// Examples:
/// - `var int A;`
/// - `var int A, B[4], C = 10;`
pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
/// Optional `var(...)` editor specifiers attached to the field declaration.
///
/// Example:
/// `var(Display, "Advanced/Hidden")`
pub editor_specifiers: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
/// Declaration modifiers attached to the field.
///
/// These are preserved in source order.
pub declaration_modifiers: ArenaVec<'arena, VarModifier>,
}
/// Stable arena reference to a struct field declaration.
pub type StructFieldRef<'src, 'arena> = ArenaNode<'arena, StructField<'src, 'arena>>;
/// Struct definition used either inline in a type position or elsewhere in the
/// declaration grammar.
///
/// Referenced from `TypeSpecifier::InlineStruct` through `StructDefRef`.
#[derive(Debug, PartialEq)]
pub struct StructDefinition<'src, 'arena> {
/// Struct name, if present.
///
/// Anonymous inline structs use `None`.
pub name: Option<IdentifierToken>,
/// Optional base struct after `extends`.
pub base_type_name: Option<QualifiedIdentifierRef<'arena>>,
/// Modifiers attached to the struct declaration itself.
pub modifiers: ArenaVec<'arena, StructModifier>,
/// Struct fields in source order.
pub fields: ArenaVec<'arena, StructFieldRef<'src, 'arena>>,
}
/// Stable arena reference to a struct definition.
pub type StructDefRef<'src, 'arena> = ArenaNode<'arena, StructDefinition<'src, 'arena>>;
/// One declared variable name together with optional array size and initializer.
///
/// This node represents one declarator inside a declaration that may contain
/// several comma-separated declarators sharing the same type.
#[derive(Debug, PartialEq)]
pub struct VariableDeclarator<'src, 'arena> {
/// Declared variable name.
pub name: IdentifierToken,
/// Optional initializer after `=`; `None` when no initializer was written.
pub initializer: Option<ExpressionRef<'src, 'arena>>,
/// Optional array-size expression from `[expr]`; `None` when the declarator
/// has no `[...]` part.
pub array_size: Option<ExpressionRef<'src, 'arena>>,
}
/// Stable arena reference to a variable declarator.
///
/// The node span is expected to cover the entire declarator, not only the
/// identifier token.
pub type VariableDeclaratorRef<'src, 'arena> = ArenaNode<'arena, VariableDeclarator<'src, 'arena>>;
/// One item inside `var(...)` editor specifiers.
#[derive(Debug, PartialEq, Eq)]
pub enum VarEditorSpecifier<'arena> {
/// Identifier-like editor specifier such as `Display` or `Advanced`.
Identifier(IdentifierToken),
/// String editor specifier such as `"Category/Sub"`.
String(ArenaString<'arena>),
}
/// Stable arena reference to an editor specifier.
///
/// The `'src` parameter is not used by the aliased type (presumably kept so
/// the alias has the same shape as the other `*Ref` aliases).
pub type VarEditorSpecifierRef<'src, 'arena> = ArenaNode<'arena, VarEditorSpecifier<'arena>>;
/// Field / variable declaration modifier kind.
///
/// Each variant has a same-named `Keyword` counterpart and can be obtained
/// from it through `TryFrom<Keyword>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VarModifierKind {
Transient,
Public,
Protected,
Private,
Static,
Const,
Deprecated,
NoExport,
Export,
Config,
Localized,
GlobalConfig,
PerObjectConfig,
Input,
EdFindable,
EditConst,
EditConstArray,
EditInline,
EditInlineUse,
EditInlineNew,
EditInlineNotify,
NotEditInlineNew,
Automated,
Native,
Travel,
Cache,
}
/// Variable-style declaration modifier together with its token position.
///
/// Can be built from a `(Token, TokenPosition)` pair through `TryFrom`, which
/// fails unless the token is a keyword that names a modifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarModifier {
/// Modifier kind.
pub kind: VarModifierKind,
/// Position of the modifier token in the source stream.
pub position: TokenPosition,
}
impl TryFrom<Keyword> for VarModifierKind {
    type Error = ();

    /// Maps a keyword to the same-named modifier kind; keywords that do not
    /// name a variable modifier yield `Err(())`.
    fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
        Ok(match keyword {
            Keyword::Transient => Self::Transient,
            Keyword::Public => Self::Public,
            Keyword::Protected => Self::Protected,
            Keyword::Private => Self::Private,
            Keyword::Static => Self::Static,
            Keyword::Const => Self::Const,
            Keyword::Deprecated => Self::Deprecated,
            Keyword::NoExport => Self::NoExport,
            Keyword::Export => Self::Export,
            Keyword::Config => Self::Config,
            Keyword::Localized => Self::Localized,
            Keyword::GlobalConfig => Self::GlobalConfig,
            Keyword::PerObjectConfig => Self::PerObjectConfig,
            Keyword::Input => Self::Input,
            Keyword::EdFindable => Self::EdFindable,
            Keyword::EditConst => Self::EditConst,
            Keyword::EditConstArray => Self::EditConstArray,
            Keyword::EditInline => Self::EditInline,
            Keyword::EditInlineUse => Self::EditInlineUse,
            Keyword::EditInlineNew => Self::EditInlineNew,
            Keyword::EditInlineNotify => Self::EditInlineNotify,
            Keyword::NotEditInlineNew => Self::NotEditInlineNew,
            Keyword::Automated => Self::Automated,
            Keyword::Native => Self::Native,
            Keyword::Travel => Self::Travel,
            Keyword::Cache => Self::Cache,
            _ => return Err(()),
        })
    }
}
impl TryFrom<(Token, TokenPosition)> for VarModifier {
    type Error = ();

    /// Builds a modifier from a token and its position.
    ///
    /// Fails with `Err(())` when the token is not a keyword, or when the
    /// keyword does not convert into a `VarModifierKind`.
    fn try_from((token, position): (Token, TokenPosition)) -> Result<Self, Self::Error> {
        match token {
            Token::Keyword(keyword) => {
                VarModifierKind::try_from(keyword).map(|kind| Self { kind, position })
            }
            _ => Err(()),
        }
    }
}

View File

@ -4,7 +4,11 @@
//! parsing or doing lightweight frontend checks. They are intentionally small,
//! depend only on [`TokenSpan`], and are easy to construct and store.
use crate::ast::AstSpan;
mod parse_error_diagnostics;
mod render;
use crate::lexer::TokenSpan;
pub(crate) use parse_error_diagnostics::diagnostic_from_parse_error;
/// Classification of a diagnostic by its impact.
///
@ -36,7 +40,7 @@ pub enum Severity {
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Label {
/// Span to highlight in source coordinates.
pub span: AstSpan,
pub span: TokenSpan,
/// Short inline text shown next to the caret line.
pub message: String,
}
@ -54,9 +58,6 @@ pub struct Diagnostic {
/// Codes must match `^[LPTSXD][0-9]{4}$` where the prefix is the domain:
/// `L` lexer, `P` parser, `T` type check, `S` semantics, `X` lints,
/// `D` deprecations.
///
/// Codes help users search documentation and suppress or elevate specific
/// diagnostics. Keep codes stable across releases once published.
code: Option<String>,
/// Marks the main location the user should look at first.
///
@ -110,43 +111,51 @@ impl Diagnostic {
}
/// Returns `true` iff severity is [`Severity::Error`].
#[must_use]
pub fn stops_compilation(&self) -> bool {
self.severity == Severity::Error
}
/// Returns the diagnostic code if present.
///
/// See [DiagnosticBuilder::code] for code scheme.
/// See [`DiagnosticBuilder::code`] for code scheme.
#[must_use]
pub fn code(&self) -> Option<&str> {
self.code.as_deref()
}
/// Returns the primary label, if any.
pub fn primary_label(&self) -> Option<&Label> {
#[must_use]
pub const fn primary_label(&self) -> Option<&Label> {
self.primary_label.as_ref()
}
/// Returns the secondary labels in insertion order.
#[must_use]
pub fn secondary_labels(&self) -> &[Label] {
&self.secondary_labels
}
/// Returns the headline.
#[must_use]
pub fn headline(&self) -> &str {
&self.headline
}
/// Returns the severity.
pub fn severity(&self) -> Severity {
#[must_use]
pub const fn severity(&self) -> Severity {
self.severity
}
/// Returns the notes.
#[must_use]
pub fn notes(&self) -> &[String] {
&self.notes
}
/// Returns the help message, if any.
#[must_use]
pub fn help(&self) -> Option<&str> {
self.help.as_deref()
}
@ -201,7 +210,7 @@ impl DiagnosticBuilder {
/// One sentence, starting with lowercase letter, no period at the end.
/// Since only one primary label can be specified, the previous primary is
/// replaced.
pub fn primary_label(mut self, span: AstSpan, message: impl Into<String>) -> Self {
pub fn primary_label(mut self, span: TokenSpan, message: impl Into<String>) -> Self {
self.diagnostic.primary_label = Some(Label {
span,
message: message.into(),
@ -212,7 +221,7 @@ impl DiagnosticBuilder {
/// Add a secondary label.
///
/// One sentence, starting with lowercase letter, no period at the end.
pub fn secondary_label(mut self, span: AstSpan, message: impl Into<String>) -> Self {
pub fn secondary_label(mut self, span: TokenSpan, message: impl Into<String>) -> Self {
self.diagnostic.secondary_labels.push(Label {
span,
message: message.into(),

View File

@ -0,0 +1,111 @@
use super::{Diagnostic, DiagnosticBuilder, FoundAt, found_at};
use crate::lexer::{Token, TokenSpan, TokenizedFile};
use crate::parser::ParseError;
/// P0025: an expression statement inside a block is not followed by `;`.
///
/// The primary label sits on a `TokenSpan::new` span built from the end of
/// the blame span and names whatever `found_at` reports there. When the
/// error carries an `"expression_span"`, a secondary label marks the
/// expression: the whole span if it starts on the same line as the primary
/// position, otherwise only its end.
pub(super) fn diagnostic_block_missing_semicolon_after_expression<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let primary_span = TokenSpan::new(error.blame_span.end);
    let primary_text = match found_at(file, primary_span.end) {
        FoundAt::Token(token_text) => format!("expected `;` before `{}`", token_text),
        FoundAt::EndOfFile => "expected `;` before end of file".to_string(),
        FoundAt::Unknown => "expected `;` here".to_string(),
    };

    // Decide which part of the expression (if any) gets a secondary label.
    let expression_label = error
        .related_spans
        .get("expression_span")
        .copied()
        .map(|expression_span| {
            if file.same_line(expression_span.start, primary_span.end) {
                (expression_span, "expression statement")
            } else {
                (
                    TokenSpan::new(expression_span.end),
                    "expression statement ends here",
                )
            }
        });

    let mut builder = DiagnosticBuilder::error("missing `;` after expression statement");
    if let Some((label_span, label_text)) = expression_label {
        builder = builder.secondary_label(label_span, label_text);
    }
    builder
        .primary_label(primary_span, primary_text)
        .code("P0025")
        .build()
}
/// P0026
pub(super) fn diagnostic_block_missing_closing_brace<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let left_brace_span = error.related_spans.get("left_brace").copied();
let unexpected_token_span = error.related_spans.get("unexpected_token").copied();
let (mut primary_span, primary_text) =
if let Some(unexpected_token_span) = unexpected_token_span {
let primary_span = TokenSpan::new(unexpected_token_span.end);
let primary_text = match found_at(file, primary_span.end) {
FoundAt::Token(token_text) => format!("expected `}}` before `{}`", token_text),
FoundAt::EndOfFile => "expected `}` before end of file".to_string(),
FoundAt::Unknown => "expected `}` here".to_string(),
};
(primary_span, primary_text)
} else {
(
TokenSpan::new(error.blame_span.end),
"expected `}` before end of file".to_string(),
)
};
let builder = DiagnosticBuilder::error("missing `}` to close block");
if let Some(left_brace_span) = left_brace_span
&& !file.same_line(left_brace_span.start, primary_span.end)
{
primary_span.start = left_brace_span.start;
}
builder
.primary_label(primary_span, primary_text)
.code("P0026")
.build()
}
/// P0027: a statement or expression was expected inside a block.
///
/// A token equal to `Token::ExecDirective` at the start of the blame span
/// gets a dedicated message; every other case is worded from what
/// `found_at` reports at that same position.
pub(super) fn diagnostic_block_expected_item<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let primary_span = error.blame_span;
    let starts_with_exec_directive = matches!(
        file.token_at(primary_span.start).map(|data| data.token),
        Some(Token::ExecDirective)
    );

    let (title, primary_text) = if starts_with_exec_directive {
        (
            "expected statement or expression, found `#exec` directive".to_string(),
            "`#exec` directives are not allowed in a statement block".to_string(),
        )
    } else {
        match found_at(file, primary_span.start) {
            FoundAt::Token(token_text) => (
                format!("expected statement or expression, found `{}`", token_text),
                format!("unexpected `{}`", token_text),
            ),
            FoundAt::EndOfFile => (
                "expected statement or expression, found end of file".to_string(),
                "reached end of file here".to_string(),
            ),
            FoundAt::Unknown => (
                "expected statement or expression".to_string(),
                "expected statement or expression here".to_string(),
            ),
        }
    };

    DiagnosticBuilder::error(title)
        .primary_label(primary_span, primary_text)
        .code("P0027")
        .build()
}

View File

@ -0,0 +1,810 @@
use super::{
Diagnostic, DiagnosticBuilder, FoundAt, collapse_span_to_end_on_same_line, found_at,
primary_span_with_optional_multiline_context, should_show_context_label,
};
use crate::lexer::{TokenSpan, TokenizedFile};
use crate::parser::{ParseError, diagnostic_labels};
pub(super) fn diagnostic_condition_expected<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let control_keyword_span = error
.related_spans
.get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
.copied();
let control_keyword_text = control_keyword_span.and_then(|span| file.token_text(span.end));
let do_keyword_span = error.related_spans.get("do_keyword").copied();
// Present only for the recovery path where a parsed block expression is
// treated as the likely branch body, meaning the condition before it is
// missing.
let branch_body_span = error.related_spans.get("branch_body").copied();
let found_branch_body = branch_body_span.is_some();
let found = found_at(file, error.blame_span.end);
let (header_text, primary_text) = match (control_keyword_text, found) {
(Some(keyword_text), FoundAt::Token(token_text)) => {
let primary_text = if found_branch_body && token_text == "{" {
"body starts here, but the condition is missing".to_string()
} else {
format!("unexpected `{}`", token_text)
};
(
format!(
"expected condition after `{}`, found `{}`",
keyword_text, token_text
),
primary_text,
)
}
(Some(keyword_text), FoundAt::EndOfFile) => (
format!(
"expected condition after `{}`, found end of file",
keyword_text
),
"reached end of file here".to_string(),
),
(Some(keyword_text), FoundAt::Unknown) => (
format!("expected condition after `{}`", keyword_text),
"expected condition here".to_string(),
),
(None, FoundAt::Token(token_text)) => {
let primary_text = if found_branch_body && token_text == "{" {
"body starts here, but the condition is missing".to_string()
} else {
format!("unexpected `{}`", token_text)
};
(
format!("expected condition, found `{}`", token_text),
primary_text,
)
}
(None, FoundAt::EndOfFile) => (
"expected condition, found end of file".to_string(),
"reached end of file here".to_string(),
),
(None, FoundAt::Unknown) => (
"expected condition".to_string(),
"expected condition here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
let spans_are_same =
|left: TokenSpan, right: TokenSpan| left.start == right.start && left.end == right.end;
if let Some(do_span) = do_keyword_span {
let same_as_control_keyword = control_keyword_span
.map(|control_span| spans_are_same(do_span, control_span))
.unwrap_or(false);
let same_line_as_control_keyword = control_keyword_span
.map(|control_span| file.same_line(do_span.start, control_span.start))
.unwrap_or(false);
if !same_as_control_keyword
&& !same_line_as_control_keyword
&& !file.same_line(do_span.start, error.blame_span.end)
{
builder = builder.secondary_label(do_span, "`do` expression starts here");
}
}
if let Some(control_keyword_span) = control_keyword_span
&& !file.same_line(control_keyword_span.start, error.blame_span.end)
{
let secondary_text = if let Some(keyword_text) = control_keyword_text {
format!("after this `{}`, a condition was expected", keyword_text)
} else {
"after this control-flow keyword, a condition was expected".to_string()
};
builder = builder.secondary_label(control_keyword_span, secondary_text);
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0012")
.build()
}
pub(super) fn diagnostic_control_flow_body_expected<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let control_keyword_span = error
.related_spans
.get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
.copied();
let control_keyword_text = control_keyword_span.and_then(|span| file.token_text(span.end));
let body_context_span = error
.related_spans
.get(diagnostic_labels::EXPRESSION_EXPECTED_AFTER)
.copied();
let body_context_text = body_context_span.and_then(|span| file.token_text(span.end));
let found = found_at(file, error.blame_span.end);
let (header_text, primary_text) = match (control_keyword_text, found) {
(Some(keyword_text), FoundAt::Token(token_text)) => (
format!(
"expected body for `{}`, found `{}`",
keyword_text, token_text
),
format!("unexpected `{}`", token_text),
),
(Some(keyword_text), FoundAt::EndOfFile) => (
format!("expected body for `{}`, found end of file", keyword_text),
"reached end of file here".to_string(),
),
(Some(keyword_text), FoundAt::Unknown) => (
format!("expected body for `{}`", keyword_text),
"expected body here".to_string(),
),
(None, FoundAt::Token(token_text)) => (
format!("expected body, found `{}`", token_text),
format!("unexpected `{}`", token_text),
),
(None, FoundAt::EndOfFile) => (
"expected body, found end of file".to_string(),
"reached end of file here".to_string(),
),
(None, FoundAt::Unknown) => (
"expected body".to_string(),
"expected body here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
let spans_are_same =
|left: TokenSpan, right: TokenSpan| left.start == right.start && left.end == right.end;
let body_context_is_same_as_keyword = match (control_keyword_span, body_context_span) {
(Some(control_span), Some(body_span)) => spans_are_same(control_span, body_span),
_ => false,
};
let body_context_is_trivial_same_line_eof = match body_context_span {
Some(body_span) => {
matches!(found, FoundAt::EndOfFile)
&& file.same_line(body_span.start, error.blame_span.end)
}
None => false,
};
let should_fallback_to_control_keyword = match body_context_span {
None => true,
Some(_) if body_context_is_same_as_keyword => true,
Some(_) => false,
};
if let Some(body_span) = body_context_span
&& !body_context_is_same_as_keyword
&& !body_context_is_trivial_same_line_eof
{
let secondary_text = if let Some(context_text) = body_context_text {
format!("after this `{}`, a body was expected", context_text)
} else {
"after this construct, a body was expected".to_string()
};
builder = builder.secondary_label(body_span, secondary_text);
} else if should_fallback_to_control_keyword
&& let Some(keyword_span) = control_keyword_span
&& !file.same_line(keyword_span.start, error.blame_span.end)
{
let secondary_text = if let Some(keyword_text) = control_keyword_text {
format!("after this `{}`, a body was expected", keyword_text)
} else {
"after this control-flow keyword, a body was expected".to_string()
};
builder = builder.secondary_label(keyword_span, secondary_text);
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0013")
.build()
}
pub(super) fn diagnostic_do_missing_until<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let do_keyword_span = error.related_spans.get("do_keyword").copied();
let found = found_at(file, error.blame_span.end);
let primary_text = match found {
FoundAt::Token(token_text) => {
format!("expected `until` before `{}`", token_text)
}
FoundAt::EndOfFile => "expected `until` before end of file".to_string(),
FoundAt::Unknown => "expected `until` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `until` after `do` body");
let primary_context_span =
do_keyword_span.filter(|span| should_show_context_label(file, *span, error.blame_span));
if let Some(span) = primary_context_span {
builder = builder.secondary_label(span, "`do` expression starts here");
}
let primary_span =
primary_span_with_optional_multiline_context(file, primary_context_span, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0014")
.build()
}
/// P0015: an iterator expression was expected but something else was found.
///
/// The headline names the keyword stored under
/// `diagnostic_labels::EXPRESSION_REQUIRED_BY` when its text is available,
/// together with what `found_at` reports at the end of the blame span.
/// A `{` at that position is described as the start of the body. The keyword
/// gets a secondary label only when it is on a different line than the end
/// of the blame span.
pub(super) fn diagnostic_for_each_iterator_expression_expected<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_end = error.blame_span.end;
    let control_keyword_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
        .copied();
    let control_keyword_text = control_keyword_span.and_then(|span| file.token_text(span.end));
    let found = found_at(file, blame_end);

    // The label under the caret depends only on what was found, so it is
    // computed once instead of being repeated per keyword case.
    let primary_text = match found {
        FoundAt::Token("{") => {
            "body starts here, but the iterator expression is missing".to_string()
        }
        FoundAt::Token(token_text) => format!("unexpected `{}`", token_text),
        FoundAt::EndOfFile => "reached end of file here".to_string(),
        FoundAt::Unknown => "expected iterator expression here".to_string(),
    };
    let header_text = match (control_keyword_text, found) {
        (Some(keyword_text), FoundAt::Token(token_text)) => format!(
            "expected iterator expression after `{}`, found `{}`",
            keyword_text, token_text
        ),
        (Some(keyword_text), FoundAt::EndOfFile) => format!(
            "expected iterator expression after `{}`, found end of file",
            keyword_text
        ),
        (Some(keyword_text), FoundAt::Unknown) => {
            format!("expected iterator expression after `{}`", keyword_text)
        }
        (None, FoundAt::Token(token_text)) => {
            format!("expected iterator expression, found `{}`", token_text)
        }
        (None, FoundAt::EndOfFile) => {
            "expected iterator expression, found end of file".to_string()
        }
        (None, FoundAt::Unknown) => "expected iterator expression".to_string(),
    };

    let mut builder = DiagnosticBuilder::error(header_text);

    let keyword_on_other_line =
        control_keyword_span.filter(|span| !file.same_line(span.start, blame_end));
    if let Some(keyword_span) = keyword_on_other_line {
        let secondary_text = control_keyword_text.map_or_else(
            || "after this control-flow keyword, an iterator expression was expected".to_string(),
            |keyword_text| {
                format!(
                    "after this `{}`, an iterator expression was expected",
                    keyword_text
                )
            },
        );
        builder = builder.secondary_label(keyword_span, secondary_text);
    }

    builder
        .primary_label(error.blame_span, primary_text)
        .code("P0015")
        .build()
}
/// P0016: the token after `(` in a `for` header cannot start an initializer.
///
/// At most one secondary label is added. The span stored under
/// `diagnostic_labels::EXPRESSION_EXPECTED_AFTER` (labelled as the `(`) is
/// used when it is on a different line than the end of the blame span.
/// Otherwise the span under `diagnostic_labels::EXPRESSION_REQUIRED_BY`
/// (labelled as the `for`) is used, provided it shares a line with neither
/// the end of the blame span nor the `(` span.
pub(super) fn diagnostic_for_loop_header_initializer_invalid_start<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_end = error.blame_span.end;
    let for_keyword_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
        .copied();
    let left_parenthesis_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_EXPECTED_AFTER)
        .copied();

    let (header_text, primary_text) = match found_at(file, blame_end) {
        FoundAt::Token(token_text) => (
            format!(
                "expected initializer expression or `;` after `(` in `for` header, found `{}`",
                token_text
            ),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            "expected initializer expression or `;` after `(` in `for` header, found end of file"
                .to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected initializer expression or `;` after `(` in `for` header".to_string(),
            "expected initializer expression or `;` here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(header_text);

    let parenthesis_on_other_line =
        left_parenthesis_span.filter(|span| !file.same_line(span.start, blame_end));
    if let Some(span) = parenthesis_on_other_line {
        builder = builder.secondary_label(
            span,
            "after this `(`, an initializer expression or `;` was expected",
        );
    } else if let Some(for_span) = for_keyword_span {
        let shares_line_with_error = file.same_line(for_span.start, blame_end);
        let shares_line_with_parenthesis =
            left_parenthesis_span.is_some_and(|span| file.same_line(for_span.start, span.start));
        if !shares_line_with_error && !shares_line_with_parenthesis {
            builder = builder.secondary_label(for_span, "`for` loop starts here");
        }
    }

    builder
        .primary_label(error.blame_span, primary_text)
        .code("P0016")
        .build()
}
pub(super) fn diagnostic_for_loop_header_missing_semicolon_after_initializer<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let initializer_span = error.related_spans.get("for_header_initializer").copied();
let found = found_at(file, error.blame_span.end);
let initializer_is_omitted = initializer_span.is_none();
let header_text = if initializer_is_omitted {
"missing first `;` in `for` header".to_string()
} else {
"missing `;` after initializer in `for` header".to_string()
};
let primary_text = match found {
FoundAt::Token(token_text) => {
if initializer_is_omitted {
format!("expected first `;` before `{}`", token_text)
} else {
format!("expected `;` before `{}`", token_text)
}
}
FoundAt::EndOfFile => {
if initializer_is_omitted {
"expected first `;` before end of file".to_string()
} else {
"expected `;` before end of file".to_string()
}
}
FoundAt::Unknown => {
if initializer_is_omitted {
"expected first `;` here".to_string()
} else {
"expected `;` here".to_string()
}
}
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(span) = initializer_span {
builder = builder.secondary_label(span, "initializer ends here");
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0017")
.build()
}
/// P0018: the token after the first `;` of a `for` header cannot start
/// a condition.
///
/// At most one secondary label is added. The span stored under
/// `diagnostic_labels::EXPRESSION_EXPECTED_AFTER` (labelled as the `;`) is
/// used when it is on a different line than the end of the blame span.
/// Otherwise the span under `diagnostic_labels::EXPRESSION_REQUIRED_BY`
/// (labelled as the `for`) is used, provided it shares a line with neither
/// the end of the blame span nor the `;` span.
pub(super) fn diagnostic_for_loop_header_condition_invalid_start<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_end = error.blame_span.end;
    let for_keyword_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
        .copied();
    let first_semicolon_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_EXPECTED_AFTER)
        .copied();

    let (header_text, primary_text) = match found_at(file, blame_end) {
        FoundAt::Token(token_text) => (
            format!(
                "expected condition expression or second `;` in `for` header, found `{}`",
                token_text
            ),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            "expected condition expression or second `;` in `for` header, found end of file"
                .to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected condition expression or second `;` in `for` header".to_string(),
            "expected condition expression or `;` here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(header_text);

    let semicolon_on_other_line =
        first_semicolon_span.filter(|span| !file.same_line(span.start, blame_end));
    if let Some(span) = semicolon_on_other_line {
        builder = builder.secondary_label(
            span,
            "after this `;`, a condition expression or another `;` was expected",
        );
    } else if let Some(for_span) = for_keyword_span {
        let shares_line_with_error = file.same_line(for_span.start, blame_end);
        let shares_line_with_semicolon =
            first_semicolon_span.is_some_and(|span| file.same_line(for_span.start, span.start));
        if !shares_line_with_error && !shares_line_with_semicolon {
            builder = builder.secondary_label(for_span, "`for` loop starts here");
        }
    }

    builder
        .primary_label(error.blame_span, primary_text)
        .code("P0018")
        .build()
}
pub(super) fn diagnostic_for_loop_header_missing_semicolon_after_condition<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let condition_span = error.related_spans.get("for_header_condition").copied();
let found = found_at(file, error.blame_span.end);
let condition_is_omitted = condition_span.is_none();
let found_is_eof = matches!(found, FoundAt::EndOfFile);
let header_text = if condition_is_omitted && found_is_eof {
"missing second `;` in `for` header".to_string()
} else {
"missing `;` after condition in `for` header".to_string()
};
let primary_text = match found {
FoundAt::Token(token_text) => {
if condition_is_omitted {
format!("expected second `;` before `{}`", token_text)
} else {
format!("expected `;` before `{}`", token_text)
}
}
FoundAt::EndOfFile => {
if condition_is_omitted {
"expected second `;` before end of file".to_string()
} else {
"expected `;` before end of file".to_string()
}
}
FoundAt::Unknown => {
if condition_is_omitted {
"expected second `;` here".to_string()
} else {
"expected `;` here".to_string()
}
}
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(span) = condition_span {
builder = builder.secondary_label(span, "condition ends here");
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0019")
.build()
}
/// P0020: the token after the second `;` of a `for` header cannot start
/// a step expression.
///
/// A `;` at the end of the blame span is reported as an unexpected third
/// `;`. At most one secondary label is added. The span stored under
/// `diagnostic_labels::EXPRESSION_EXPECTED_AFTER` (labelled as the `;`) is
/// used when it is on a different line than the end of the blame span.
/// Otherwise the span under `diagnostic_labels::EXPRESSION_REQUIRED_BY`
/// (labelled as the `for`) is used, provided it shares a line with neither
/// the end of the blame span nor the `;` span.
pub(super) fn diagnostic_for_loop_header_step_invalid_start<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_end = error.blame_span.end;
    let for_keyword_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
        .copied();
    let second_semicolon_span = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_EXPECTED_AFTER)
        .copied();

    let (header_text, primary_text) = match found_at(file, blame_end) {
        FoundAt::Token(";") => (
            "unexpected third `;` in `for` header".to_string(),
            "expected step expression or `)` after the second `;`".to_string(),
        ),
        FoundAt::Token(token_text) => (
            format!(
                "expected step expression or `)` after the second `;` in `for` header, found `{}`",
                token_text
            ),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            "expected step expression or `)` after the second `;` in `for` header, found end of file"
                .to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected step expression or `)` after the second `;` in `for` header".to_string(),
            "expected step expression or `)` here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(header_text);

    let semicolon_on_other_line =
        second_semicolon_span.filter(|span| !file.same_line(span.start, blame_end));
    if let Some(span) = semicolon_on_other_line {
        builder = builder.secondary_label(
            span,
            "after this `;`, a step expression or `)` was expected",
        );
    } else if let Some(for_span) = for_keyword_span {
        let shares_line_with_error = file.same_line(for_span.start, blame_end);
        let shares_line_with_semicolon =
            second_semicolon_span.is_some_and(|span| file.same_line(for_span.start, span.start));
        if !shares_line_with_error && !shares_line_with_semicolon {
            builder = builder.secondary_label(for_span, "`for` loop starts here");
        }
    }

    builder
        .primary_label(error.blame_span, primary_text)
        .code("P0020")
        .build()
}
pub(super) fn diagnostic_for_loop_header_missing_closing_parenthesis<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let for_header_start_span = error.related_spans.get("for_header_start").copied();
let primary_text = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => format!("expected `)` before `{}`", token_text),
FoundAt::EndOfFile => "expected `)` before end of file".to_string(),
FoundAt::Unknown => "expected `)` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `)` to close `for` header");
let primary_context_span = for_header_start_span
.filter(|span| should_show_context_label(file, *span, error.blame_span));
if let Some(span) = primary_context_span {
builder = builder.secondary_label(span, "`for` header starts here");
}
let primary_span =
primary_span_with_optional_multiline_context(file, primary_context_span, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0021")
.build()
}
/// P0022: the token after `return` can start neither a value expression nor
/// be `;`.
///
/// The span stored under `diagnostic_labels::EXPRESSION_REQUIRED_BY` gets
/// a secondary label only when it is on a different line than the end of the
/// blame span.
pub(super) fn diagnostic_return_value_invalid_start<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_end = error.blame_span.end;
    let found = found_at(file, blame_end);

    let header_text = match found {
        FoundAt::Token(token_text) => format!(
            "expected return value expression or `;` after `return`, found `{}`",
            token_text
        ),
        FoundAt::EndOfFile => {
            "expected return value expression or `;` after `return`, found end of file"
                .to_string()
        }
        FoundAt::Unknown => "expected return value expression or `;` after `return`".to_string(),
    };
    let primary_text = match found {
        FoundAt::Token(token_text) => format!("unexpected `{}`", token_text),
        FoundAt::EndOfFile => "reached end of file here".to_string(),
        FoundAt::Unknown => "expected return value expression here".to_string(),
    };

    let return_on_other_line = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
        .copied()
        .filter(|span| !file.same_line(span.start, blame_end));

    let mut builder = DiagnosticBuilder::error(header_text);
    if let Some(span) = return_on_other_line {
        builder = builder.secondary_label(
            span,
            "after this `return`, a value expression or `;` was expected",
        );
    }
    builder
        .primary_label(error.blame_span, primary_text)
        .code("P0022")
        .build()
}
/// P0023: the token after `break` can start neither a value expression nor
/// be `;`.
///
/// The span stored under `diagnostic_labels::EXPRESSION_REQUIRED_BY` gets
/// a secondary label only when it is on a different line than the end of the
/// blame span.
pub(super) fn diagnostic_break_value_invalid_start<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_end = error.blame_span.end;
    let found = found_at(file, blame_end);

    let header_text = match found {
        FoundAt::Token(token_text) => format!(
            "expected break value expression or `;` after `break`, found `{}`",
            token_text
        ),
        FoundAt::EndOfFile => {
            "expected break value expression or `;` after `break`, found end of file".to_string()
        }
        FoundAt::Unknown => "expected break value expression or `;` after `break`".to_string(),
    };
    let primary_text = match found {
        FoundAt::Token(token_text) => format!("unexpected `{}`", token_text),
        FoundAt::EndOfFile => "reached end of file here".to_string(),
        FoundAt::Unknown => "expected break value expression here".to_string(),
    };

    let break_on_other_line = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
        .copied()
        .filter(|span| !file.same_line(span.start, blame_end));

    let mut builder = DiagnosticBuilder::error(header_text);
    if let Some(span) = break_on_other_line {
        builder = builder.secondary_label(
            span,
            "after this `break`, a value expression or `;` was expected",
        );
    }
    builder
        .primary_label(error.blame_span, primary_text)
        .code("P0023")
        .build()
}
pub(super) fn diagnostic_goto_missing_label<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let goto_keyword_span = error.related_spans.get("goto_keyword").copied();
let found = found_at(file, error.blame_span.end);
let (header_text, primary_text) = match found {
FoundAt::Token(token_text) => (
format!("expected label after `goto`, found `{}`", token_text),
format!("unexpected `{}`", token_text),
),
FoundAt::EndOfFile => (
"expected label after `goto`, found end of file".to_string(),
"reached end of file here".to_string(),
),
FoundAt::Unknown => (
"expected label after `goto`".to_string(),
"expected label here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(span) = goto_keyword_span
&& !file.same_line(span.start, error.blame_span.end)
{
builder = builder.secondary_label(span, "after this `goto`, a label was expected");
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0024")
.build()
}

View File

@ -0,0 +1,197 @@
//! Conversion from parser errors to user-facing diagnostics.
//!
//! This module maps [`ParseError`] values produced by the parser to structured
//! [`Diagnostic`] values suitable for rendering.
//!
//! It owns the top-level dispatch by [`crate::parser::ParseErrorKind`] and
//! keeps small shared utilities used by parse-error diagnostic constructors.
//!
//! Concrete diagnostic constructors are grouped into submodules that mirror
//! parser areas or grammar families.
use super::{Diagnostic, DiagnosticBuilder};
use crate::diagnostics::parse_error_diagnostics::block_items::{
diagnostic_block_expected_item, diagnostic_block_missing_closing_brace,
diagnostic_block_missing_semicolon_after_expression,
};
use crate::lexer::{TokenPosition, TokenSpan, TokenizedFile};
use crate::parser::{ParseError, ParseErrorKind};
mod block_items;
mod control_flow_expressions;
mod primary_expressions;
mod selector_expressions;
mod switch_expressions;
/// What [`found_at`] saw at a token position; used by the diagnostic
/// constructors to word their messages.
#[derive(Clone, Copy)]
enum FoundAt<'src> {
/// Text of the token at the position, as returned by
/// `TokenizedFile::token_text`.
Token(&'src str),
/// No token text was available and `TokenizedFile::is_eof` reported the
/// position as end of file.
EndOfFile,
/// No token text was available and the position is not the end of file.
Unknown,
}
/// Classifies what is located at `position` in `file`.
///
/// Token text wins when `file.token_text` returns some; otherwise the result
/// is `EndOfFile` if `file.is_eof` says so, and `Unknown` in every other
/// case.
fn found_at<'src>(file: &TokenizedFile<'src>, position: TokenPosition) -> FoundAt<'src> {
    match file.token_text(position) {
        Some(token_text) => FoundAt::Token(token_text),
        None if file.is_eof(&position) => FoundAt::EndOfFile,
        None => FoundAt::Unknown,
    }
}
fn collapse_span_to_end_on_same_line(file: &TokenizedFile<'_>, mut span: TokenSpan) -> TokenSpan {
if file.same_line(span.start, span.end) {
span.start = span.end;
}
span
}
/// Returns `true` when the start of `context_span` is not on the same line
/// as the end of `blame_span`.
fn should_show_context_label<'src>(
    file: &TokenizedFile<'src>,
    context_span: TokenSpan,
    blame_span: TokenSpan,
) -> bool {
    let context_shares_line_with_error = file.same_line(context_span.start, blame_span.end);
    !context_shares_line_with_error
}
/// Picks the primary span for a diagnostic that may have a context span.
///
/// When a context span is given and `should_show_context_label` accepts it,
/// the result runs from the start of the context to the end of `blame_span`.
/// In every other case the result is
/// `collapse_span_to_end_on_same_line(file, blame_span)`.
fn primary_span_with_optional_multiline_context<'src>(
    file: &TokenizedFile<'src>,
    context_span: Option<TokenSpan>,
    blame_span: TokenSpan,
) -> TokenSpan {
    match context_span {
        Some(context) if should_show_context_label(file, context, blame_span) => TokenSpan {
            start: context.start,
            end: blame_span.end,
        },
        _ => collapse_span_to_end_on_same_line(file, blame_span),
    }
}
/// Converts a `ParseError` into a user-facing `Diagnostic`.
///
/// Dispatches on `error.kind` to the kind-specific builder function, passing
/// the error and the tokenized file through unchanged. Any kind without a
/// dedicated arm falls back to a generic error whose header contains the
/// `Debug` rendering of the kind and whose primary label covers
/// `error.covered_span`.
pub(crate) fn diagnostic_from_parse_error<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
// Builder functions are pulled in from the per-area submodules.
// NOTE(review): no `use block_items::*` is visible here although the
// `diagnostic_block_*` builders are called below; presumably they are in
// scope through another import. Confirm.
use control_flow_expressions::*;
use primary_expressions::*;
use selector_expressions::*;
use switch_expressions::*;
match error.kind {
// primary_expressions.rs
ParseErrorKind::ParenthesizedExpressionInvalidStart => {
diagnostic_parenthesized_expression_invalid_start(error, file)
}
ParseErrorKind::ExpressionExpected => diagnostic_expression_expected(error, file),
ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis => {
diagnostic_parenthesized_expression_missing_closing_parenthesis(error, file)
}
ParseErrorKind::ClassTypeMissingTypeArgument => {
diagnostic_class_type_missing_type_argument(error, file)
}
ParseErrorKind::ClassTypeExpectedQualifiedTypeName => {
diagnostic_class_type_expected_qualified_type_name(error, file)
}
ParseErrorKind::ClassTypeInvalidStart => diagnostic_class_type_invalid_start(error, file),
ParseErrorKind::ClassTypeMissingClosingAngleBracket => {
diagnostic_class_type_missing_closing_angle_bracket(error, file)
}
ParseErrorKind::NewMissingClassSpecifier => {
diagnostic_new_missing_class_specifier(error, file)
}
ParseErrorKind::NewTooManyArguments => diagnostic_new_too_many_arguments(error, file),
ParseErrorKind::NewMissingClosingParenthesis => {
diagnostic_new_missing_closing_parenthesis(error, file)
}
ParseErrorKind::NewArgumentMissingComma => {
diagnostic_new_argument_missing_comma(error, file)
}
// control_flow_expressions.rs
ParseErrorKind::ConditionExpected => diagnostic_condition_expected(error, file),
ParseErrorKind::ControlFlowBodyExpected => {
diagnostic_control_flow_body_expected(error, file)
}
ParseErrorKind::DoMissingUntil => diagnostic_do_missing_until(error, file),
ParseErrorKind::ForEachIteratorExpressionExpected => {
diagnostic_for_each_iterator_expression_expected(error, file)
}
ParseErrorKind::ForLoopHeaderInitializerInvalidStart => {
diagnostic_for_loop_header_initializer_invalid_start(error, file)
}
ParseErrorKind::ForLoopHeaderMissingSemicolonAfterInitializer => {
diagnostic_for_loop_header_missing_semicolon_after_initializer(error, file)
}
ParseErrorKind::ForLoopHeaderConditionInvalidStart => {
diagnostic_for_loop_header_condition_invalid_start(error, file)
}
ParseErrorKind::ForLoopHeaderMissingSemicolonAfterCondition => {
diagnostic_for_loop_header_missing_semicolon_after_condition(error, file)
}
ParseErrorKind::ForLoopHeaderStepInvalidStart => {
diagnostic_for_loop_header_step_invalid_start(error, file)
}
ParseErrorKind::ForLoopHeaderMissingClosingParenthesis => {
diagnostic_for_loop_header_missing_closing_parenthesis(error, file)
}
ParseErrorKind::ReturnValueInvalidStart => {
diagnostic_return_value_invalid_start(error, file)
}
ParseErrorKind::BreakValueInvalidStart => diagnostic_break_value_invalid_start(error, file),
ParseErrorKind::GotoMissingLabel => diagnostic_goto_missing_label(error, file),
// block_items.rs
ParseErrorKind::BlockMissingSemicolonAfterExpression => {
diagnostic_block_missing_semicolon_after_expression(error, file)
}
ParseErrorKind::BlockMissingClosingBrace => {
diagnostic_block_missing_closing_brace(error, file)
}
ParseErrorKind::BlockExpectedItem => diagnostic_block_expected_item(error, file),
// selector_expressions.rs
ParseErrorKind::MemberAccessMissingMemberName => {
diagnostic_member_access_missing_member_name(error, file)
}
ParseErrorKind::IndexMissingExpression => diagnostic_index_missing_expression(error, file),
ParseErrorKind::IndexMissingClosingBracket => {
diagnostic_index_missing_closing_bracket(error, file)
}
ParseErrorKind::FunctionCallArgumentMissingComma => {
diagnostic_function_call_argument_missing_comma(error, file)
}
ParseErrorKind::FunctionCallMissingClosingParenthesis => {
diagnostic_function_call_missing_closing_parenthesis(error, file)
}
ParseErrorKind::FunctionCallUnexpectedTokenInArgumentList => {
diagnostic_function_call_unexpected_token_in_argument_list(error, file)
}
// switch_expressions.rs
ParseErrorKind::SwitchMissingBody => diagnostic_switch_missing_body(error, file),
ParseErrorKind::SwitchTopLevelItemNotCase => {
diagnostic_switch_top_level_item_not_case(error, file)
}
ParseErrorKind::SwitchCaseMissingColon => diagnostic_switch_case_missing_colon(error, file),
ParseErrorKind::SwitchDefaultMissingColon => {
diagnostic_switch_default_missing_colon(error, file)
}
ParseErrorKind::SwitchDuplicateDefault => diagnostic_switch_duplicate_default(error, file),
ParseErrorKind::SwitchCasesAfterDefault => {
diagnostic_switch_cases_after_default(error, file)
}
ParseErrorKind::SwitchMissingClosingBrace => {
diagnostic_switch_missing_closing_brace(error, file)
}
ParseErrorKind::SwitchCaseMissingExpression => {
diagnostic_switch_case_missing_expression(error, file)
}
ParseErrorKind::SwitchCaseExpressionInvalidStart => {
diagnostic_switch_case_expression_invalid_start(error, file)
}
// Fallback for kinds that do not have a dedicated diagnostic yet.
_ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind))
.primary_label(error.covered_span, "happened here")
.build(),
}
}

View File

@ -0,0 +1,543 @@
use super::{Diagnostic, DiagnosticBuilder, FoundAt, collapse_span_to_end_on_same_line, found_at};
use crate::lexer::{TokenSpan, TokenizedFile};
use crate::parser::ParseError;
/// P0001
///
/// Builds the diagnostic for a parenthesized expression whose contents do not
/// start with a valid expression.
///
/// The header and primary label text depend on what `found_at` reports at the
/// end of `error.blame_span`. A secondary label on the `left_parenthesis`
/// related span is added only when that span starts on a different line than
/// the blamed position. The primary label is placed on the blamed span after
/// it has been passed through `collapse_span_to_end_on_same_line`.
pub(super) fn diagnostic_parenthesized_expression_invalid_start<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let (header_text, primary_text) = match found_at(file, error.blame_span.end) {
        FoundAt::Token(token_text) => (
            format!(
                "expected expression inside parentheses, found `{}`",
                token_text
            ),
            format!("unexpected `{}`", token_text),
        ),
        // Keep the "inside parentheses" context in the header so that all
        // three branches describe the same construct.
        FoundAt::EndOfFile => (
            "expected expression inside parentheses, found end of file".to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected expression inside parentheses".to_string(),
            "expected expression".to_string(),
        ),
    };
    let mut builder = DiagnosticBuilder::error(header_text);
    // The opening parenthesis is only worth pointing at when it is not already
    // visible on the blamed line.
    if let Some(related_span) = error.related_spans.get("left_parenthesis")
        && !file.same_line(related_span.start, error.blame_span.end)
    {
        builder = builder.secondary_label(*related_span, "parenthesized expression starts here");
    }
    let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
    builder
        .primary_label(primary_span, primary_text)
        .code("P0001")
        .build()
}
/// P0002
///
/// Builds the diagnostic for a position where an expression was required.
///
/// If the error records an `infix_operator` (preferred) or `prefix_operator`
/// related span, the header mentions that operator, and the operator gets a
/// secondary label when it starts on a different line than the blamed
/// position. The primary label covers `error.blame_span` unchanged.
pub(super) fn diagnostic_expression_expected<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_span = error.blame_span;
    let operator_span = error
        .related_spans
        .get("infix_operator")
        .or_else(|| error.related_spans.get("prefix_operator"))
        .copied();
    let operator_text = operator_span.and_then(|span| file.token_text(span.end));

    // Shared beginning of the header; the "found ..." part is appended below.
    let expectation = match operator_text {
        Some(operator) => format!("expected expression after `{}`", operator),
        None => "expected expression".to_string(),
    };
    let (header_text, primary_text) = match found_at(file, blame_span.end) {
        FoundAt::Token(token_text) => (
            format!("{}, found `{}`", expectation, token_text),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            format!("{}, found end of file", expectation),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (expectation, "expected expression".to_string()),
    };

    let mut builder = DiagnosticBuilder::error(header_text);
    let distant_operator =
        operator_span.filter(|span| !file.same_line(span.start, blame_span.end));
    if let Some(span) = distant_operator {
        let secondary_text = match operator_text {
            Some(operator) => format!("after this `{}`, an expression was expected", operator),
            None => "an expression was expected after this operator".to_string(),
        };
        builder = builder.secondary_label(span, secondary_text);
    }
    builder
        .primary_label(blame_span, primary_text)
        .code("P0002")
        .build()
}
pub(super) fn diagnostic_parenthesized_expression_missing_closing_parenthesis<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let left_parenthesis_span = error.related_spans.get("left_parenthesis").copied();
let primary_text = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => format!("expected `)` before `{}`", token_text),
FoundAt::EndOfFile => "expected `)` before end of file".to_string(),
FoundAt::Unknown => "expected `)` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `)` to close parenthesized expression");
if let Some(span) = left_parenthesis_span
&& !file.same_line(span.start, error.blame_span.end)
{
builder = builder.secondary_label(span, "parenthesized expression starts here");
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0003")
.build()
}
pub(super) fn diagnostic_class_type_missing_type_argument<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let left_angle_bracket_span = error.related_spans.get("left_angle_bracket").copied();
let primary_text = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => format!("expected a type name before `{}`", token_text),
FoundAt::EndOfFile => "expected a type name before end of file".to_string(),
FoundAt::Unknown => "expected a type name here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing type argument in `class<...>`");
if let Some(span) = left_angle_bracket_span
&& !file.same_line(span.start, error.blame_span.end)
{
builder = builder.secondary_label(span, "type argument starts here");
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0004")
.build()
}
/// P0005
///
/// Builds the diagnostic for a qualified type name inside `class<...>` that
/// stops after a `.` without another segment.
///
/// Secondary labels are only considered when a `qualifier_dot` related span
/// exists: the dot is labelled when it starts on a different line than the
/// blamed position, and the `class_keyword` span is labelled when its end is on
/// a different line than the dot. The primary label is a single-position span
/// at the end of `error.blame_span`.
pub(super) fn diagnostic_class_type_expected_qualified_type_name<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    const EXPECTATION: &str = "expected another type segment after `.`";

    let blame_pos = error.blame_span.end;
    let (header_text, primary_text) = match found_at(file, blame_pos) {
        FoundAt::Token(token_text) => (
            format!("{}, found `{}`", EXPECTATION, token_text),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            format!("{}, found end of file", EXPECTATION),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            EXPECTATION.to_string(),
            "expected another type segment here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(header_text);
    if let Some(dot) = error.related_spans.get("qualifier_dot").copied() {
        if !file.same_line(dot.start, blame_pos) {
            builder = builder.secondary_label(
                dot,
                "after this `.`, another type segment was expected",
            );
        }
        let distant_class_keyword = error
            .related_spans
            .get("class_keyword")
            .copied()
            .filter(|class_keyword| !file.same_line(class_keyword.end, dot.start));
        if let Some(class_keyword) = distant_class_keyword {
            builder = builder.secondary_label(
                class_keyword,
                "while parsing this `class<...>` type expression",
            );
        }
    }

    builder
        .primary_label(
            TokenSpan {
                start: blame_pos,
                end: blame_pos,
            },
            primary_text,
        )
        .code("P0005")
        .build()
}
pub(super) fn diagnostic_class_type_invalid_start<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let left_angle_bracket_span = error.related_spans.get("left_angle_bracket").copied();
let class_keyword_span = error.related_spans.get("class_keyword").copied();
let (header_text, primary_text) = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => (
format!(
"expected a type argument after `<` in `class<...>`, found `{}`",
token_text
),
format!("unexpected `{}`", token_text),
),
FoundAt::EndOfFile => (
"expected a type argument after `<` in `class<...>`, found end of file".to_string(),
"reached end of file here".to_string(),
),
FoundAt::Unknown => (
"expected a type argument after `<` in `class<...>`".to_string(),
"expected a type argument here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(left_angle_span) = left_angle_bracket_span {
if !file.same_line(left_angle_span.start, error.blame_span.end) {
builder = builder.secondary_label(left_angle_span, "type argument starts here");
}
if let Some(class_span) = class_keyword_span
&& !file.same_line(class_span.end, left_angle_span.start)
{
builder = builder.secondary_label(
class_span,
"while parsing this `class<...>` type expression",
);
}
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0006")
.build()
}
pub(super) fn diagnostic_class_type_missing_closing_angle_bracket<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let left_angle_bracket_span = error.related_spans.get("left_angle_bracket").copied();
let class_keyword_span = error.related_spans.get("class_keyword").copied();
let primary_text = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => format!("expected `>` before `{}`", token_text),
FoundAt::EndOfFile => "expected `>` before end of file".to_string(),
FoundAt::Unknown => "expected `>` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `>` to close `class<...>`");
if let Some(left_angle_bracket_span) = left_angle_bracket_span {
if !file.same_line(left_angle_bracket_span.start, error.blame_span.end) {
builder = builder.secondary_label(left_angle_bracket_span, "type argument starts here");
}
if let Some(class_keyword_span) = class_keyword_span
&& !file.same_line(class_keyword_span.end, left_angle_bracket_span.start)
{
builder = builder.secondary_label(
class_keyword_span,
"while parsing this `class<...>` type expression",
);
}
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0007")
.build()
}
pub(super) fn diagnostic_new_missing_class_specifier<'src>(
mut error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let new_keyword_span = error.related_spans.get("new_keyword").copied();
let argument_list_end_span = error.related_spans.get("argument_list_end").copied();
let construct_text = if argument_list_end_span.is_some() {
"`new(...)`"
} else {
"`new`"
};
let (header_text, primary_text) = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => (
format!(
"expected class expression after {}, found `{}`",
construct_text, token_text
),
format!("unexpected `{}`", token_text),
),
FoundAt::EndOfFile => (
format!(
"expected class expression after {}, found end of file",
construct_text
),
"reached end of file here".to_string(),
),
FoundAt::Unknown => (
format!("expected class expression after {}", construct_text),
"expected class expression here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(new_keyword_span) = new_keyword_span
&& !file.same_line(new_keyword_span.start, error.blame_span.end)
{
builder = builder.secondary_label(new_keyword_span, "`new` expression starts here");
}
match argument_list_end_span {
Some(argument_list_end_span)
if !file.same_line(argument_list_end_span.start, error.blame_span.end) =>
{
builder = builder.secondary_label(
argument_list_end_span,
"optional `new(...)` arguments end here",
);
error.blame_span.start = argument_list_end_span.start;
}
Some(_) | None => {
error.blame_span.start = error.blame_span.end;
}
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0008")
.build()
}
/// P0009
///
/// Builds the diagnostic for a `new(...)` argument list with more than three
/// arguments.
///
/// The primary label sits on the `first_extra_argument` related span when one
/// is recorded; otherwise it sits on `error.blame_span` with wording chosen
/// from what `found_at` reports there. If a `last_allowed_argument` span
/// starts on a different line than the end of the primary span, it is labelled
/// and the primary span is extended back to its start.
pub(super) fn diagnostic_new_too_many_arguments<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let new_keyword_span = error.related_spans.get("new_keyword").copied();
    let left_parenthesis_span = error.related_spans.get("left_parenthesis").copied();
    let last_allowed_argument_span = error.related_spans.get("last_allowed_argument").copied();
    let first_extra_argument_span = error.related_spans.get("first_extra_argument").copied();
    let found = found_at(file, error.blame_span.end);

    let (primary_text, mut primary_span) = match (first_extra_argument_span, found) {
        (Some(extra_argument), _) => (
            "a fourth argument is not allowed in `new(...)`".to_string(),
            extra_argument,
        ),
        (None, FoundAt::Token(",")) => (
            "this `,` starts a fourth argument, which is not allowed here".to_string(),
            error.blame_span,
        ),
        (None, FoundAt::Token(token_text)) => (
            format!("unexpected start of a fourth argument: `{}`", token_text),
            error.blame_span,
        ),
        (None, FoundAt::EndOfFile | FoundAt::Unknown) => (
            "a fourth argument is not allowed here".to_string(),
            error.blame_span,
        ),
    };

    let mut builder = DiagnosticBuilder::error("too many arguments in `new(...)`");

    // `new` is labelled only when it is on neither the primary line nor the
    // line of the opening parenthesis.
    let distant_new_keyword = new_keyword_span.filter(|keyword| {
        !file.same_line(keyword.start, primary_span.end)
            && left_parenthesis_span
                .map_or(true, |paren| !file.same_line(keyword.start, paren.start))
    });
    if let Some(keyword) = distant_new_keyword {
        builder = builder.secondary_label(keyword, "`new` expression starts here");
    }

    let distant_parenthesis =
        left_parenthesis_span.filter(|paren| !file.same_line(paren.start, primary_span.end));
    if let Some(paren) = distant_parenthesis {
        builder = builder.secondary_label(paren, "`new(...)` argument list starts here");
    }

    let distant_last_allowed = last_allowed_argument_span
        .filter(|argument| !file.same_line(argument.start, primary_span.end));
    if let Some(argument) = distant_last_allowed {
        builder = builder.secondary_label(argument, "the third allowed argument ends here");
        primary_span.start = argument.start;
    }

    builder
        .primary_label(primary_span, primary_text)
        .note("`new(...)` accepts up to three optional arguments: `outer`, `name`, and `flags`.")
        .code("P0009")
        .build()
}
pub(super) fn diagnostic_new_missing_closing_parenthesis<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let left_parenthesis_span = error.related_spans.get("left_parenthesis").copied();
let new_keyword_span = error.related_spans.get("new_keyword").copied();
let primary_text = match found_at(file, error.blame_span.end) {
FoundAt::Token(token_text) => format!("expected `)` before `{}`", token_text),
FoundAt::EndOfFile => "expected `)` before end of file".to_string(),
FoundAt::Unknown => "expected `)` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `)` to close `new(...)` argument list");
if let Some(left_parenthesis_span) = left_parenthesis_span {
if !file.same_line(left_parenthesis_span.start, error.blame_span.end) {
if let Some(new_keyword_span) = new_keyword_span
&& !file.same_line(new_keyword_span.end, left_parenthesis_span.start)
&& !file.same_line(new_keyword_span.start, error.blame_span.end)
{
builder = builder.secondary_label(new_keyword_span, "`new` expression starts here");
}
builder = builder.secondary_label(
left_parenthesis_span,
"`new(...)` argument list starts here",
);
}
}
let primary_span = collapse_span_to_end_on_same_line(file, error.blame_span);
builder
.primary_label(primary_span, primary_text)
.code("P0010")
.build()
}
/// P0011
///
/// Builds the diagnostic for two `new(...)` arguments that are not separated
/// by `,`.
///
/// The primary label is `TokenSpan::new` of the end of the blamed span. The
/// `new` keyword and the opening parenthesis are labelled only when they are
/// on other lines; a recorded `previous_argument` span is always labelled.
pub(super) fn diagnostic_new_argument_missing_comma<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let new_keyword_span = error.related_spans.get("new_keyword").copied();
    let left_parenthesis_span = error.related_spans.get("left_parenthesis").copied();
    let previous_argument_span = error.related_spans.get("previous_argument").copied();
    let primary_span = TokenSpan::new(error.blame_span.end);

    let primary_text = match found_at(file, primary_span.end) {
        FoundAt::Token(token_text) => format!("expected `,` before `{}`", token_text),
        FoundAt::EndOfFile => "expected `,` before end of file".to_string(),
        FoundAt::Unknown => "expected `,` before this argument".to_string(),
    };

    let mut builder = DiagnosticBuilder::error("missing `,` between `new(...)` arguments");

    // `new` is labelled only when it is on neither the primary line nor the
    // line of the opening parenthesis.
    let distant_new_keyword = new_keyword_span.filter(|keyword| {
        !file.same_line(keyword.start, primary_span.end)
            && left_parenthesis_span
                .map_or(true, |paren| !file.same_line(keyword.start, paren.start))
    });
    if let Some(keyword) = distant_new_keyword {
        builder = builder.secondary_label(keyword, "`new` expression starts here");
    }

    let distant_parenthesis =
        left_parenthesis_span.filter(|paren| !file.same_line(paren.start, primary_span.end));
    if let Some(paren) = distant_parenthesis {
        builder = builder.secondary_label(paren, "`new(...)` argument list starts here");
    }

    if let Some(previous_argument) = previous_argument_span {
        builder = builder.secondary_label(previous_argument, "previous argument ends here");
    }

    builder
        .primary_label(primary_span, primary_text)
        .code("P0011")
        .build()
}

View File

@ -0,0 +1,354 @@
use super::{Diagnostic, DiagnosticBuilder, FoundAt, found_at};
use crate::lexer::{TokenSpan, TokenizedFile};
use crate::parser::{ParseError, diagnostic_labels};
// Keys used by the diagnostics in this file to look up entries in
// `ParseError::related_spans`.
const PERIOD: &str = "period";
const LEFT_BRACKET: &str = "left_bracket";
const CALLEE: &str = "callee";
const LEFT_PARENTHESIS: &str = "left_parenthesis";
const PREVIOUS_ARGUMENT: &str = "previous_argument";
const ARGUMENT: &str = "argument";
/// P0028
///
/// Builds the diagnostic for a member access whose `.` is not followed by a
/// member name.
///
/// Wording is chosen from what `found_at` reports at the start of the blamed
/// span. The `.` is labelled only when it starts on a different line than the
/// end of the blamed span.
pub(super) fn diagnostic_member_access_missing_member_name<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_span = error.blame_span;
    let distant_period = error
        .related_spans
        .get(PERIOD)
        .copied()
        .filter(|period| !file.same_line(period.start, blame_span.end));

    let (title, primary_text) = match found_at(file, blame_span.start) {
        FoundAt::Token(token_text) => (
            format!("expected member name after `.`, found `{}`", token_text),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            "expected member name after `.`, found end of file".to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected member name after `.`".to_string(),
            "expected member name here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(title);
    if let Some(period) = distant_period {
        builder = builder.secondary_label(period, "after this `.`, a member name was expected");
    }
    builder
        .primary_label(blame_span, primary_text)
        .code("P0028")
        .build()
}
/// P0029
///
/// Builds the diagnostic for an index selector `[` that is not followed by an
/// index expression.
///
/// The opening bracket is read from the
/// `diagnostic_labels::EXPRESSION_EXPECTED_AFTER` related span. When it starts
/// on a different line than the end of the blamed span it is labelled and the
/// primary span is extended back to it; otherwise the primary span is the
/// blamed span itself.
pub(super) fn diagnostic_index_missing_expression<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_span = error.blame_span;
    let distant_bracket = error
        .related_spans
        .get(diagnostic_labels::EXPRESSION_EXPECTED_AFTER)
        .copied()
        .filter(|bracket| !file.same_line(bracket.start, blame_span.end));
    let primary_span = distant_bracket.map_or(blame_span, |bracket| TokenSpan {
        start: bracket.start,
        end: blame_span.end,
    });

    let (title, primary_text) = match found_at(file, blame_span.start) {
        FoundAt::Token(token_text) => (
            format!("expected index expression after `[`, found `{}`", token_text),
            // An immediately following `]` gets dedicated wording.
            if token_text == "]" {
                "expected expression before `]`".to_string()
            } else {
                format!("unexpected `{}`", token_text)
            },
        ),
        FoundAt::EndOfFile => (
            "expected index expression after `[`, found end of file".to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected index expression after `[`".to_string(),
            "expected index expression here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(title);
    if let Some(bracket) = distant_bracket {
        builder = builder.secondary_label(
            bracket,
            "after this `[`, an index expression was expected",
        );
    }
    builder
        .primary_label(primary_span, primary_text)
        .code("P0029")
        .build()
}
/// P0030
///
/// Builds the diagnostic for an index selector that is never closed with `]`.
///
/// When the `left_bracket` related span starts on a different line than the
/// end of the blamed span it is labelled and the primary span is extended back
/// to it; otherwise the primary span is the blamed span itself.
pub(super) fn diagnostic_index_missing_closing_bracket<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_span = error.blame_span;
    let distant_bracket = error
        .related_spans
        .get(LEFT_BRACKET)
        .copied()
        .filter(|bracket| !file.same_line(bracket.start, blame_span.end));
    let primary_span = distant_bracket.map_or(blame_span, |bracket| TokenSpan {
        start: bracket.start,
        end: blame_span.end,
    });

    let primary_text = match found_at(file, blame_span.start) {
        FoundAt::Token(token_text) => format!("expected `]` before `{}`", token_text),
        FoundAt::EndOfFile => "expected `]` before end of file".to_string(),
        FoundAt::Unknown => "expected `]` here".to_string(),
    };

    let mut builder = DiagnosticBuilder::error("missing `]` to close index selector");
    if let Some(bracket) = distant_bracket {
        builder = builder.secondary_label(bracket, "index selector starts here");
    }
    builder
        .primary_label(primary_span, primary_text)
        .code("P0030")
        .build()
}
/// P0031
pub(super) fn diagnostic_function_call_argument_missing_comma<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let callee_span = error.related_spans.get(CALLEE).copied();
let left_parenthesis_span = error.related_spans.get(LEFT_PARENTHESIS).copied();
let previous_argument_span = error.related_spans.get(PREVIOUS_ARGUMENT).copied();
let blame_span = error.blame_span;
let argument_list_context_span = match left_parenthesis_span {
Some(left_parenthesis_span)
if !file.same_line(left_parenthesis_span.start, blame_span.end) =>
{
Some(left_parenthesis_span)
}
_ => None,
};
let callee_context_span = match (callee_span, left_parenthesis_span) {
(Some(callee_span), Some(left_parenthesis_span))
if !file.same_line(callee_span.end, left_parenthesis_span.start)
&& !file.same_line(callee_span.end, blame_span.end) =>
{
Some(callee_span)
}
_ => None,
};
let primary_text = match found_at(file, blame_span.start) {
FoundAt::Token(token_text) => format!("expected `,` before `{}`", token_text),
FoundAt::EndOfFile => "expected `,` before end of file".to_string(),
FoundAt::Unknown => "expected `,` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `,` between function call arguments");
if let Some(callee_context_span) = callee_context_span {
builder = builder.secondary_label(callee_context_span, "function called here");
}
if let Some(argument_list_context_span) = argument_list_context_span {
builder = builder.secondary_label(
argument_list_context_span,
"function call argument list starts here",
);
}
if let Some(previous_argument_span) = previous_argument_span {
builder = builder.secondary_label(previous_argument_span, "previous argument ends here");
}
builder
.primary_label(blame_span, primary_text)
.code("P0031")
.build()
}
/// P0032
///
/// Builds the diagnostic for a function call argument list that is never
/// closed with `)`.
///
/// When the opening parenthesis starts on a different line than the end of the
/// blamed span it is labelled and the primary span is extended back to it. The
/// callee is labelled only when both callee and parenthesis are recorded and
/// the callee ends on neither the parenthesis line nor the blamed line.
pub(super) fn diagnostic_function_call_missing_closing_parenthesis<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_span = error.blame_span;
    let left_parenthesis_span = error.related_spans.get(LEFT_PARENTHESIS).copied();

    let distant_parenthesis =
        left_parenthesis_span.filter(|paren| !file.same_line(paren.start, blame_span.end));
    let distant_callee = error
        .related_spans
        .get(CALLEE)
        .copied()
        .zip(left_parenthesis_span)
        .filter(|(callee, paren)| {
            !file.same_line(callee.end, paren.start)
                && !file.same_line(callee.end, blame_span.end)
        })
        .map(|(callee, _)| callee);
    let primary_span = distant_parenthesis.map_or(blame_span, |paren| TokenSpan {
        start: paren.start,
        end: blame_span.end,
    });

    let primary_text = match found_at(file, blame_span.start) {
        FoundAt::Token(token_text) => format!("expected `)` before `{}`", token_text),
        FoundAt::EndOfFile => "expected `)` before end of file".to_string(),
        FoundAt::Unknown => "expected `)` here".to_string(),
    };

    let mut builder = DiagnosticBuilder::error("missing `)` to close function call argument list");
    if let Some(callee) = distant_callee {
        builder = builder.secondary_label(callee, "function called here");
    }
    if let Some(paren) = distant_parenthesis {
        builder = builder.secondary_label(paren, "function call argument list starts here");
    }
    builder
        .primary_label(primary_span, primary_text)
        .code("P0032")
        .build()
}
/// P0033
///
/// Builds the diagnostic for a token in a function call argument list that is
/// neither `,` nor `)`.
///
/// The argument span is taken from the `argument` related span, falling back
/// to `previous_argument`. Line comparisons for the callee and parenthesis
/// labels use the end of that argument span, or of the blamed span when no
/// argument span is recorded. A recorded argument span is always labelled.
pub(super) fn diagnostic_function_call_unexpected_token_in_argument_list<'src>(
    error: ParseError,
    file: &TokenizedFile<'src>,
) -> Diagnostic {
    let blame_span = error.blame_span;
    let left_parenthesis_span = error.related_spans.get(LEFT_PARENTHESIS).copied();
    let argument_span = error
        .related_spans
        .get(ARGUMENT)
        .or_else(|| error.related_spans.get(PREVIOUS_ARGUMENT))
        .copied();
    let reference_end = argument_span.unwrap_or(blame_span).end;

    let distant_parenthesis =
        left_parenthesis_span.filter(|paren| !file.same_line(paren.start, reference_end));
    let distant_callee = error
        .related_spans
        .get(CALLEE)
        .copied()
        .zip(left_parenthesis_span)
        .filter(|(callee, paren)| {
            !file.same_line(callee.end, paren.start)
                && !file.same_line(callee.end, reference_end)
        })
        .map(|(callee, _)| callee);

    let (title, primary_text) = match found_at(file, blame_span.start) {
        FoundAt::Token(token_text) => (
            format!("expected `,` or `)` after argument, found `{}`", token_text),
            format!("unexpected `{}`", token_text),
        ),
        FoundAt::EndOfFile => (
            "expected `,` or `)` after argument, found end of file".to_string(),
            "reached end of file here".to_string(),
        ),
        FoundAt::Unknown => (
            "expected `,` or `)` after argument".to_string(),
            "expected `,` or `)` here".to_string(),
        ),
    };

    let mut builder = DiagnosticBuilder::error(title);
    if let Some(callee) = distant_callee {
        builder = builder.secondary_label(callee, "function called here");
    }
    if let Some(paren) = distant_parenthesis {
        builder = builder.secondary_label(paren, "function call argument list starts here");
    }
    if let Some(argument) = argument_span {
        builder = builder.secondary_label(argument, "argument ends here");
    }
    builder
        .primary_label(blame_span, primary_text)
        .code("P0033")
        .build()
}

View File

@ -0,0 +1,417 @@
use super::{Diagnostic, DiagnosticBuilder, FoundAt, found_at, should_show_context_label};
use crate::lexer::{TokenSpan, TokenizedFile};
use crate::parser::ParseError;
// Keys used by the switch diagnostics to look up entries in
// `ParseError::related_spans`.
const SWITCH_KEYWORD: &str = "switch_keyword";
const LEFT_BRACE: &str = "left_brace";
const CASE_KEYWORD: &str = "case_keyword";
const DEFAULT_KEYWORD: &str = "default_keyword";
const CASE_EXPRESSION: &str = "case_expression";
const FIRST_DEFAULT: &str = "first_default";
// NOTE(review): the two `*_PREFIX` constants are presumably combined with a
// numeric suffix (`<prefix>_<index>`) by the numbered-label helpers in this
// file. Confirm against their callers.
const DUPLICATE_DEFAULT_PREFIX: &str = "duplicate_default";
const CASE_AFTER_DEFAULT_PREFIX: &str = "case_after_default";
/// Looks up `label` in `error.related_spans` and returns a copy of the span,
/// or `None` when no span is recorded under that label.
fn related_span(error: &ParseError, label: &str) -> Option<TokenSpan> {
    let recorded = error.related_spans.get(label);
    recorded.map(|span| *span)
}
/// Passes `context_span` through only when `should_show_context_label`
/// accepts it for the given `blame_span`; returns `None` otherwise, including
/// when no context span was supplied.
fn context_span_if_useful(
    file: &TokenizedFile<'_>,
    context_span: Option<TokenSpan>,
    blame_span: TokenSpan,
) -> Option<TokenSpan> {
    match context_span {
        Some(context) if should_show_context_label(file, context, blame_span) => Some(context),
        _ => None,
    }
}
/// Computes the primary label span for a diagnostic.
///
/// With a context span, the result runs from the start of the context to the
/// end of `blame_span`; without one, `blame_span` is returned unchanged.
fn primary_span_with_context(
    context_span: Option<TokenSpan>,
    blame_span: TokenSpan,
) -> TokenSpan {
    context_span.map_or(blame_span, |context| TokenSpan {
        start: context.start,
        end: blame_span.end,
    })
}
/// Returns `true` when both spans have equal `start` and equal `end`.
fn same_span(left: TokenSpan, right: TokenSpan) -> bool {
    if left.start != right.start {
        return false;
    }
    left.end == right.end
}
fn switch_keyword_context_span(
file: &TokenizedFile<'_>,
error: &ParseError,
blame_span: TokenSpan,
left_brace_span: Option<TokenSpan>,
) -> Option<TokenSpan> {
let switch_keyword_span = related_span(error, SWITCH_KEYWORD)?;
if file.same_line(switch_keyword_span.start, blame_span.end) {
return None;
}
if let Some(left_brace_span) = left_brace_span
&& file.same_line(switch_keyword_span.start, left_brace_span.start)
{
return None;
}
Some(switch_keyword_span)
}
fn switch_case_label_context_span(
file: &TokenizedFile<'_>,
error: &ParseError,
blame_span: TokenSpan,
) -> Option<TokenSpan> {
let case_keyword_span = related_span(error, CASE_KEYWORD)?;
let case_label_span = match related_span(error, CASE_EXPRESSION) {
Some(case_expression_span) => TokenSpan {
start: case_keyword_span.start,
end: case_expression_span.end,
},
None => case_keyword_span,
};
context_span_if_useful(file, Some(case_label_span), blame_span)
}
fn numbered_related_count(error: &ParseError, prefix: &str) -> usize {
let mut count = 0;
for index in 1.. {
let label = format!("{}_{}", prefix, index);
if related_span(error, &label).is_none() {
break;
}
count += 1;
}
count
}
fn add_numbered_related_labels(
mut builder: DiagnosticBuilder,
error: &ParseError,
prefix: &str,
primary_span: TokenSpan,
first_text: &'static str,
later_text: &'static str,
) -> DiagnosticBuilder {
for index in 1.. {
let label = format!("{}_{}", prefix, index);
let Some(span) = related_span(error, &label) else {
break;
};
if same_span(span, primary_span) {
continue;
}
let text = if index == 1 { first_text } else { later_text };
builder = builder.secondary_label(span, text);
}
builder
}
/// P0034
pub(super) fn diagnostic_switch_missing_body(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let switch_context_span =
context_span_if_useful(file, related_span(&error, SWITCH_KEYWORD), blame_span);
let primary_span = primary_span_with_context(switch_context_span, blame_span);
let primary_text = match found_at(file, blame_span.start) {
FoundAt::Token(token_text) => format!("expected `{{` before `{}`", token_text),
FoundAt::EndOfFile => "expected `{` after the switch expression".to_string(),
FoundAt::Unknown => "expected `{` here".to_string(),
};
DiagnosticBuilder::error("missing `{` to start `switch` body")
.primary_label(primary_span, primary_text)
.code("P0034")
.build()
}
/// P0035
pub(super) fn diagnostic_switch_top_level_item_not_case(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let left_brace_span = related_span(&error, LEFT_BRACE);
let left_brace_context_span = context_span_if_useful(file, left_brace_span, blame_span);
let switch_keyword_context_span =
switch_keyword_context_span(file, &error, blame_span, left_brace_span);
let primary_text = if matches!(found_at(file, blame_span.start), FoundAt::Token("{")) {
"this block must be inside a `case` or `default` section"
} else if related_span(&error, "multiple_items").is_some() {
"these statements must be inside a `case` or `default` section"
} else {
"this statement must be inside a `case` or `default` section"
};
let mut builder =
DiagnosticBuilder::error("expected `case` or `default` section label in switch body");
if let Some(switch_keyword_context_span) = switch_keyword_context_span {
builder = builder.secondary_label(switch_keyword_context_span, "`switch` starts here");
}
if let Some(left_brace_context_span) = left_brace_context_span {
builder = builder.secondary_label(left_brace_context_span, "switch body starts here");
}
builder
.primary_label(blame_span, primary_text)
.code("P0035")
.build()
}
/// P0036
pub(super) fn diagnostic_switch_case_missing_colon(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let case_label_context_span = switch_case_label_context_span(file, &error, blame_span);
let primary_text = match found_at(file, blame_span.start) {
FoundAt::Token(token_text) => format!("expected `:` before `{}`", token_text),
FoundAt::EndOfFile => "expected `:` before end of file".to_string(),
FoundAt::Unknown => "expected `:` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `:` after `case` label");
if let Some(case_label_context_span) = case_label_context_span {
builder = builder.secondary_label(
case_label_context_span,
"this `case` label needs a trailing `:`",
);
}
builder
.primary_label(blame_span, primary_text)
.code("P0036")
.build()
}
/// P0037
pub(super) fn diagnostic_switch_default_missing_colon(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let default_context_span =
context_span_if_useful(file, related_span(&error, DEFAULT_KEYWORD), blame_span);
let primary_text = match found_at(file, blame_span.start) {
FoundAt::Token(token_text) => format!("expected `:` before `{}`", token_text),
FoundAt::EndOfFile => "expected `:` before end of file".to_string(),
FoundAt::Unknown => "expected `:` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `:` after `default`");
if let Some(default_context_span) = default_context_span {
builder = builder.secondary_label(
default_context_span,
"this `default` label needs a trailing `:`",
);
}
builder
.primary_label(blame_span, primary_text)
.code("P0037")
.build()
}
/// P0038
pub(super) fn diagnostic_switch_duplicate_default(
error: ParseError,
_file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let duplicate_count = numbered_related_count(&error, DUPLICATE_DEFAULT_PREFIX);
let title = if duplicate_count > 1 {
"multiple `default` sections in switch"
} else {
"duplicate `default` section in switch"
};
let mut builder = DiagnosticBuilder::error(title);
if let Some(first_default_span) = related_span(&error, FIRST_DEFAULT)
&& !same_span(first_default_span, blame_span)
{
builder = builder.secondary_label(first_default_span, "first `default` section is here");
}
builder = add_numbered_related_labels(
builder,
&error,
DUPLICATE_DEFAULT_PREFIX,
blame_span,
"duplicate `default` section",
"another duplicate `default` section",
);
builder
.primary_label(blame_span, "duplicate `default` section")
.code("P0038")
.build()
}
/// P0039
pub(super) fn diagnostic_switch_cases_after_default(
error: ParseError,
_file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let case_after_default_count = numbered_related_count(&error, CASE_AFTER_DEFAULT_PREFIX);
let title = if case_after_default_count > 1 {
"multiple `case` sections appear after `default`"
} else {
"`case` section appears after `default`"
};
let mut builder = DiagnosticBuilder::error(title);
if let Some(first_default_span) = related_span(&error, FIRST_DEFAULT)
&& !same_span(first_default_span, blame_span)
{
builder = builder.secondary_label(
first_default_span,
"`default` must be the last section in this switch",
);
}
builder = add_numbered_related_labels(
builder,
&error,
CASE_AFTER_DEFAULT_PREFIX,
blame_span,
"`case` after `default`",
"another `case` after `default`",
);
builder
.primary_label(blame_span, "`case` after `default`")
.code("P0039")
.build()
}
/// P0040
pub(super) fn diagnostic_switch_missing_closing_brace(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let left_brace_span = related_span(&error, LEFT_BRACE);
let left_brace_context_span = context_span_if_useful(file, left_brace_span, blame_span);
let switch_keyword_context_span =
switch_keyword_context_span(file, &error, blame_span, left_brace_span);
let primary_span = primary_span_with_context(left_brace_context_span, blame_span);
let primary_text = match found_at(file, blame_span.start) {
FoundAt::Token(token_text) => format!("expected `}}` before `{}`", token_text),
FoundAt::EndOfFile => "expected `}` before end of file".to_string(),
FoundAt::Unknown => "expected `}` here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing `}` to close switch body");
if let Some(switch_keyword_context_span) = switch_keyword_context_span {
builder = builder.secondary_label(switch_keyword_context_span, "`switch` starts here");
}
builder
.primary_label(primary_span, primary_text)
.code("P0040")
.build()
}
/// P0041
pub(super) fn diagnostic_switch_case_missing_expression(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let case_context_span =
context_span_if_useful(file, related_span(&error, CASE_KEYWORD), blame_span);
let primary_text = match found_at(file, blame_span.start) {
FoundAt::Token(":") => "expected expression before `:`".to_string(),
FoundAt::Token(token_text) => format!("expected expression before `{}`", token_text),
FoundAt::EndOfFile => "expected expression before end of file".to_string(),
FoundAt::Unknown => "expected expression here".to_string(),
};
let mut builder = DiagnosticBuilder::error("missing expression after `case`");
if let Some(case_context_span) = case_context_span {
builder = builder.secondary_label(
case_context_span,
"after this `case`, an expression was expected",
);
}
builder
.primary_label(blame_span, primary_text)
.code("P0041")
.build()
}
/// P0042
pub(super) fn diagnostic_switch_case_expression_invalid_start(
error: ParseError,
file: &TokenizedFile<'_>,
) -> Diagnostic {
let blame_span = error.blame_span;
let case_context_span =
context_span_if_useful(file, related_span(&error, CASE_KEYWORD), blame_span);
let found = found_at(file, blame_span.start);
let title = match found {
FoundAt::Token(token_text) => {
format!("expected expression after `case`, found `{}`", token_text)
}
FoundAt::EndOfFile => "expected expression after `case`, found end of file".to_string(),
FoundAt::Unknown => "expected expression after `case`".to_string(),
};
let primary_text = match found {
FoundAt::Token(token_text) => format!("unexpected `{}`", token_text),
FoundAt::EndOfFile => "reached end of file here".to_string(),
FoundAt::Unknown => "expected expression here".to_string(),
};
let mut builder = DiagnosticBuilder::error(title);
if let Some(case_context_span) = case_context_span {
builder = builder.secondary_label(
case_context_span,
"after this `case`, an expression was expected",
);
}
builder
.primary_label(blame_span, primary_text)
.code("P0042")
.build()
}

View File

@ -0,0 +1,732 @@
use crate::diagnostics::{Diagnostic, Severity};
use crate::lexer::{TokenSpan, TokenizedFile};
use core::convert::Into;
use crossterm::style::Stylize;
use std::cmp::max;
use std::collections::HashMap;
use std::ops::RangeInclusive;
const INDENT: &str = " ";
/*
error[P0034]: missing `{` to start `switch` body
in file: files/P0034_04.uc
1 | switch
| ------ `switch` starts here
2 | (
3 | A
4 | )
| ^ expected `{` before end of file
| ^ switch selector is here
The outermost guideline isn't reaching the `^`! And should it even be `^`?
*/
/*
error: expected one of `,`, `:`, or `}`, found `token_to`
--> rottlib/src/ast/mod.rs:80:13
|
78 | Self {
| ---- while parsing this struct
79 | token_from: self.token_from,scd
| --- while parsing this struct field
80 | token_to: std::cmp::max(self.token_to, right_most_index),
| ^^^^^^^^ expected one of `,`, `:`, or `}`
*/
/*
|
76 | / "asdasdas
77 | | asd1
78 | | asd2
79 | | asdasd"
| |___________________^ expected `()`, found `&str`
*/
/*
1. Get each span's range and total lines covered by spans as ranges;
2. We need `+N` more lines for `N` labels;
3.
*/
// TODO: check if blue guidelines are sometimes red or vice versa
// TODO: tabs need to be replaced with a 1-width character
// These are abstract rendering events, not self-contained draw commands.
// They are emitted in increasing order of "significant lines" (range starts/ends).
// The actual source span for a label is recovered later from its LabelType.
#[derive(PartialEq, Eq, Clone, Copy)]
enum RendererCommands {
// A label whose range covers more than one line begins on this line.
// `column` is the guideline lane; it is emitted as 0 and patched to its
// final value once the matching `FinishRange` is produced.
StartRange {
label_type: LabelType,
column: usize,
},
// A multi-line label ends on this line. `column` is the same lane that was
// patched into the matching `StartRange`.
FinishRange {
label_type: LabelType,
column: usize,
},
// A label whose range starts and ends on the same line.
SingleRange {
label_type: LabelType,
},
}
// What to print in the line-number gutter of a rendered row.
enum LineIndexType {
// A source line; holds the 0-based line index, printed as `index + 1`.
Normal(usize),
// A row without a number (label rows); the gutter is left blank.
Missing,
// A row standing in for skipped source lines; the gutter shows `...`.
Ellipsis,
}
// Label ordering is semantic: primary first, then secondaries in diagnostic order.
// That order is also used to break visual ties when multiple labels would otherwise
// start or end on the same source line.
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
enum LabelType {
// The diagnostic's primary label.
Primary,
// A secondary label. The payload is the position of its range inside
// `RangeSet::secondary_ranges`; `Diagnostic::label_data` uses the same number
// to index `secondary_labels()`.
Secondary(usize),
}
// Inclusive line ranges covered by a diagnostic's labels.
struct RangeSet {
// Line range of the primary label, if it has one that could be resolved.
primary_range: Option<RangeInclusive<usize>>,
// Line ranges of the secondary labels that could be resolved, in order.
secondary_ranges: Vec<RangeInclusive<usize>>,
}
impl RangeSet {
    /*fn get(&self, index: usize) -> Option<&RangeInclusive<usize>> {
        if self.primary_range.is_some() {
            if index == 0 {
                return self.primary_range.as_ref();
            } else {
                self.secondary_ranges.get(index - 1)
            }
        } else {
            self.secondary_ranges.get(index)
        }
    }
    fn len(&self) -> usize {
        self.secondary_ranges.len() + if self.primary_range.is_some() { 1 } else { 0 }
    }*/

    // Iterates over every range: the primary one (if any) first, then the
    // secondary ones in order.
    fn iter(&self) -> impl Iterator<Item = &RangeInclusive<usize>> {
        let primary = self.primary_range.iter();
        let secondaries = self.secondary_ranges.iter();
        primary.chain(secondaries)
    }

    // Same order as `iter`, but each range is paired with the label it
    // belongs to; secondary labels are numbered by their position.
    fn iter_labeled(&self) -> impl Iterator<Item = (LabelType, &RangeInclusive<usize>)> {
        let primary = self
            .primary_range
            .iter()
            .map(|range| (LabelType::Primary, range));
        let secondaries = self
            .secondary_ranges
            .iter()
            .enumerate()
            .map(|(index, range)| (LabelType::Secondary(index), range));
        primary.chain(secondaries)
    }

    // Returns the smallest range bound (start or end of any range) that is
    // strictly greater than `line_number`. With `None`, every bound
    // qualifies, so the overall smallest bound is returned.
    fn get_first_bound_above(&self, line_number: Option<usize>) -> Option<usize> {
        let is_above = |bound: usize| match line_number {
            Some(line) => bound > line,
            None => true,
        };
        self.iter()
            .flat_map(|range| [*range.start(), *range.end()])
            .filter(|&bound| is_above(bound))
            .min()
    }
}
// Converts labeled line ranges into an ordered stream of renderer events.
//
// Returns `(line, command)` pairs, where `line` is the range bound (start or
// end line) at which the command takes effect.
//
// Important invariants:
//
// 1. Commands are ordered by increasing significant line.
// A significant line is any line on which some label starts or ends.
//
// 2. If multiple labels would visually terminate on the same source line,
// the renderer treats them as ending on distinct phantom rows, ordered by
// diagnostic priority (primary/secondary order). This prevents intersections
// and means that same-line closings are intentionally linearized rather than
// treated as a geometric tie.
//
// 3. RendererCommands do not store source line numbers directly.
// Later rendering recovers the underlying span from LabelType and uses the
// event order to know when labels become active/inactive.
//
// 4. When a label starts on the same significant line where another label ends,
// starts are processed first. This is intentional: longer-lived/opening labels
// must occupy earlier columns so that shorter-lived/closing labels bend around
// them without intersecting.
//
// NOTE(review): the lane is "number of ranges still open at close time". For
// partially overlapping ranges such as lines 1-5, 3-8 and 6-10 this appears to
// assign column 1 to both of the first two labels, although they overlap on
// lines 3-5 - confirm whether such inputs can occur.
fn make_renderer_commands(ranges: RangeSet) -> Vec<(usize, RendererCommands)> {
// Maps currently-open labels to the index of their StartRange command so that
// we can patch in the final column once the label closes.
let mut open_ranges = HashMap::new();
let mut commands = Vec::new();
// Last significant line already processed; `None` before the first one.
let mut current_line = None;
while let Some(next_significant_line) = ranges.get_first_bound_above(current_line) {
current_line = Some(next_significant_line);
// First process all new ranges because they'll live longer and have
// to have earlier columns
for (label, range) in ranges.iter_labeled() {
if *range.start() == next_significant_line {
if range.start() != range.end() {
// Multi-line label: the column is a placeholder until the label closes.
commands.push((
*range.start(),
RendererCommands::StartRange {
label_type: label,
column: 0,
},
));
open_ranges.insert(label, commands.len() - 1);
} else {
// Single-line label: no lane is needed.
commands.push((
*range.start(),
RendererCommands::SingleRange { label_type: label },
));
}
}
}
// Closing pass.
// The assigned column is the number of ranges that remain open after removing
// this label. Because same-line visual ties are already linearized by label
// priority / phantom rows, processing labels in iter_labeled() order is
// intentional here.
for (label, range) in ranges.iter_labeled() {
if *range.end() == next_significant_line {
// Single-line labels were never inserted into `open_ranges`, so they
// are skipped here.
if let Some(index) = open_ranges.remove(&label) {
// Column meaning:
// 0 = outermost / earliest lane
// larger values = further inward lanes
//
// We assign the column at close time, not at open time, because the final lane
// depends on which other ranges outlive this one.
let column = open_ranges.len();
// Patch the final lane into the StartRange emitted earlier.
if let Some((line_number, RendererCommands::StartRange { .. })) =
commands.get(index)
{
commands[index] = (
*line_number,
RendererCommands::StartRange {
label_type: label,
column,
},
);
}
commands.push((
*range.end(),
RendererCommands::FinishRange {
label_type: label,
column,
},
));
}
}
}
}
commands
}
// Returns the number of decimal digits needed to print the largest line
// number that any range in `ranges` can produce.
//
// Ranges hold 0-based line indices, but the gutter prints `index + 1`
// (see `LineIndexType::Normal` handling in the prefix builders), so the width
// must be computed from the 1-based number. Computing it from the 0-based
// index made a last line with index 999 report width 3 while "1000" needs 4,
// which underflowed the padding computation in the prefix builders.
//
// Returns 1 when there are no ranges at all.
fn max_line_number_width(ranges: &RangeSet) -> usize {
    let Some(max_line_index) = ranges.iter().map(|range| *range.end()).max() else {
        return 1;
    };
    // `saturating_add` keeps the (purely theoretical) `usize::MAX` index from
    // overflowing; the result is always >= 1, so `ilog10` cannot panic.
    let max_displayed_number = max_line_index.saturating_add(1);
    max_displayed_number.ilog10() as usize + 1
}
// Converts a token span into the inclusive range of lines it covers.
//
// Returns `None` when either end of the span has no line in `file`, or when
// the end line would come before the start line.
fn span_to_range<'src>(
    span: TokenSpan,
    file: &TokenizedFile<'src>,
) -> Option<RangeInclusive<usize>> {
    let first_line = file.token_line(span.start)?;
    let last_line = file.token_line(span.end)?;
    (first_line <= last_line).then(|| first_line..=last_line)
}
// Collects the line ranges of a diagnostic's primary and secondary labels.
//
// Labels whose span cannot be converted by `span_to_range` are left out.
//
// NOTE(review): leaving out a secondary label shifts the positions of all
// later ranges, while `LabelType::Secondary(index)` is later used to index
// `secondary_labels()` directly - presumably unresolved secondary spans never
// occur in practice; confirm, or carry the original index along.
fn make_ranges<'src>(file: &TokenizedFile<'src>, diagnostic: &Diagnostic) -> RangeSet {
    let primary_range = diagnostic
        .primary_label()
        .and_then(|label| span_to_range(label.span, file));
    let secondary_ranges = diagnostic
        .secondary_labels()
        .iter()
        .filter_map(|label| span_to_range(label.span, file))
        .collect();
    RangeSet {
        primary_range,
        secondary_ranges,
    }
}
impl Diagnostic {
/// Prints the whole diagnostic to standard output: the header, an
/// "in file" line with `file_path`, the annotated source lines, and
/// finally any help and notes.
pub fn render<'src>(&self, file: &TokenizedFile<'src>, file_path: impl Into<String>) {
    self.render_header();
    let path: String = file_path.into();
    let caption = "in file".blue().bold();
    println!("{INDENT}{caption}: {path}");
    self.render_lines(file);
    self.render_help_and_notes(file);
}
/*StartRange {
label_type: LabelType,
column: usize,
},
FinishRange {
label_type: LabelType,
},
SingleRange {
label_type: LabelType,
}, */
// Returns the span and message of the label identified by `label_type`.
//
// Returns `None` when the diagnostic has no primary label, or when a
// secondary index does not refer to an existing secondary label. The
// secondary lookup uses `get` instead of indexing so that an out-of-range
// index is reported as `None` - which every caller already handles - rather
// than as a panic in the middle of rendering.
fn label_data(&self, label_type: LabelType) -> Option<(TokenSpan, String)> {
    match label_type {
        LabelType::Primary => self
            .primary_label()
            .map(|label| (label.span, label.message.clone())),
        LabelType::Secondary(id) => self
            .secondary_labels()
            .get(id)
            .map(|label| (label.span, label.message.clone())),
    }
}
// Prints the annotated source excerpt of the diagnostic.
//
// Commands from `make_renderer_commands` are consumed in groups that share a
// source line. For each group the source line is printed first, followed by
// the rows for single-line labels and then the rows for labels finishing on
// that line. After each group up to three following source lines are printed;
// a longer gap before the next group is collapsed into an ellipsis row.
fn render_lines<'src>(&self, file: &TokenizedFile<'src>) {
let ranges = make_ranges(file, &self);
// The gutter is at least 3 wide so that the `...` of an ellipsis row fits.
let max_line_number_width = max(max_line_number_width(&ranges), 3);
let commands = make_renderer_commands(ranges);
// Highest lane used by any multi-line label; determines the stack size.
let mut max_column = 0;
for command in &commands {
if let (_, RendererCommands::StartRange { column, .. }) = command {
max_column = max(max_column, *column);
}
}
// One slot per lane, holding the label whose guideline currently runs there.
let mut vertical_stack = Vec::new();
vertical_stack.resize(max_column + 1, None);
let mut i = 0;
while i < commands.len() {
let mut current_line = commands[i].0;
let mut single_commands = Vec::new();
let mut start_commands = Vec::new();
let mut finish_commands = Vec::new();
// Gather every command that belongs to `current_line`, split by kind.
while i < commands.len() && current_line == commands[i].0 {
match commands[i].1 {
RendererCommands::SingleRange { label_type } => {
single_commands.push(label_type)
}
RendererCommands::StartRange { label_type, column } => {
start_commands.push((label_type, column));
}
RendererCommands::FinishRange { label_type, column } => {
finish_commands.push((label_type, column))
}
}
i += 1;
}
// First - update line drawing stack: labels starting here occupy their
// lanes before the source line itself is drawn.
for &(label_type, column) in &start_commands {
vertical_stack[column] = Some(label_type);
}
// Next - draw the line
self.draw_line_with_starts(
current_line,
max_line_number_width,
file,
&vertical_stack,
&start_commands,
);
for label_type in single_commands {
self.render_single_command(
label_type,
max_line_number_width,
file,
&vertical_stack,
);
}
// Next - render finish commands; each lane is freed only after its
// finishing row has been printed.
for (label_type, column) in finish_commands {
self.render_finish_command(
label_type,
max_line_number_width,
file,
&vertical_stack,
);
vertical_stack[column] = None;
}
// Render some more lines: up to three source lines that follow the current
// group, never reaching the line of the next command group.
let mut countdown = 3;
current_line += 1;
while i < commands.len() && current_line < commands[i].0 {
if countdown == 0 {
// Three lines were already shown. If exactly one line remains before
// the next group, print it; otherwise print an ellipsis row instead.
if current_line + 1 == commands[i].0 {
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
} else {
println!(
"{}",
self.make_line_prefix(
LineIndexType::Ellipsis,
max_line_number_width,
&vertical_stack
)
);
}
break;
} else {
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
}
current_line += 1;
countdown -= 1;
}
}
}
fn render_single_command<'src>(
&self,
label_type: LabelType,
max_line_number_width: usize,
file: &TokenizedFile<'src>,
vertical_stack: &[Option<LabelType>],
) {
let Some((span, message)) = self.label_data(label_type) else {
return;
};
let Some(visible) = file.span_visible_on_line(span) else {
return;
};
let mut builder = self.make_line_prefix(
LineIndexType::Missing,
max_line_number_width,
vertical_stack,
);
builder.push_str(&" ".repeat(visible.columns.start));
let underline_width = (visible.columns.end - visible.columns.start).max(1);
let mut underline_label = if label_type == LabelType::Primary {
"^".repeat(underline_width)
} else {
"-".repeat(underline_width)
};
underline_label.push_str(&format!(" {}", message));
match label_type {
LabelType::Primary => {
if self.severity == Severity::Error {
builder.push_str(&underline_label.red().bold().to_string());
} else {
builder.push_str(&underline_label.yellow().bold().to_string());
}
}
LabelType::Secondary(_) => {
builder.push_str(&underline_label.blue().bold().to_string());
}
}
println!("{builder}");
}
fn render_finish_command<'src>(
&self,
label_type: LabelType,
max_line_number_width: usize,
file: &TokenizedFile<'src>,
vertical_stack: &[Option<LabelType>],
) {
let Some((span, message)) = self.label_data(label_type) else {
return;
};
let Some(visible) = file
.token_visible_spans(span.end)
.and_then(|spans| spans.into_iter().last())
else {
return;
};
let mut builder =
self.make_finish_prefix(max_line_number_width, vertical_stack, label_type);
builder.push_str(&"".repeat(visible.columns.start).red().to_string());
let underline_width = (visible.columns.end - visible.columns.start).max(1);
let mut underline_label = "^".repeat(underline_width);
underline_label.push_str(&format!(" {}", message));
match label_type {
LabelType::Primary => {
if self.severity == Severity::Error {
builder.push_str(&underline_label.red().bold().to_string());
} else {
builder.push_str(&underline_label.yellow().bold().to_string());
}
}
LabelType::Secondary(_) => {
builder.push_str(&underline_label.blue().bold().to_string());
}
}
println!("{builder}");
}
// Prints one numbered source line behind the regular gutter prefix.
// A line without text in `file` is printed as empty.
fn draw_line<'src>(
    &self,
    current_line: usize,
    max_line_number_width: usize,
    file: &TokenizedFile<'src>,
    vertical_stack: &[Option<LabelType>],
) {
    let prefix = self.make_line_prefix(
        LineIndexType::Normal(current_line),
        max_line_number_width,
        vertical_stack,
    );
    let text = file.line_text(current_line).unwrap_or_default();
    println!("{prefix}{text}");
}
// Prints one numbered source line on which multi-line labels may start,
// using the start-aware gutter prefix. A line without text in `file` is
// printed as empty.
fn draw_line_with_starts<'src>(
    &self,
    current_line: usize,
    max_line_number_width: usize,
    file: &TokenizedFile<'src>,
    vertical_stack: &[Option<LabelType>],
    start_commands: &[(LabelType, usize)],
) {
    let prefix = self.make_start_prefix(
        LineIndexType::Normal(current_line),
        max_line_number_width,
        vertical_stack,
        start_commands,
    );
    let text = file.line_text(current_line).unwrap_or_default();
    println!("{prefix}{text}");
}
// Builds the gutter prefix of a regular row: the right-aligned line number
// (or blank / `...`), the `|` separator, and one guideline cell per lane of
// `vertical_stack`, coloured by the label occupying that lane.
//
// The padding is computed with `saturating_sub`: if the line number ever has
// more characters than `max_line_number_width`, the row is printed without
// padding instead of panicking on integer underflow.
fn make_line_prefix<'src>(
    &self,
    current_line: LineIndexType,
    max_line_number_width: usize,
    vertical_stack: &[Option<LabelType>],
) -> String {
    let line_text = match current_line {
        // Line indices are 0-based; the gutter shows 1-based numbers.
        LineIndexType::Normal(current_line) => (current_line + 1).to_string(),
        LineIndexType::Missing => "".to_string(),
        LineIndexType::Ellipsis => "...".to_string(),
    };
    let line_padding = " ".repeat(max_line_number_width.saturating_sub(line_text.len()));
    let mut builder = format!(" {}{} | ", line_padding, line_text)
        .blue()
        .bold()
        .to_string();
    for vertical_line in vertical_stack {
        if let Some(label) = vertical_line {
            // Primary guidelines follow the severity colour; secondary ones are blue.
            let piece = match label {
                LabelType::Primary => {
                    if self.severity == Severity::Error {
                        "".red()
                    } else {
                        "".yellow()
                    }
                }
                LabelType::Secondary(_) => "".blue(),
            }
            .to_string();
            builder.push_str(&piece);
        } else {
            // Free lane.
            builder.push_str(" ");
        }
    }
    builder
}
// Builds the gutter prefix of a source row on which multi-line labels may
// start. It has the same layout as `make_line_prefix`, but a lane whose label
// is listed in `start_commands` with that lane's column is treated as
// "starting here" and gets its own cell.
//
// The padding is computed with `saturating_sub`: if the line number ever has
// more characters than `max_line_number_width`, the row is printed without
// padding instead of panicking on integer underflow.
fn make_start_prefix(
    &self,
    current_line: LineIndexType,
    max_line_number_width: usize,
    vertical_stack: &[Option<LabelType>],
    start_commands: &[(LabelType, usize)],
) -> String {
    let line_text = match current_line {
        // Line indices are 0-based; the gutter shows 1-based numbers.
        LineIndexType::Normal(current_line) => (current_line + 1).to_string(),
        LineIndexType::Missing => "".to_string(),
        LineIndexType::Ellipsis => "...".to_string(),
    };
    let line_padding = " ".repeat(max_line_number_width.saturating_sub(line_text.len()));
    let mut builder = format!(" {}{} | ", line_padding, line_text)
        .blue()
        .bold()
        .to_string();
    for (column, vertical_line) in vertical_stack.iter().enumerate() {
        let piece = match vertical_line {
            Some(label) => {
                // Does this lane's label begin on the row being drawn?
                let starts_here = start_commands.iter().any(|(start_label, start_column)| {
                    *start_label == *label && *start_column == column
                });
                match label {
                    LabelType::Primary => {
                        if self.severity == Severity::Error {
                            if starts_here {
                                "".red()
                            } else {
                                "".red()
                            }
                        } else {
                            if starts_here {
                                "".yellow()
                            } else {
                                "".yellow()
                            }
                        }
                    }
                    LabelType::Secondary(_) => {
                        if starts_here {
                            "".blue()
                        } else {
                            "".blue()
                        }
                    }
                }
                .to_string()
            }
            // Free lane.
            None => " ".to_string(),
        };
        builder.push_str(&piece);
    }
    builder
}
// Builds the gutter prefix of the row that closes a multi-line label.
//
// The row carries no line number. Each lane of `vertical_stack` contributes
// one cell: the lane holding `finishing_label` and the lanes holding other
// labels are handled by separate arms, and free lanes are blank. Cells are
// coloured by label kind (primary follows the severity, secondary is blue).
fn make_finish_prefix(
&self,
max_line_number_width: usize,
vertical_stack: &[Option<LabelType>],
finishing_label: LabelType,
) -> String {
// No line number on this row, so the whole gutter is padding.
let line_text = "";
let line_padding = " ".repeat(max_line_number_width - line_text.len());
let mut builder = format!(" {}{} | ", line_padding, line_text)
.blue()
.bold()
.to_string();
for vertical_line in vertical_stack {
let piece = match vertical_line {
// Lane of the label that finishes on this row.
Some(label) if *label == finishing_label => match label {
LabelType::Primary => {
if self.severity == Severity::Error {
"".red()
} else {
"".yellow()
}
}
LabelType::Secondary(_) => "".blue(),
}
.to_string(),
// Lane of another label that is still open.
Some(label) => match label {
LabelType::Primary => {
if self.severity == Severity::Error {
"".red()
} else {
"".yellow()
}
}
LabelType::Secondary(_) => "".blue(),
}
.to_string(),
// Free lane.
None => " ".to_string(),
};
builder.push_str(&piece);
}
builder
}
// Prints the first line of the diagnostic in bold: the severity, the code
// in square brackets when the diagnostic has one, and the headline.
fn render_header(&self) {
    let severity_label = match self.severity {
        Severity::Error => "error".red(),
        Severity::Warning => "warning".yellow(),
    };
    let header = match &self.code {
        Some(code) => format!("{}[{}]: {}", severity_label, code, self.headline),
        None => format!("{}: {}", severity_label, self.headline),
    };
    println!("{}", header.bold());
}
// Prints the trailer of the diagnostic: a blank gutter row followed by the
// help line (if any) and one line per note. Prints nothing when the
// diagnostic has neither help nor notes.
fn render_help_and_notes<'src>(&self, file: &TokenizedFile<'src>) {
    let has_trailer = self.help().is_some() || !self.notes().is_empty();
    if !has_trailer {
        return;
    }
    // Same gutter width as the source excerpt, so the trailer lines up.
    let ranges = make_ranges(file, self);
    let max_line_number_width = max(max_line_number_width(&ranges), 3);
    // Blank gutter separator, like rustc's trailing `|`
    let gutter = " ".repeat(max_line_number_width);
    let separator = format!(" {} |", gutter).blue().bold();
    println!("{}", separator);
    if let Some(help) = self.help() {
        self.render_trailer_line("help", help, max_line_number_width);
    }
    for note in self.notes() {
        self.render_trailer_line("note", note, max_line_number_width);
    }
}
fn render_trailer_line(&self, kind: &str, message: &str, max_line_number_width: usize) {
let prefix = format!(" {} = ", " ".repeat(max_line_number_width))
.blue()
.bold()
.to_string();
let kind = match kind {
"help" => "help".green().bold().to_string(),
"note" => "note".blue().bold().to_string(),
_ => kind.bold().to_string(),
};
println!("{prefix}{kind}: {message}");
}
}

View File

@ -1,83 +0,0 @@
//! Debug-only helpers for [`TokenizedFile`]
//!
//! This module is **compiled only if**
//!
//! * the current build profile has `debug_assertions` enabled, or
//! * the crate is built with the `debug` cargo feature.
//!
//! These checks have been moved to the parent module.
/// A technical trait that adds debug helpers to the lexer.
pub trait DebugTools {
/// Pretty-prints the internal layout of the tokenised file - useful when
/// writing new passes or hunting lexer bugs.
///
/// This method writes the layout directly to standard output.
///
/// The format is unspecified, may change, and is not intended for
/// external tools.
///
/// Each line in the printed layout is introduced by its 1-based number for
/// convenience.
fn dump_debug_layout(&self);
/// Reconstructs the exact, lossless source text that was fed to
/// [`super::TokenizedFile::from_source`] from internal representation -
/// useful for manually verifying that the lexer works.
fn reconstruct_source(&self) -> String;
}
impl<'src> DebugTools for super::TokenizedFile<'src> {
    /// Concatenates the lexemes of all buffered token pieces, in order.
    fn reconstruct_source(&self) -> String {
        self.buffer
            .iter()
            .map(|piece| piece.lexeme)
            .collect::<String>()
    }

    /// Prints, for every line, its 1-based number, whether it continues a
    /// token from an earlier line, and the tokens that begin on it.
    fn dump_debug_layout(&self) {
        for (row_idx, line) in self.lines.iter().enumerate() {
            println!("Line {}", row_idx + 1);
            let local_tokens = line.local_range();
            match line.continued_from {
                Some(origin_row) => match local_tokens {
                    // Continuation **plus** some fresh tokens that begin here.
                    Some(range) => {
                        println!("\t[Continued from line {} + new tokens]", origin_row + 1);
                        dump_spans(&self.buffer[range.clone()]);
                    }
                    // Pure continuation - the only thing on this line is
                    // the remainder of a multi-line token that started earlier.
                    None => {
                        println!(
                            "\t[Continued from line {} - no new tokens here]",
                            origin_row + 1
                        );
                    }
                },
                None => match local_tokens {
                    // Stand-alone line (all tokens start here)
                    Some(range) => {
                        println!("\t[Standalone]");
                        dump_spans(&self.buffer[range.clone()]);
                    }
                    // An empty physical line (should be rare, but let's be safe).
                    None => {
                        println!("\t[Empty line]");
                    }
                },
            }
        }
    }
}
/// Prints each piece in `spans` on its own line: the token, its start and
/// end column (a running sum of the pieces' `length_utf16`, beginning at 0)
/// and its lexeme.
fn dump_spans<'src>(spans: &[super::TokenPiece<'src>]) {
    let mut start = 0usize;
    for piece in spans {
        let end = start + piece.length_utf16;
        println!(
            "\t\t{:?} @ {}-{}: {:?}",
            piece.token, start, end, piece.lexeme
        );
        start = end;
    }
}

View File

@ -1,200 +0,0 @@
//! Sub-module that adds an iterator to [`TokenizedFile`] which yields tokens in
//! the order they appear in the source code.
//!
//! ## Examples
//!
//! ```rust
//! let iter = TokenizedFile::from_str("0 / 0").tokens().without_whitespace();
//! ```
//!
//! ## Terminology: continued tokens
//!
//! Some [`super::Token`]s (e.g. [`super::Token::CppText`] or
//! [`super::Token::BlockComment`]) can span multiple lines and are recorded on
//! every line on which they appear (usually as the first, and sometimes
//! the only, token).
//! In this module these are referred to as "continued" or
//! "carried-over" tokens.
//! Since our iterator needs to return each token only once, we take special
//! care to skip such continued tokens during iteration.
use super::{TokenLocation, TokenPiece, TokenizedFile};
/// An immutable iterator over all tokens in a [`TokenizedFile`], preserving
/// their order of appearance in the original source file.
///
/// Each item is the token's [`TokenLocation`] paired with its [`TokenPiece`].
/// Whitespace tokens are included unless [`Tokens::without_whitespace`] was
/// called.
///
/// After exhaustion it keeps returning [`None`].
#[must_use]
#[derive(Clone, Debug)]
pub struct Tokens<'src> {
/// [`TokenLocation`] of the next token to be returned, or
/// [`TokenLocation::EndOfFile`] once no tokens are left.
cursor: TokenLocation,
/// [`TokenizedFile`] whose tokens we're iterating over.
source_file: &'src TokenizedFile<'src>,
/// When `true`, whitespace tokens are skipped.
skip_whitespace: bool,
}
// Sound because once `next` has returned [`None`] the cursor no longer
// changes, so every later call returns [`None`] as well.
impl<'src> std::iter::FusedIterator for Tokens<'src> {}
impl<'src> Tokens<'src> {
    /// Makes the iterator skip all whitespace tokens.
    #[must_use]
    #[inline]
    pub fn without_whitespace(self) -> Self {
        Self {
            skip_whitespace: true,
            ..self
        }
    }

    // Returns the position of the next new token, skipping carried-over pieces
    // and blank lines.
    fn advance_position(&self, position: TokenLocation) -> TokenLocation {
        let (line, column) = match position {
            TokenLocation::Position { line, column } => (line, column),
            _ => return TokenLocation::EndOfFile,
        };
        let lines = &self.source_file.lines;
        // `Line::len()` also counts a possible token that continued from
        // the previous line.
        let has_next_on_line = lines
            .get(line)
            .is_some_and(|current_line| column + 1 < current_line.len());
        if has_next_on_line {
            return TokenLocation::Position {
                line,
                column: column + 1,
            };
        }
        // Current line is exhausted: look further down for the first line that
        // **owns local tokens**. Empty lines and lines holding only a token
        // continued from above are skipped, since continued tokens were
        // already iterated over.
        let next_owner = lines
            .iter()
            .enumerate()
            .skip(line + 1)
            .find(|(_, candidate)| candidate.local_range().is_some());
        match next_owner {
            // Start at the first *local* token: column 1 when column 0 is
            // taken by a carried-over token, column 0 otherwise.
            Some((next_line, candidate)) => TokenLocation::Position {
                line: next_line,
                column: usize::from(candidate.continued_from.is_some()),
            },
            // No more tokens.
            None => TokenLocation::EndOfFile,
        }
    }

    // Creates a new iterator.
    fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
        let mut tokens = Tokens {
            source_file,
            cursor: TokenLocation::Position { line: 0, column: 0 },
            skip_whitespace: false,
        };
        // Move the cursor onto the first existing token (or to the end of the
        // file) so that [`Iterator::next`] can assume the cursor is valid.
        while tokens.cursor != TokenLocation::EndOfFile
            && tokens.source_file.get(tokens.cursor).is_none()
        {
            tokens.cursor = tokens.advance_position(tokens.cursor);
        }
        tokens
    }
}
impl<'src> Iterator for Tokens<'src> {
    type Item = (TokenLocation, TokenPiece<'src>);

    /// Yields the token under the cursor together with its location and moves
    /// the cursor to the following token.
    ///
    /// With whitespace skipping enabled, whitespace tokens are consumed
    /// silently and the next non-whitespace token is returned instead.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.cursor == TokenLocation::EndOfFile {
                return None;
            }
            let location = self.cursor;
            let piece = *self.source_file.get(location)?;
            self.cursor = self.advance_position(location);
            // Only whitespace is ever discarded, and only when asked to.
            if self.skip_whitespace && piece.token.is_whitespace() {
                continue;
            }
            return Some((location, piece));
        }
    }
}
impl<'src> TokenizedFile<'src> {
    // Returns the final local token in `line_number`
    // (used to resolve column 0 of a continued line).
    fn last_piece_in_line(&self, line_number: usize) -> Option<&TokenPiece> {
        self.lines
            .get(line_number)
            .and_then(|line| line.local_range())
            // `Line::local_range()` is guaranteed to return non-empty `Range`.
            .and_then(|range| self.buffer.get(range.end - 1))
    }

    /// Returns [`TokenPiece`] at a given location if it exists.
    ///
    /// If the line specified by [`TokenLocation`] starts with a token that
    /// continues from the previous line - column `0` refers to that token.
    ///
    /// Never panics, invalid position returns [`None`]; in particular
    /// `TokenLocation::EndOfFile` always yields [`None`].
    ///
    /// ## Examples
    ///
    /// ```rust
    /// use super::{TokenizedFile, TokenLocation, Token};
    /// let file = TokenizedFile::from_str("0 / 0");
    /// assert_eq!(
    ///     file.get(TokenLocation::Position { line: 0, column: 2 })
    ///         .map(|p| p.token),
    ///     Some(Token::Divide),
    /// );
    /// ```
    #[must_use]
    pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
        let TokenLocation::Position { line, column } = position else {
            return None;
        };
        let line = self.lines.get(line)?;
        if column >= line.len() {
            return None;
        }
        if let Some(spanned_line_number) = line.continued_from
            && column == 0
        {
            // Column 0 of a continued line is the token carried over from
            // the line where it started.
            self.last_piece_in_line(spanned_line_number)
        } else {
            // If we have a token that continued from the previous line,
            // then, relative to `self.buffer`, our `column` is actually 1-based
            // and we need to shift it back to being 0-based.
            let carried_shift = usize::from(line.continued_from.is_some());
            let token_position = line.local_range.start + column - carried_shift;
            self.buffer.get(token_position)
        }
    }

    /// Returns an iterator over all contained tokens in the order they appear
    /// in the original source file.
    ///
    /// By default includes all tokens, including whitespace and comments.
    ///
    /// Returns the same iterator as [`TokenizedFile::into_iter`]
    #[must_use]
    #[inline]
    pub fn tokens(&'src self) -> Tokens<'src> {
        Tokens::new(self)
    }
}
/// Lets a borrowed [`TokenizedFile`] be used directly in a `for` loop;
/// iteration is delegated to [`TokenizedFile::tokens`].
impl<'src> IntoIterator for &'src TokenizedFile<'src> {
type Item = (TokenLocation, TokenPiece<'src>);
type IntoIter = Tokens<'src>;
#[inline]
fn into_iter(self) -> Self::IntoIter {
self.tokens()
}
}

View File

@ -1,526 +0,0 @@
//! Lexer for UnrealScript that understands inline `cpptext { ... }` blocks.
//!
//! ## Notable details
//!
//! In UnrealScript, `cpptext` lets authors embed raw C++ between braces.
//! Because whitespace, newlines, or comments may appear between the
//! `cpptext` keyword and the opening `{`, the lexer must remember that
//! it has just seen `cpptext` - hence a state machine.
//!
//! ## Modes
//!
//! - **Normal** - ordinary UnrealScript tokens.
//! - **AwaitingCppBlock** - after `cpptext`, waiting for the next `{`.
//!
//! When that brace arrives, the lexer consumes the entire C++ block as
//! one token (`Token::Brace(BraceKind::CppBlock)`), tracking nested
//! braces, strings, and comments on the way. If the closing `}` is
//! missing, everything to EOF is treated as C++; downstream parsers must
//! handle that gracefully.
use logos::Lexer;
/// Which lexer mode we're in. See the module docs for the full story.
///
/// The mode is stored in [`LexerState`] and decides how `handle_brace` lexes
/// the next `{`.
#[derive(Default, Clone, Copy, PartialEq, Eq)]
enum LexerMode {
/// Lexing regular UnrealScript; `{` is an ordinary brace.
#[default]
Normal,
/// Saw `cpptext`; waiting for the opening `{` of a C++ block.
AwaitingCppBlock,
}
/// Extra per-lexer state. Currently just holds the [`LexerMode`].
///
/// This is a logos-specific implementation detail: it is plugged into the
/// lexer through `#[logos(extras = LexerState)]`.
#[derive(Default)]
pub struct LexerState {
// Current mode; `Default` starts it as `LexerMode::Normal`.
mode: LexerMode,
}
/// Are these braces "real" UnrealScript braces, or the start/end of a C++ block?
///
/// Carried as the payload of `Token::Brace`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BraceKind {
/// An ordinary `{`.
Normal,
/// A `{` that followed `cpptext`; the token covers the whole C++ block.
CppBlock,
}
/// All UnrealScript tokens that our compiler distinguishes.
///
/// Keywords are matched case-insensitively (every keyword pattern carries
/// `(?i)`); trivia such as whitespace, newlines and comments are tokens too.
#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[logos(extras = LexerState)]
pub enum Token {
// # Compiler/directive keywords
/// `#exec` directive. The pattern requires a line terminator, which becomes
/// part of the token.
#[regex(r"(?i)#exec[^\r\n]*(\r|\n|\r\n)")]
ExecDirective,
/// `cpptext` keyword; its callback switches the lexer to
/// `LexerMode::AwaitingCppBlock`.
#[regex("(?i)cpptext", |lex| { lex.extras.mode = LexerMode::AwaitingCppBlock; })]
CppText,
// # Declaration & structural keywords
#[regex("(?i)class")]
Class,
#[regex("(?i)struct")]
Struct,
#[regex("(?i)enum")]
Enum,
#[regex("(?i)state")]
State,
#[regex("(?i)function")]
Function,
#[regex("(?i)event")]
Event,
#[regex("(?i)delegate")]
Delegate,
#[regex("(?i)var")]
Var,
#[regex("(?i)local")]
Local,
// # Inheritance, interface, dependencies
#[regex("(?i)extends")]
Extends,
#[regex("(?i)dependson")]
DependsOn,
// # Access modifiers & properties
#[regex("(?i)private")]
Private,
#[regex("(?i)protected")]
Protected,
#[regex("(?i)public")]
Public,
#[regex("(?i)const")]
Const,
#[regex("(?i)static")]
Static,
#[regex("(?i)native")]
Native,
#[regex("(?i)abstract")]
Abstract,
#[regex("(?i)deprecated")]
Deprecated,
// # UnrealScript metadata/specifiers
#[regex("(?i)default")]
Default,
#[regex("(?i)defaultproperties")]
DefaultProperties,
#[regex("(?i)optional")]
Optional,
#[regex("(?i)config")]
Config,
#[regex("(?i)perobjectconfig")]
PerObjectConfig,
#[regex("(?i)globalconfig")]
GlobalConfig,
#[regex("(?i)collapsecategories")]
CollapseCategories,
#[regex("(?i)dontcollapsecategories")]
DontCollapseCategories,
#[regex("(?i)hidecategories")]
HideCategories,
#[regex("(?i)localized")]
Localized,
#[regex("(?i)placeable")]
Placeable,
#[regex("(?i)notplaceable")]
NotPlaceable,
#[regex("(?i)editinlinenew")]
EditInlineNew,
#[regex("(?i)noteditinlinenew")]
NotEditInlineNew,
#[regex("(?i)dynamicrecompile")]
DynamicRecompile,
#[regex("(?i)transient")]
Transient,
#[regex("(?i)operator")]
Operator,
#[regex("(?i)simulated")]
Simulated,
#[regex("(?i)latent")]
Latent,
#[regex("(?i)iterator")]
Iterator,
#[regex("(?i)out")]
Out,
#[regex("(?i)skip")]
Skip,
#[regex("(?i)singular")]
Singular,
#[regex("(?i)coerce")]
Coerce,
#[regex("(?i)assert")]
Assert,
#[regex("(?i)ignores")]
Ignores,
#[regex("(?i)within")]
Within,
#[regex("(?i)noexport")]
NoExport,
// # Replication-related
#[regex("(?i)reliable")]
Reliable,
#[regex("(?i)unreliable")]
Unreliable,
#[regex("(?i)replication")]
Replication,
#[regex("(?i)nativereplication")]
NativeReplication,
// # Control-flow keywords
#[regex("(?i)goto")]
Goto,
#[regex("(?i)if")]
If,
#[regex("(?i)else")]
Else,
#[regex("(?i)switch")]
Switch,
#[regex("(?i)case")]
Case,
#[regex("(?i)for")]
For,
#[regex("(?i)foreach")]
ForEach,
#[regex("(?i)while")]
While,
#[regex("(?i)do")]
Do,
#[regex("(?i)until")]
Until,
#[regex("(?i)break")]
Break,
#[regex("(?i)continue")]
Continue,
#[regex("(?i)return")]
Return,
// # Built-in types
#[regex("(?i)int")]
Int,
#[regex("(?i)float")]
Float,
#[regex("(?i)bool")]
Bool,
#[regex("(?i)byte")]
Byte,
#[regex("(?i)string")]
String,
#[regex("(?i)array")]
Array,
#[regex("(?i)name")]
Name,
// # Literals & identifiers
/// Decimal integer, or hexadecimal integer with a `0x`/`0X` prefix.
#[regex(r"0[xX][0-9A-Fa-f]+|[0-9]+")]
IntegerLiteral,
/// Decimal number with a fractional part and an optional exponent.
#[regex(r"[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?")]
FloatLiteral,
/// Double-quoted, single-line string; a backslash escapes the next character.
#[regex(r#""([^"\\\r\n]|\\.)*""#)]
StringLiteral,
/// Single-quoted name made of letters, digits, `_`, `.`, space and `-`.
#[regex(r"'[a-zA-Z0-9_\. \-]*'")]
NameLiteral,
#[regex("(?i)true")]
True,
#[regex("(?i)false")]
False,
#[regex("(?i)none")]
None,
#[regex("(?i)self")]
SelfKeyword,
#[regex("(?i)new")]
New,
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
Identifier,
// # Operations
// ## Exponentiation
#[token("**")]
Exponentiation,
// ## Unary
#[token("++")]
Increment,
#[token("--")]
Decrement,
#[token("!")]
Not,
#[token("~")]
BitwiseNot,
// ## Vector
#[regex("(?i)dot")]
Dot,
#[regex("(?i)cross")]
Cross,
// ## Multiplicative
#[token("*")]
Multiply,
#[token("/")]
Divide,
#[token("%")]
Modulo,
// ## Additive
#[token("+")]
Plus,
#[token("-")]
Minus,
// ## String manipulation
#[token("@")]
ConcatSpace,
#[token("$")]
Concat,
// ## Shifts
#[token("<<")]
LeftShift,
#[token(">>>")]
LogicalRightShift,
#[token(">>")]
RightShift,
// ## Relational
#[token("<")]
Less,
#[token("<=")]
LessEqual,
#[token(">")]
Greater,
#[token(">=")]
GreaterEqual,
#[token("==")]
Equal,
#[token("!=")]
NotEqual,
#[token("~=")]
ApproximatelyEqual,
#[regex("(?i)clockwisefrom")]
ClockwiseFrom,
// ## Bitwise
#[token("&")]
BitwiseAnd,
#[token("|")]
BitwiseOr,
#[token("^")]
BitwiseXor,
// ## Logical
#[token("&&")]
And,
#[token("^^")]
Xor,
#[token("||")]
Or,
// ## Assignments
#[token("=")]
Assign,
#[token("*=")]
MultiplyAssign,
#[token("/=")]
DivideAssign,
#[token("%=")]
ModuloAssign,
#[token("+=")]
PlusAssign,
#[token("-=")]
MinusAssign,
#[token("$=")]
ConcatAssign,
#[token("@=")]
ConcatSpaceAssign,
// # Punctuation & delimiters
#[token("(")]
LeftParenthesis,
#[token(")")]
RightParenthesis,
/// `{`; the payload is chosen by `handle_brace`, which consumes a whole
/// C++ block when the lexer is awaiting one.
#[token("{", handle_brace)]
Brace(BraceKind),
#[token("}")]
RightBrace,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token(";")]
Semicolon,
#[token(",")]
Comma,
#[token(".")]
Period,
#[token(":")]
Colon,
#[token("#")]
Hash,
#[token("?")]
Question,
// # Comments & whitespaces
/// `// ...` comment; the line terminator is not part of the token.
#[regex(r"//[^\r\n]*")]
LineComment,
/// `/* ... */` comment; `handle_block_comment` consumes the rest of the
/// comment, including nested ones.
#[regex(r"/\*", handle_block_comment)]
BlockComment,
#[regex(r"\r\n|\n|\r")]
Newline,
/// A run of spaces and/or tabs.
#[regex(r"[ \t]+")]
Whitespace,
// # Technical
// Has no pattern of its own.
// NOTE(review): presumably substituted for input no rule matches - confirm
// where it is produced.
Error,
}
impl Token {
    /// Returns `true` if this token is a newline ([`Token::Newline`]).
    pub fn is_newline(&self) -> bool {
        matches!(self, Token::Newline)
    }

    /// Returns `true` if this token is trivia whitespace
    /// ([`Token::Whitespace`] or [`Token::Newline`]).
    ///
    /// Note: comments are **not** considered whitespace.
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Token::Whitespace | Token::Newline)
    }

    /// Returns `true` if this token may span multiple physical lines
    /// (i.e. can contain newline characters): block comments, C++ blocks
    /// and error tokens.
    pub fn can_span_lines(&self) -> bool {
        matches!(
            self,
            Token::BlockComment | Token::Brace(BraceKind::CppBlock) | Token::Error
        )
    }

    /// Returns `true` if this token can appear in type position
    /// (either a built-in type keyword or an identifier).
    pub fn is_valid_type_name_token(&self) -> bool {
        matches!(
            self,
            Token::Int
                | Token::Float
                | Token::Bool
                | Token::Byte
                | Token::String
                | Token::Array
                | Token::Name
                | Token::Identifier
        )
    }
}
/// Lexes the remainder of a `/* ... */` comment whose opening `/*` has
/// already been matched, honouring nested comments
/// (which UnrealScript allows).
///
/// Returns `Some(())` once the outermost comment is closed, leaving the lexer
/// right after its `*/`. Returns [`None`] if the input ends while a comment is
/// still open.
fn handle_block_comment(lexer: &mut Lexer<Token>) -> Option<()> {
    let mut open_comments = 1;
    loop {
        let rest = lexer.remainder();
        if rest.starts_with("/*") {
            open_comments += 1;
            lexer.bump(2);
        } else if rest.starts_with("*/") {
            open_comments -= 1;
            lexer.bump(2);
            if open_comments == 0 {
                return Some(());
            }
        } else {
            // Running out of input here means the comment is unterminated.
            let skipped = rest.chars().next()?;
            lexer.bump(skipped.len_utf8());
        }
    }
}
/// Callback for every `{`.
///
/// In normal mode the brace is an ordinary one. If the lexer is waiting for a
/// C++ block, the mode is reset, the whole block is consumed into this token,
/// and the brace is reported as [`BraceKind::CppBlock`].
fn handle_brace(lexer: &mut Lexer<Token>) -> Option<BraceKind> {
    if lexer.extras.mode == LexerMode::Normal {
        return Some(BraceKind::Normal);
    }
    lexer.extras.mode = LexerMode::Normal;
    consume_cpp_block(lexer);
    Some(BraceKind::CppBlock)
}
/// Skips over an embedded C++ block whose opening `{` has already been
/// consumed by the caller.
///
/// While scanning it keeps track of:
/// - nested `{...}` pairs;
/// - string literals (`"..."` and `'...'`), including escaped quotes;
/// - line comments (`// ...\n`);
/// - block comments (`/* ... */`).
///
/// Stops immediately after the `}` that closes the block, or at the end of
/// input if the block is never closed.
fn consume_cpp_block(lexer: &mut Lexer<Token>) {
    let mut open_braces = 1;
    while open_braces > 0 {
        let rest = lexer.remainder();
        let Some(next) = rest.chars().next() else {
            // Input ended before the block was closed.
            return;
        };
        if rest.starts_with("/*") {
            lexer.bump(2); // the two-byte opener `/*`
            consume_c_comment(lexer);
        } else if rest.starts_with("//") {
            // Skip the comment together with the `\n` that ends it
            // (or everything that is left if there is no `\n`).
            let comment_length = rest.find('\n').map_or(rest.len(), |newline| newline + 1);
            lexer.bump(comment_length);
        } else {
            lexer.bump(next.len_utf8());
            match next {
                '{' => open_braces += 1,
                '}' => open_braces -= 1,
                // The opening quote is already consumed at this point.
                '"' | '\'' => consume_string_literal(lexer, next),
                _ => {}
            }
        }
    }
}
/// Skips the body of a C-style `/* ... */` comment (no nesting).
///
/// Expects the opener `/*` to be consumed already. Stops right after the
/// first `*/`; an unterminated comment swallows the rest of the input.
fn consume_c_comment(lexer: &mut Lexer<Token>) {
    let rest = lexer.remainder();
    let comment_length = rest.find("*/").map_or(rest.len(), |closer| closer + 2);
    lexer.bump(comment_length);
}
/// Skips the rest of a C++ string or character literal.
///
/// Expects the opening quotation mark to be consumed already. Stops right
/// after the closing `delimiter`; a backslash makes the following character
/// part of the literal. An unterminated literal swallows the rest of the input.
fn consume_string_literal(lexer: &mut Lexer<Token>, delimiter: char) {
    let mut literal_length = 0;
    let mut characters = lexer.remainder().chars();
    while let Some(character) = characters.next() {
        literal_length += character.len_utf8();
        if character == '\\' {
            // The escaped character can never close the literal.
            if let Some(escaped) = characters.next() {
                literal_length += escaped.len_utf8();
            }
        } else if character == delimiter {
            break;
        }
    }
    lexer.bump(literal_length);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,313 @@
//! # Query helpers
//!
//! Read-only convenience APIs for inspecting a [`TokenizedFile`] without
//! exposing its internal representation.
use crate::lexer::{Line, TokenData, TokenPosition, TokenizedFile, VisibleLineSpan};
impl<'src> TokenizedFile<'src> {
/// Returns the end-of-file position: the index one past the last token in
/// the file-wide token buffer.
pub const fn eof(&self) -> TokenPosition {
TokenPosition(self.buffer.len())
}
/// Returns `true` if `position` equals [`TokenizedFile::eof`].
pub fn is_eof(&self, position: &TokenPosition) -> bool {
position == &self.eof()
}
/// Returns the number of physical lines stored in this file
/// (the length of the internal line table).
///
/// Empty line after the trailing newline sequence isn't counted as a line
/// by this method.
#[must_use]
pub const fn line_count(&self) -> usize {
self.lines.len()
}
/// Iterates over the tokens that *start* on physical line `line_number`.
///
/// Every item is a `(TokenPosition, TokenData)` pair, where the position is
/// the token's index in the file-wide token arena.
///
/// A newline token that ends the line is part of the output.
///
/// A fragment of a multi-line token carried over from an earlier line is
/// **not** part of the output; use [`TokenizedFile::line_text`] to get the
/// visible content of the whole line.
///
/// An out-of-bounds `line_number`, or a line without local tokens, gives an
/// empty iterator.
#[must_use]
pub fn line_tokens(
    &self,
    line_number: usize,
) -> std::vec::IntoIter<(TokenPosition, TokenData<'src>)> {
    let tokens: Vec<(TokenPosition, TokenData<'src>)> = self
        .lines
        .get(line_number)
        .and_then(|line| line.local_range())
        .map(|local_range| {
            local_range
                // Invariant: `Line::local_range()` is built from contiguous
                // slices of `self.buffer` during tokenization, so every
                // index in it is valid for `self.buffer`.
                .map(|buffer_index| (TokenPosition(buffer_index), self.buffer[buffer_index]))
                .collect()
        })
        .unwrap_or_default();
    tokens.into_iter()
}
/// Returns the token stored at `position`, if that position is valid.
///
/// This is a direct lookup into the file-wide token buffer; the token data is
/// returned by value (copied out of the buffer). The end-of-file position
/// holds no token, so it yields [`None`].
#[must_use]
pub fn token_at(&self, position: TokenPosition) -> Option<TokenData<'src>> {
self.buffer.get(position.0).copied()
}
/// Reconstructs the visible text of physical line `line_index`.
///
/// The returned string does **not** include a trailing line terminator.
///
/// Unlike [`TokenizedFile::line_tokens`], this method includes the visible
/// fragment of a multi-line token carried from an earlier line.
///
/// Returns [`None`] iff `line_index >= self.line_count()`.
#[must_use]
pub fn line_text(&self, line_index: usize) -> Option<String> {
let line = self.lines.get(line_index)?;
let mut out = String::new();
if let Some(piece) = self.carried_piece_for_line(line_index) {
out.push_str(piece);
}
let Some(range) = line.local_range() else {
return Some(out);
};
for buffer_index in range.clone() {
let token_piece = self.buffer[buffer_index];
if token_piece.token.is_newline() {
// Must be last token
debug_assert_eq!(buffer_index + 1, range.end);
break;
}
if token_piece.token.can_span_lines()
&& let Some(first_segment) = self
.multi_line_map
.get(&buffer_index)
.and_then(|segments| segments.first())
{
out.push_str(&token_piece.lexeme[first_segment.clone()]);
// Must be last token
debug_assert_eq!(buffer_index + 1, range.end);
break;
}
out.push_str(token_piece.lexeme);
}
Some(out)
}
/// Finds the 0-based physical line on which the token at `position` starts.
///
/// A multi-line token is attributed to the line where it begins, not to
/// every physical line it spans.
///
/// The end-of-file position maps to the last stored line (or `None` when
/// there are no lines). Any other out-of-bounds `position` gives `None`.
#[must_use]
pub fn token_line(&self, position: TokenPosition) -> Option<usize> {
    let stored_lines = self.lines.len();
    // EOF is a valid virtual position: past the end of the last stored line.
    if position == self.eof() {
        return stored_lines.checked_sub(1);
    }
    // Reject invalid non-EOF positions early.
    if position.0 >= self.buffer.len() {
        return None;
    }
    // First line whose exclusive token bound lies beyond `position`.
    let line_index = self
        .lines
        .partition_point(|line| self.line_search_upper_bound(line) <= position.0);
    if line_index < stored_lines {
        Some(line_index)
    } else {
        None
    }
}
/// Gives the exclusive upper bound on token indices used when
/// binary-searching lines by token position.
///
/// Put differently: every token that starts on this line has
/// an index `< returned_value`.
///
/// Panics if the line has neither local tokens nor a continuation origin, or
/// if that origin has no local tokens.
fn line_search_upper_bound(&self, line: &Line) -> usize {
    match line.local_range() {
        Some(local_tokens) => local_tokens.end,
        None => {
            // Pure continuation line: all it holds is the carried fragment
            // of a multi-line token that started earlier. That token is
            // the last local token of the origin line, so its index + 1
            // serves as the exclusive bound.
            let origin_line = line
                .continued_from
                .expect("empty line entry must be a continuation line");
            let carried_token = self
                .carried_token_index(origin_line)
                .expect("continuation line must point to a valid origin token");
            carried_token + 1
        }
    }
}
/// Returns the part of a multi-line token that is visible on `line_index`,
/// provided the line begins with a fragment of a token that started earlier.
///
/// Gives [`None`] when the line does not exist, is not a continuation, or
/// the token has no recorded segment for this line.
fn carried_piece_for_line(&self, line_index: usize) -> Option<&'src str> {
    // Locate the multi-line token carried into this line.
    let origin_line = self.lines.get(line_index)?.continued_from?;
    let token_index = self.carried_token_index(origin_line)?;
    // Segments are indexed by the distance from the origin line.
    let lines_below_origin = line_index.checked_sub(origin_line);
    let segment = self
        .multi_line_map
        .get(&token_index)?
        .get(lines_below_origin?)?;
    let lexeme: &'src str = self.buffer.get(token_index)?.lexeme;
    lexeme.get(segment.clone())
}
/// Finds the buffer index of the multi-line token that starts on
/// `origin_line` and continues onto later lines.
///
/// With the current representation that is always the last local token of
/// the origin line. Gives [`None`] if the line does not exist or owns no
/// local tokens.
fn carried_token_index(&self, origin_line: usize) -> Option<usize> {
    let last_local_token = self
        .lines
        .get(origin_line)?
        .local_range()?
        .end
        .checked_sub(1)?;
    debug_assert!(self.buffer[last_local_token].token.can_span_lines());
    Some(last_local_token)
}
/// Lists the visible spans, one per physical line, that the token at
/// `position` occupies.
///
/// Columns count visible characters inside `line_text(line)`; the end bound
/// is exclusive.
///
/// A newline token has no visible text and gives an empty vector.
/// The end-of-file position gives one zero-width span at the end of the last
/// stored line.
///
/// Returns `None` if `position` is invalid.
#[must_use]
pub fn token_visible_spans(&self, position: TokenPosition) -> Option<Vec<VisibleLineSpan>> {
    // EOF is a virtual zero-width span at the end of the last stored line.
    if position == self.eof() {
        let last_line = self.line_count().checked_sub(1)?;
        let end_column = self.line_text(last_line)?.chars().count();
        return Some(vec![VisibleLineSpan {
            line: last_line,
            columns: end_column..end_column,
        }]);
    }
    let token = self.buffer.get(position.0).copied()?;
    let first_line = self.token_line(position)?;
    let first_column = self.token_start_visible_column(position)?;
    if token.token.is_newline() {
        return Some(Vec::new());
    }
    let Some(segments) = self.multi_line_map.get(&position.0) else {
        // Single-line token: exactly one span.
        let width = token.lexeme.chars().count();
        return Some(vec![VisibleLineSpan {
            line: first_line,
            columns: first_column..(first_column + width),
        }]);
    };
    // Multi-line token: one span per non-empty segment. Only the first
    // segment starts mid-line; the others start at column 0.
    let mut spans = Vec::with_capacity(segments.len());
    for (line_offset, byte_range) in segments.iter().enumerate() {
        let width = token.lexeme[byte_range.clone()].chars().count();
        if width == 0 {
            continue;
        }
        let line = first_line + line_offset;
        if line >= self.line_count() {
            break;
        }
        let start = if line_offset == 0 { first_column } else { 0 };
        spans.push(VisibleLineSpan {
            line,
            columns: start..(start + width),
        });
    }
    Some(spans)
}
/// Computes the column at which the token at `position` starts inside
/// `line_text(token_line(position))`.
///
/// Columns count visible characters; line terminators are excluded.
///
/// Gives [`None`] if the token cannot be found among the local tokens of its
/// line before a newline or a multi-line token ends the visible text.
fn token_start_visible_column(&self, position: TokenPosition) -> Option<usize> {
    let line_index = self.token_line(position)?;
    let line = self.lines.get(line_index)?;
    // A carried fragment, if present, occupies the beginning of the line.
    let mut column = match self.carried_piece_for_line(line_index) {
        Some(carried_text) => carried_text.chars().count(),
        None => 0,
    };
    for index in line.local_range()? {
        if index == position.0 {
            return Some(column);
        }
        let piece = self.buffer.get(index)?;
        // Nothing on this line can start after a newline or after a token
        // that continues onto the following lines.
        let ends_visible_text = piece.token.is_newline()
            || (piece.token.can_span_lines() && self.multi_line_map.contains_key(&index));
        if ends_visible_text {
            return None;
        }
        column += piece.lexeme.chars().count();
    }
    None
}
/// Maps a token span onto one visible line span.
///
/// The result runs from the start of the first visible span of `span.start`
/// to the end of the last visible span of `span.end`.
///
/// Gives [`None`] if either token has no visible span or if the two spans
/// lie on different lines.
#[must_use]
pub fn span_visible_on_line(&self, span: crate::lexer::TokenSpan) -> Option<VisibleLineSpan> {
    let first = self.token_visible_spans(span.start)?.into_iter().next()?;
    let last = self.token_visible_spans(span.end)?.pop()?;
    (first.line == last.line).then(|| VisibleLineSpan {
        line: first.line,
        columns: first.columns.start..last.columns.end,
    })
}
/// Returns the lexeme of the token at `pos`, or [`None`] if
/// [`TokenizedFile::token_at`] finds no token there.
pub fn token_text(&self, pos: TokenPosition) -> Option<&'src str> {
self.token_at(pos).map(|t| t.lexeme)
}
/// Returns `true` if the tokens at `a` and `b` start on the same physical
/// line, as reported by [`TokenizedFile::token_line`].
///
/// Returns `false` if either position has no line.
pub fn same_line(&self, a: TokenPosition, b: TokenPosition) -> bool {
    self.token_line(a)
        .zip(self.token_line(b))
        .is_some_and(|(line_of_a, line_of_b)| line_of_a == line_of_b)
}
}

View File

@ -0,0 +1,632 @@
//! Lexer for `UnrealScript` that understands inline `cpptext { ... }` and
//! `cppstruct { ... }` blocks.
//!
//! ## Notable details
//!
//! In `UnrealScript`, `cpptext` lets authors embed raw C++ between braces.\
//! Because whitespace, newlines, or comments may appear between the
//! `cpptext` keyword and the opening `{`, the lexer must remember that
//! it has just seen `cpptext` - hence a state machine.
//!
//! ## Modes
//!
//! - **Normal** - ordinary `UnrealScript` `RawTokens`.
//! - **`AwaitingCppBlock`** - after `cpptext`, waiting for the next `{`.
//!
//! When that brace arrives, the lexer consumes the entire C++ block as
//! one `RawToken` (`RawToken::Brace(BraceKind::CppBlock)`), tracking nested
//! braces, strings, and comments on the way. If the closing `}` is
//! missing, everything to EOF is treated as C++; downstream parsers must
//! handle that gracefully.
use logos::Lexer;
/// Which lexer mode we're in. See the module docs for the full story.
///
/// Stored in [`LexerState`]; `process_left_brace` reads it to decide how
/// a `{` is lexed.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Default)]
enum LexerMode {
/// Lexing regular `UnrealScript`.
#[default]
Normal,
/// Saw `cpptext` or `cppstruct`; waiting for the opening `{` of a C++ block.
AwaitingCppBlock,
}
/// Extra per-lexer state. Currently just holds the [`LexerMode`].
///
/// This is a logos-specific implementation detail: it is plugged into the
/// lexer through `#[logos(extras = LexerState)]`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub struct LexerState {
// Current mode; `Default` starts it as `LexerMode::Normal`.
mode: LexerMode,
}
/// Distinguishes an ordinary `{` token from one that starts
/// an embedded C++ block.
///
/// Carried as the payload of `RawToken::Brace`.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum BraceKind {
/// An ordinary `UnrealScript` `{`.
Normal,
/// A `{` that starts an embedded C++ block and consumes through its
/// matching `}`.
CppBlock,
}
/// Tokens produced by the `UnrealScript` lexer.
///
/// Includes both syntactic tokens and trivia such as whitespace, newlines,
/// and comments.
#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[logos(extras = LexerState)]
pub enum RawToken {
// # Compiler/directive keywords
#[regex(r"(?i)#exec[^\r\n]*")]
ExecDirective,
#[regex("(?i)cpptext", |lex| {
if is_next_nontrivia_left_brace(lex) {
lex.extras.mode = LexerMode::AwaitingCppBlock;
} else {
lex.extras.mode = LexerMode::Normal;
}
})]
CppText,
#[regex("(?i)cppstruct", |lex| {
if is_next_nontrivia_left_brace(lex) {
lex.extras.mode = LexerMode::AwaitingCppBlock;
} else {
lex.extras.mode = LexerMode::Normal;
}
})]
CppStruct,
// # Declaration & structural keywords
//#[regex("(?i)class")]
#[token("class", ignore(case))]
Class,
#[token("struct", ignore(case))]
Struct,
#[token("enum", ignore(case))]
Enum,
#[token("state", ignore(case))]
State,
#[token("auto", ignore(case))]
Auto,
#[token("function", ignore(case))]
Function,
#[token("event", ignore(case))]
Event,
#[token("delegate", ignore(case))]
Delegate,
#[token("var", ignore(case))]
Var,
#[token("local", ignore(case))]
Local,
// # Inheritance, interface, dependencies
#[token("extends", ignore(case))]
Extends,
#[token("dependson", ignore(case))]
DependsOn,
// # Access modifiers & properties
#[token("private", ignore(case))]
Private,
#[token("protected", ignore(case))]
Protected,
#[token("public", ignore(case))]
Public,
#[token("const", ignore(case))]
Const,
#[token("static", ignore(case))]
Static,
#[token("native", ignore(case))]
Native,
#[token("abstract", ignore(case))]
Abstract,
#[token("deprecated", ignore(case))]
Deprecated,
#[token("safereplace", ignore(case))]
SafeReplace,
#[token("exportstructs", ignore(case))]
ExportStructs,
#[token("input", ignore(case))]
Input,
// # UnrealScript metadata/specifiers
#[token("final", ignore(case))]
Final,
#[token("default", ignore(case))]
Default,
#[token("defaultproperties", ignore(case))]
DefaultProperties,
#[token("object", ignore(case))]
Object,
#[token("begin", ignore(case))]
Begin,
#[token("end", ignore(case))]
End,
#[token("optional", ignore(case))]
Optional,
#[token("config", ignore(case))]
Config,
#[token("perobjectconfig", ignore(case))]
PerObjectConfig,
#[token("globalconfig", ignore(case))]
GlobalConfig,
#[token("collapsecategories", ignore(case))]
CollapseCategories,
#[token("dontcollapsecategories", ignore(case))]
DontCollapseCategories,
#[token("hidecategories", ignore(case))]
HideCategories,
#[token("showcategories", ignore(case))]
ShowCategories,
#[token("localized", ignore(case))]
Localized,
#[token("placeable", ignore(case))]
Placeable,
#[token("notplaceable", ignore(case))]
NotPlaceable,
#[token("instanced", ignore(case))]
Instanced,
#[token("editconst", ignore(case))]
EditConst,
#[token("editconstarray", ignore(case))]
EditConstArray,
#[token("editinline", ignore(case))]
EditInline,
#[token("editinlineuse", ignore(case))]
EditInlineUse,
#[token("editinlinenew", ignore(case))]
EditInlineNew,
#[token("noteditinlinenew", ignore(case))]
NotEditInlineNew,
#[token("edfindable", ignore(case))]
EdFindable,
#[token("editinlinenotify", ignore(case))]
EditInlineNotify,
#[token("parseconfig", ignore(case))]
ParseConfig,
#[token("automated", ignore(case))]
Automated,
#[token("dynamicrecompile", ignore(case))]
DynamicRecompile,
#[token("transient", ignore(case))]
Transient,
#[token("long", ignore(case))]
Long,
#[token("operator", ignore(case))]
Operator,
#[token("preoperator", ignore(case))]
PreOperator,
#[token("postoperator", ignore(case))]
PostOperator,
#[token("simulated", ignore(case))]
Simulated,
#[token("exec", ignore(case))]
Exec,
#[token("latent", ignore(case))]
Latent,
#[token("iterator", ignore(case))]
Iterator,
#[token("out", ignore(case))]
Out,
#[token("skip", ignore(case))]
Skip,
#[token("singular", ignore(case))]
Singular,
#[token("coerce", ignore(case))]
Coerce,
#[token("assert", ignore(case))]
Assert,
#[token("ignores", ignore(case))]
Ignores,
#[token("within", ignore(case))]
Within,
#[token("init", ignore(case))]
Init,
#[token("export", ignore(case))]
Export,
#[token("noexport", ignore(case))]
NoExport,
#[token("hidedropdown", ignore(case))]
HideDropdown,
#[token("travel", ignore(case))]
Travel,
#[token("cache", ignore(case))]
Cache,
#[token("cacheexempt", ignore(case))]
CacheExempt,
// # Replication-related
#[token("reliable", ignore(case))]
Reliable,
#[token("unreliable", ignore(case))]
Unreliable,
#[token("replication", ignore(case))]
Replication,
#[token("nativereplication", ignore(case))]
NativeReplication,
// # Control-flow keywords
#[token("goto", ignore(case))]
Goto,
#[token("if", ignore(case))]
If,
#[token("else", ignore(case))]
Else,
#[token("switch", ignore(case))]
Switch,
#[token("case", ignore(case))]
Case,
#[token("for", ignore(case))]
For,
#[token("foreach", ignore(case))]
ForEach,
#[token("while", ignore(case))]
While,
#[token("do", ignore(case))]
Do,
#[token("until", ignore(case))]
Until,
#[token("break", ignore(case))]
Break,
#[token("continue", ignore(case))]
Continue,
#[token("return", ignore(case))]
Return,
// # Built-in types
#[token("int", ignore(case))]
Int,
#[token("float", ignore(case))]
Float,
#[token("bool", ignore(case))]
Bool,
#[token("byte", ignore(case))]
Byte,
#[token("string", ignore(case))]
String,
#[token("array", ignore(case))]
Array,
#[token("name", ignore(case))]
Name,
// FloatLiteral must come before IntegerLiteral and '.'
// to have higher priority.
// It also recognizes things like: `1.foo``, `1.foo.bar`, `1.2.3`.
// It has to. Because UnrealScript is a pile of-... wonderful language,
// where everything is possible.
#[regex(r"[0-9]+(?:\.(?:[0-9]+|[A-Za-z_][A-Za-z0-9_]*))+[fF]?")]
#[regex(r"(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[eE][+-]?[0-9]+)?[fF]?")]
#[regex(r"[0-9]+[eE][+-]?[0-9]+[fF]?")]
FloatLiteral,
#[regex(r"0b[01](?:_?[01])*")]
#[regex(r"0o[0-7](?:_?[0-7])*")]
#[regex(r"0x[0-9A-Fa-f](?:_?[0-9A-Fa-f])*")]
#[regex(r"[0-9][0-9]*")]
IntegerLiteral,
#[regex(r#""([^"\\\r\n]|\\.)*""#)]
StringLiteral,
#[regex(r"'[a-zA-Z0-9_\. \-]*'")]
NameLiteral,
#[token("true", ignore(case))]
True,
#[token("false", ignore(case))]
False,
#[token("none", ignore(case))]
None,
#[token("self", ignore(case))]
SelfValue,
#[token("new", ignore(case))]
New,
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
Identifier,
// # Operations
// ## Exponentiation
#[token("**")]
Exponentiation,
// ## Unary
#[token("++")]
Increment,
#[token("--")]
Decrement,
#[token("!")]
Not,
#[token("~")]
BitwiseNot,
// ## Vector
#[token("dot", ignore(case))]
Dot,
#[token("cross", ignore(case))]
Cross,
// ## Multiplicative
#[token("*")]
Multiply,
#[token("/")]
Divide,
#[token("%")]
Modulo,
// ## Additive
#[token("+")]
Plus,
#[token("-")]
Minus,
// ## String manipulation
#[token("@")]
ConcatSpace,
#[token("$")]
Concat,
// ## Shifts
#[token("<<")]
LeftShift,
#[token(">>>")]
LogicalRightShift,
#[token(">>")]
RightShift,
// ## Relational
#[token("<")]
Less,
#[token("<=")]
LessEqual,
#[token(">")]
Greater,
#[token(">=")]
GreaterEqual,
#[token("==")]
Equal,
#[token("!=")]
NotEqual,
#[token("~=")]
ApproximatelyEqual,
#[token("clockwisefrom", ignore(case))]
ClockwiseFrom,
// ## Bitwise
#[token("&")]
BitwiseAnd,
#[token("|")]
BitwiseOr,
#[token("^")]
BitwiseXor,
// ## Logical
#[token("&&")]
LogicalAnd,
#[token("^^")]
LogicalXor,
#[token("||")]
LogicalOr,
// ## Assignments
#[token("=")]
Assign,
#[token("*=")]
MultiplyAssign,
#[token("/=")]
DivideAssign,
#[token("%=")]
ModuloAssign,
#[token("+=")]
PlusAssign,
#[token("-=")]
MinusAssign,
#[token("$=")]
ConcatAssign,
#[token("@=")]
ConcatSpaceAssign,
// # Punctuation & delimiters
#[token("(")]
LeftParenthesis,
#[token(")")]
RightParenthesis,
#[token("{", process_left_brace)]
Brace(BraceKind),
#[token("}")]
RightBrace,
#[token("[")]
LeftBracket,
#[token("]")]
RightBracket,
#[token(";")]
Semicolon,
#[token(",")]
Comma,
#[token(".")]
Period,
#[token(":")]
Colon,
#[token("#")]
Hash,
#[token("?")]
Question,
// # Comments & whitespaces
#[regex(r"//[^\r\n]*")]
LineComment,
#[regex(r"/\*", handle_block_comment)]
BlockComment,
#[regex(r"\r\n|\n|\r")]
Newline,
#[regex(r"[ \t]+")]
Whitespace,
// # Technical
Error,
}
/// Lexer callback that swallows the body of an `UnrealScript` block comment.
///
/// The token regex has already matched the opening `/*`; this callback
/// extends the token up to and including the matching `*/`. Nested
/// `/* ... */` pairs are tracked, so the comment only ends when every opened
/// level has been closed.
///
/// An unterminated comment extends to the end of input.
fn handle_block_comment(lexer: &mut Lexer<RawToken>) {
    // One level is already open: the `/*` matched by the regex.
    let mut open_comments = 1;
    loop {
        let rest = lexer.remainder();
        let Some(current) = rest.chars().next() else {
            // End of input reached before the comment was closed.
            return;
        };
        if rest.starts_with("/*") {
            open_comments += 1;
            lexer.bump(2);
        } else if rest.starts_with("*/") {
            open_comments -= 1;
            lexer.bump(2);
            if open_comments == 0 {
                return;
            }
        } else {
            // Ordinary comment content; advance by one whole character.
            lexer.bump(current.len_utf8());
        }
    }
}
/// Lexer callback for `{` that decides which kind of brace was just matched.
///
/// In [`LexerMode::Normal`] the brace is an ordinary `UnrealScript` brace and
/// [`BraceKind::Normal`] is returned without consuming anything else.
///
/// In [`LexerMode::AwaitingCppBlock`] the brace opens an embedded C++ block:
/// the mode is reset to normal, the whole block is consumed into this token,
/// and [`BraceKind::CppBlock`] is returned.
fn process_left_brace(lexer: &mut Lexer<RawToken>) -> BraceKind {
    // Kept as an exhaustive match so that a new mode forces a decision here.
    let opens_cpp_block = match lexer.extras.mode {
        LexerMode::Normal => false,
        LexerMode::AwaitingCppBlock => true,
    };
    if !opens_cpp_block {
        return BraceKind::Normal;
    }
    // Reset the mode first, then swallow the block body into this token.
    lexer.extras.mode = LexerMode::Normal;
    consume_cpp_block(lexer);
    BraceKind::CppBlock
}
/// Consumes the remainder of an embedded C++ block.
///
/// The caller must already have consumed the opening `{`. On return the lexer
/// is positioned right after the `}` that closes that brace, or at the end of
/// input if the block is never closed.
///
/// While scanning, braces are not counted inside:
/// - string and character literals (`"..."`, `'...'`), including escapes;
/// - line comments (`// ...`);
/// - non-nesting block comments (`/* ... */`).
///
/// Only UE2-era C++ is targeted, so nothing more elaborate is handled.
fn consume_cpp_block(lexer: &mut Lexer<RawToken>) {
    // The opening `{` consumed by the caller counts as the first level.
    let mut open_braces = 1;
    while open_braces > 0 {
        let rest = lexer.remainder();
        let Some(current) = rest.chars().next() else {
            // Unterminated block: everything up to end of input is consumed.
            return;
        };
        if current == '{' {
            open_braces += 1;
            lexer.bump(1);
        } else if current == '}' {
            open_braces -= 1;
            lexer.bump(1);
        } else if rest.starts_with("/*") {
            lexer.bump(2); // the two-byte opener `/*`
            consume_c_style_block_comment(lexer);
        } else if rest.starts_with("//") {
            lexer.bump(2); // the two-byte opener `//`
            // Skip up to and including the first line-terminator character.
            loop {
                let Some(comment_character) = lexer.remainder().chars().next() else {
                    break;
                };
                lexer.bump(comment_character.len_utf8());
                if matches!(comment_character, '\n' | '\r') {
                    break;
                }
            }
        } else if current == '"' || current == '\'' {
            lexer.bump(1); // the opening quote
            consume_quoted_cpp_literal(lexer, current);
        } else {
            lexer.bump(current.len_utf8());
        }
    }
}
/// Consumes the body of a C-style `/* ... */` comment, which does not nest.
///
/// The opening `/*` must already have been consumed. The lexer ends up right
/// after the first `*/`, or at the end of input if the comment is never
/// closed.
fn consume_c_style_block_comment(lexer: &mut Lexer<RawToken>) {
    let rest = lexer.remainder();
    let bytes_to_consume = match rest.find("*/") {
        // Include the two-byte terminator itself.
        Some(terminator_offset) => terminator_offset + 2,
        None => rest.len(),
    };
    lexer.bump(bytes_to_consume);
}
/// Consumes the rest of a quoted C++ string or character literal.
///
/// The opening `delimiter` must already have been consumed. Scanning stops
/// right after the first unescaped `delimiter`; a backslash always takes the
/// following character with it. If the literal is never closed, everything up
/// to the end of input is consumed.
fn consume_quoted_cpp_literal(lexer: &mut Lexer<RawToken>, delimiter: char) {
    let mut characters = lexer.remainder().chars();
    let mut consumed_bytes = 0;
    while let Some(current) = characters.next() {
        consumed_bytes += current.len_utf8();
        if current == '\\' {
            // The escaped character can never terminate the literal.
            if let Some(escaped) = characters.next() {
                consumed_bytes += escaped.len_utf8();
            }
        } else if current == delimiter {
            break;
        }
    }
    lexer.bump(consumed_bytes);
}
/// Looks past any trivia after the current lexer position and reports whether
/// the first significant character is `{`. Nothing is consumed.
///
/// The following is skipped as trivia:
/// - spaces and tabs;
/// - line terminators (`\r`, `\n`, `\r\n`);
/// - line comments (`// ...`);
/// - block comments (`/* ... */`), with nesting.
///
/// `UnrealScript` allows arbitrary trivia between `cpptext` / `cppstruct` and
/// the brace that opens the embedded C++ block; this lookahead is meant for
/// use right after such a keyword.
///
/// Returns `false` when the next significant character is anything other than
/// `{`, when the input ends first, or when a block comment is left
/// unterminated.
fn is_next_nontrivia_left_brace(lexer: &Lexer<RawToken>) -> bool {
    let mut rest = lexer.remainder();
    loop {
        rest = rest.trim_start_matches(|character: char| {
            matches!(character, ' ' | '\t' | '\r' | '\n')
        });
        if let Some(comment_body) = rest.strip_prefix("//") {
            // Skip through the first line-terminator character, if there is one.
            rest = match comment_body.find(|character: char| character == '\n' || character == '\r')
            {
                Some(terminator_offset) => &comment_body[terminator_offset + 1..],
                None => "",
            };
        } else if let Some(comment_body) = rest.strip_prefix("/*") {
            let mut open_comments = 1;
            let mut cursor = comment_body;
            while open_comments > 0 {
                if let Some(after_opener) = cursor.strip_prefix("/*") {
                    open_comments += 1;
                    cursor = after_opener;
                } else if let Some(after_closer) = cursor.strip_prefix("*/") {
                    open_comments -= 1;
                    cursor = after_closer;
                } else {
                    let mut characters = cursor.chars();
                    if characters.next().is_none() {
                        // Unterminated comment: no brace can follow.
                        return false;
                    }
                    cursor = characters.as_str();
                }
            }
            rest = cursor;
        } else {
            // Either a significant character or the end of input.
            return rest.starts_with('{');
        }
    }
}

338
rottlib/src/lexer/tests.rs Normal file
View File

@ -0,0 +1,338 @@
use super::{Keyword, Token, TokenPosition, TokenizedFile, split_visible_line_segments};
/// Rebuilds the source text by concatenating every token's lexeme in buffer
/// order.
fn reconstruct_source(file: &TokenizedFile<'_>) -> String {
    let mut rebuilt = String::new();
    for piece in file.buffer.iter() {
        rebuilt.push_str(piece.lexeme);
    }
    rebuilt
}
/// Collects `(token kind, lexeme)` pairs for every token in the buffer, in
/// buffer order.
fn token_kinds_and_lexemes<'src>(file: &TokenizedFile<'src>) -> Vec<(Token, &'src str)> {
    let mut pairs = Vec::with_capacity(file.buffer.len());
    for piece in file.buffer.iter() {
        pairs.push((piece.token, piece.lexeme));
    }
    pairs
}
#[test]
fn split_visible_line_segments_returns_empty_for_single_line_text() {
    // Text without any line terminator yields no segments at all.
    for single_line_text in ["abcdef", ""] {
        assert!(split_visible_line_segments(single_line_text).is_empty());
    }
}
#[test]
fn split_visible_line_segments_handles_mixed_line_endings() {
    // `\r\n`, `\r` and `\n` are all recognised; the ranges exclude them.
    let mixed_text = "ab\r\ncd\ref\n";
    let ranges = split_visible_line_segments(mixed_text);
    assert_eq!(ranges, vec![0..2, 4..6, 7..9, 10..10]);

    let mut visible_parts: Vec<&str> = Vec::new();
    for range in ranges.iter() {
        visible_parts.push(&mixed_text[range.clone()]);
    }
    assert_eq!(visible_parts, vec!["ab", "cd", "ef", ""]);
}
#[test]
fn tokenization_is_lossless_for_mixed_input() {
    // Concatenating all lexemes must reproduce the input byte for byte, even
    // with mixed line endings, comments and numeric literals.
    let original_text = concat!(
        "class Foo extends Bar;\r\n",
        "var string S;\n",
        "/* block comment */\r",
        "defaultproperties {}\n",
        "X = 1.25e+2;\n",
    );
    let tokenized = TokenizedFile::tokenize(original_text);
    let rebuilt: String = tokenized.buffer.iter().map(|piece| piece.lexeme).collect();
    assert_eq!(rebuilt, original_text);
}
#[test]
fn trailing_newline_does_not_create_extra_empty_line() {
    // The final newline belongs to the line it terminates.
    let tokenized = TokenizedFile::tokenize("a\n");

    assert_eq!(tokenized.lines.len(), 1);
    let only_line = &tokenized.lines[0];
    assert_eq!(only_line.continued_from, None);
    assert_eq!(only_line.local_range(), Some(0..2));

    let expected_tokens = vec![(Token::Identifier, "a"), (Token::Newline, "\n")];
    assert_eq!(token_kinds_and_lexemes(&tokenized), expected_tokens);
}
#[test]
fn final_line_without_trailing_newline_is_committed() {
    // The last line is recorded even when the input does not end in a newline.
    let tokenized = TokenizedFile::tokenize("a\nb");

    assert_eq!(tokenized.lines.len(), 2);
    let first_line = &tokenized.lines[0];
    assert_eq!(first_line.continued_from, None);
    assert_eq!(first_line.local_range(), Some(0..2));
    let second_line = &tokenized.lines[1];
    assert_eq!(second_line.continued_from, None);
    assert_eq!(second_line.local_range(), Some(2..3));

    let expected_tokens = vec![
        (Token::Identifier, "a"),
        (Token::Newline, "\n"),
        (Token::Identifier, "b"),
    ];
    assert_eq!(token_kinds_and_lexemes(&tokenized), expected_tokens);
}
#[test]
fn multiline_block_comment_creates_continuation_line_with_local_tokens() {
    // A comment spanning two lines makes the second line a continuation of
    // the first, while `b` is still a token local to that second line.
    let tokenized = TokenizedFile::tokenize("a/*x\ny*/b");

    let expected_tokens = vec![
        (Token::Identifier, "a"),
        (Token::BlockComment, "/*x\ny*/"),
        (Token::Identifier, "b"),
    ];
    assert_eq!(token_kinds_and_lexemes(&tokenized), expected_tokens);

    assert_eq!(tokenized.lines.len(), 2);
    let origin_line = &tokenized.lines[0];
    assert_eq!(origin_line.continued_from, None);
    assert_eq!(origin_line.local_range(), Some(0..2));
    let continuation_line = &tokenized.lines[1];
    assert_eq!(continuation_line.continued_from, Some(0));
    assert_eq!(continuation_line.local_range(), Some(2..3));

    // The comment is token #1; its per-line visible byte ranges are recorded.
    let block_comment_index = 1;
    assert_eq!(
        tokenized.multi_line_map.get(&block_comment_index),
        Some(&vec![0..3, 4..7])
    );
}
#[test]
fn pure_multiline_token_finishes_with_bare_continuation_line() {
    // When nothing follows the multi-line token, the continuation line has no
    // tokens of its own.
    let tokenized = TokenizedFile::tokenize("/*a\nb*/");

    let expected_tokens = vec![(Token::BlockComment, "/*a\nb*/")];
    assert_eq!(token_kinds_and_lexemes(&tokenized), expected_tokens);

    assert_eq!(tokenized.lines.len(), 2);
    let origin_line = &tokenized.lines[0];
    assert_eq!(origin_line.continued_from, None);
    assert_eq!(origin_line.local_range(), Some(0..1));
    let continuation_line = &tokenized.lines[1];
    assert_eq!(continuation_line.continued_from, Some(0));
    assert_eq!(continuation_line.local_range(), None);

    assert_eq!(tokenized.multi_line_map.get(&0), Some(&vec![0..3, 4..7]));
}
#[test]
fn nested_block_comments_are_consumed_as_one_token() {
    // The inner `*/` must not terminate the outer comment.
    let nested_comment = "/* outer /* inner */ still outer */";
    let tokenized = TokenizedFile::tokenize(nested_comment);

    assert!(!tokenized.has_errors());
    assert_eq!(tokenized.buffer.len(), 1);
    let only_token = &tokenized.buffer[0];
    assert_eq!(only_token.token, Token::BlockComment);
    assert_eq!(only_token.lexeme, nested_comment);
}
#[test]
fn cpptext_with_trivia_before_brace_produces_cpp_block_token() {
    // Whitespace, a comment and a newline may separate `cpptext` from `{`;
    // the whole braced body, nested braces included, becomes one token.
    let tokenized = TokenizedFile::tokenize("cpptext /* gap */\n{ int x; if (y) { z(); } }");

    let expected_tokens = vec![
        (Token::Keyword(Keyword::CppText), "cpptext"),
        (Token::Whitespace, " "),
        (Token::BlockComment, "/* gap */"),
        (Token::Newline, "\n"),
        (Token::CppBlock, "{ int x; if (y) { z(); } }"),
    ];
    assert_eq!(token_kinds_and_lexemes(&tokenized), expected_tokens);

    assert_eq!(tokenized.lines.len(), 2);
    let keyword_line = &tokenized.lines[0];
    assert_eq!(keyword_line.continued_from, None);
    assert_eq!(keyword_line.local_range(), Some(0..4));
    let block_line = &tokenized.lines[1];
    assert_eq!(block_line.continued_from, None);
    assert_eq!(block_line.local_range(), Some(4..5));
}
#[test]
fn cpptext_without_following_brace_does_not_start_cpp_block_mode() {
    // `cpptext` followed by something other than `{` leaves later braces as
    // ordinary brace tokens.
    let tokenized = TokenizedFile::tokenize("cpptext Foo { bar }");
    let pairs = token_kinds_and_lexemes(&tokenized);

    assert!(pairs.iter().all(|(token, _)| *token != Token::CppBlock));
    assert!(pairs.contains(&(Token::Keyword(Keyword::CppText), "cpptext")));
    assert!(pairs.contains(&(Token::LeftBrace, "{")));
    assert!(pairs.contains(&(Token::RightBrace, "}")));
}
#[test]
fn utf16_length_is_precomputed_per_token() {
    // The emoji takes two UTF-16 code units, plus one for each quote.
    let emoji_literal = "\"😀\"";
    let tokenized = TokenizedFile::tokenize(emoji_literal);

    assert_eq!(tokenized.buffer.len(), 1);
    let only_token = &tokenized.buffer[0];
    assert_eq!(only_token.token, Token::StringLiteral);
    assert_eq!(only_token.utf16_length, emoji_literal.encode_utf16().count());
    assert_eq!(only_token.utf16_length, 4);
}
#[test]
fn lexer_reports_error_tokens() {
    // A backtick matches no lexer rule: it becomes an error token, and the
    // tokenization stays lossless.
    let unlexable = "`";
    let tokenized = TokenizedFile::tokenize(unlexable);

    assert!(tokenized.has_errors());
    assert_eq!(reconstruct_source(&tokenized), unlexable);
    assert_eq!(tokenized.buffer.len(), 1);
    let only_token = &tokenized.buffer[0];
    assert_eq!(only_token.token, Token::Error);
    assert_eq!(only_token.lexeme, "`");
}
#[test]
fn token_predicates_match_current_rules() {
    let int_keyword = Token::Keyword(Keyword::Int);

    // Positive cases.
    assert!(Token::Identifier.is_valid_identifier_name());
    assert!(int_keyword.is_valid_identifier_name());
    assert!(int_keyword.is_valid_type_name());
    assert!(Token::Keyword(Keyword::Delegate).is_valid_type_name());
    for modifier in [Keyword::Exec, Keyword::Operator, Keyword::Config] {
        assert!(Token::Keyword(modifier).is_valid_function_modifier());
    }

    // Negative cases.
    assert!(!Token::Plus.is_valid_identifier_name());
    assert!(!Token::Plus.is_valid_type_name());
    assert!(!Token::Keyword(Keyword::If).is_valid_function_modifier());
}
#[test]
fn tokens_iterator_yields_positions_in_buffer_order() {
    // Iterating the file yields every buffered token exactly once, paired
    // with its index in the buffer.
    let tokenized = TokenizedFile::tokenize("a + b");
    let yielded: Vec<_> = tokenized.iter().collect();
    assert_eq!(yielded.len(), tokenized.buffer.len());

    for ((position, token_data), expected_index) in yielded.into_iter().zip(0..) {
        assert_eq!(position.0, expected_index);
        assert_eq!(token_data, tokenized.buffer[expected_index]);
    }
}
/// Collects `(buffer index, token kind, lexeme)` for every token that
/// `line_tokens` yields for `line_number`.
fn line_token_kinds_and_lexemes<'src>(
    file: &TokenizedFile<'src>,
    line_number: usize,
) -> Vec<(usize, Token, &'src str)> {
    let mut entries = Vec::new();
    for (position, token_data) in file.line_tokens(line_number) {
        entries.push((position.0, token_data.token, token_data.lexeme));
    }
    entries
}
#[test]
fn line_count_counts_physical_lines_without_trailing_empty_line() {
    // A trailing newline terminates the last line instead of opening a new one.
    for (source, expected_line_count) in [("", 0), ("a", 1), ("a\n", 1), ("a\nb\n", 2)] {
        assert_eq!(
            TokenizedFile::tokenize(source).line_count(),
            expected_line_count
        );
    }
}
#[test]
fn line_tokens_return_only_tokens_that_start_on_that_line() {
    let tokenized = TokenizedFile::tokenize("a/*x\ny*/b\nc");

    // Line 0 owns both `a` and the comment that starts on it.
    let expected_first_line = vec![
        (0, Token::Identifier, "a"),
        (1, Token::BlockComment, "/*x\ny*/"),
    ];
    assert_eq!(
        line_token_kinds_and_lexemes(&tokenized, 0),
        expected_first_line
    );

    // Important: the carried fragment "y*/" is NOT yielded for line 1.
    let expected_second_line = vec![(2, Token::Identifier, "b"), (3, Token::Newline, "\n")];
    assert_eq!(
        line_token_kinds_and_lexemes(&tokenized, 1),
        expected_second_line
    );

    let expected_third_line = vec![(4, Token::Identifier, "c")];
    assert_eq!(
        line_token_kinds_and_lexemes(&tokenized, 2),
        expected_third_line
    );
}
#[test]
fn line_tokens_are_empty_for_continuation_only_or_out_of_bounds_lines() {
    let tokenized = TokenizedFile::tokenize("/*a\nb*/");
    // Line 1 holds only the tail of the comment; line 999 does not exist.
    for line_number in [1, 999] {
        assert_eq!(tokenized.line_tokens(line_number).count(), 0);
    }
}
#[test]
fn token_at_returns_token_for_valid_position_and_none_for_invalid_one() {
    let tokenized = TokenizedFile::tokenize("a + b");
    let kind_at = |index| {
        tokenized
            .token_at(TokenPosition(index))
            .map(|token_data| token_data.token)
    };

    assert_eq!(
        tokenized.token_at(TokenPosition(0)),
        Some(tokenized.buffer[0])
    );
    assert_eq!(kind_at(1), Some(Token::Whitespace));
    assert_eq!(kind_at(2), Some(Token::Plus));

    // One past the last token is out of range.
    let past_the_end = TokenPosition(tokenized.buffer.len());
    assert_eq!(tokenized.token_at(past_the_end), None);
}
#[test]
fn line_text_omits_line_terminators_and_handles_empty_lines() {
    let tokenized = TokenizedFile::tokenize("left\n\nright");
    for (line_number, expected_text) in [(0, "left"), (1, ""), (2, "right")] {
        assert_eq!(
            tokenized.line_text(line_number).as_deref(),
            Some(expected_text)
        );
    }
    // Lines past the end of the file have no text at all.
    assert_eq!(tokenized.line_text(999), None);
}
#[test]
fn line_text_includes_carried_fragment_on_continued_line() {
    // The tail of the comment ("y*/") is part of the second line's text.
    let tokenized = TokenizedFile::tokenize("a/*x\ny*/b");
    let second_line_text = tokenized.line_text(1);
    assert_eq!(second_line_text.as_deref(), Some("y*/b"));
}
#[test]
fn line_text_on_origin_line_of_multiline_token_uses_only_visible_part() {
    // Only the head of the comment ("/*x") belongs to the first line's text.
    let tokenized = TokenizedFile::tokenize("a/*x\ny*/b");
    let first_line_text = tokenized.line_text(0);
    assert_eq!(first_line_text.as_deref(), Some("a/*x"));
}

637
rottlib/src/lexer/token.rs Normal file
View File

@ -0,0 +1,637 @@
//! Token definitions for Fermented `UnrealScript`.
//!
//! These are the tokens consumed by the parser and derived from [`RawToken`]s.
use super::{BraceKind, raw_lexer::RawToken};
/// Tokens consumed by the Fermented `UnrealScript` parser.
///
/// Every value is obtained from a [`RawToken`] through the `From<RawToken>`
/// conversion in this module. All reserved words are folded into
/// [`Token::Keyword`]; the raw `{` token is split into [`Token::LeftBrace`]
/// and [`Token::CppBlock`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum Token {
// NOTE(review): the lexing rule for this token is outside the visible
// source; presumably an `#exec` directive line - confirm.
ExecDirective,
/// A reserved word; see [`Keyword`].
Keyword(Keyword),
// Primaries
/// Floating-point literal. The raw lexer also classifies dotted forms such
/// as `1.foo`, `1.foo.bar` and `1.2.3` as this token.
FloatLiteral,
/// Integer literal: decimal digits, or a `0b` / `0o` / `0x` prefixed form.
IntegerLiteral,
/// Double-quoted string literal with backslash escapes.
StringLiteral,
/// Single-quoted name literal.
NameLiteral,
/// A letter or `_` followed by letters, digits and `_`.
Identifier,
// Operations
/// `**`
Exponentiation,
/// `++`
Increment,
/// `--`
Decrement,
/// `!`
Not,
/// `~`
BitwiseNot,
/// `*`
Multiply,
/// `/`
Divide,
/// `%`
Modulo,
/// `+`
Plus,
/// `-`
Minus,
/// `@`
ConcatSpace,
/// `$`
Concat,
/// `<<`
LeftShift,
/// `>>>`
LogicalRightShift,
/// `>>`
RightShift,
/// `<`
Less,
/// `<=`
LessEqual,
/// `>`
Greater,
/// `>=`
GreaterEqual,
/// `==`
Equal,
/// `!=`
NotEqual,
/// `~=`
ApproximatelyEqual,
/// `&`
BitwiseAnd,
/// `|`
BitwiseOr,
/// `^`
BitwiseXor,
/// `&&`
LogicalAnd,
/// `^^`
LogicalXor,
/// `||`
LogicalOr,
/// `=`
Assign,
/// `*=`
MultiplyAssign,
/// `/=`
DivideAssign,
/// `%=`
ModuloAssign,
/// `+=`
PlusAssign,
/// `-=`
MinusAssign,
/// `$=`
ConcatAssign,
/// `@=`
ConcatSpaceAssign,
// Delimiters
/// `(`
LeftParenthesis,
/// `)`
RightParenthesis,
/// An ordinary `{`.
LeftBrace,
/// A whole embedded C++ block, from its opening `{` through the matching
/// `}`, lexed as a single token after `cpptext` or `cppstruct`.
CppBlock,
/// `}`
RightBrace,
/// `[`
LeftBracket,
/// `]`
RightBracket,
/// `;`
Semicolon,
/// `,`
Comma,
/// `.`
Period,
/// `:`
Colon,
/// `#`
Hash,
/// `?`
Question,
// Trivia
/// `// ...` up to, but not including, the line terminator.
LineComment,
/// `/* ... */`, including nested block comments.
BlockComment,
/// One line terminator: `\r\n`, `\n` or `\r`.
Newline,
/// A run of spaces and/or tabs.
Whitespace,
// Technical - for representing a very wrong sequence of characters
Error,
}
// Conversion from the raw lexer's token set into the parser-facing one.
impl From<RawToken> for Token {
#![allow(clippy::too_many_lines)]
/// Converts a [`RawToken`] into the corresponding [`Token`].
///
/// The mapping is total and falls into three groups:
/// - `RawToken::Brace` is split by its [`BraceKind`] into
///   [`Token::LeftBrace`] and [`Token::CppBlock`];
/// - every reserved word becomes [`Token::Keyword`] with the matching
///   [`Keyword`] variant;
/// - everything else maps to the identically named [`Token`] variant.
fn from(token: RawToken) -> Self {
// No wildcard arm: the match is exhaustive over `RawToken`.
match token {
// Non-trivial conversions
RawToken::Brace(BraceKind::Normal) => Self::LeftBrace,
RawToken::Brace(BraceKind::CppBlock) => Self::CppBlock,
// Keyword conversions
RawToken::CppText => Self::Keyword(Keyword::CppText),
RawToken::CppStruct => Self::Keyword(Keyword::CppStruct),
RawToken::Class => Self::Keyword(Keyword::Class),
RawToken::Struct => Self::Keyword(Keyword::Struct),
RawToken::Enum => Self::Keyword(Keyword::Enum),
RawToken::State => Self::Keyword(Keyword::State),
RawToken::Auto => Self::Keyword(Keyword::Auto),
RawToken::Function => Self::Keyword(Keyword::Function),
RawToken::Event => Self::Keyword(Keyword::Event),
RawToken::Delegate => Self::Keyword(Keyword::Delegate),
RawToken::Var => Self::Keyword(Keyword::Var),
RawToken::Local => Self::Keyword(Keyword::Local),
RawToken::Extends => Self::Keyword(Keyword::Extends),
RawToken::DependsOn => Self::Keyword(Keyword::DependsOn),
RawToken::Private => Self::Keyword(Keyword::Private),
RawToken::Protected => Self::Keyword(Keyword::Protected),
RawToken::Public => Self::Keyword(Keyword::Public),
RawToken::Const => Self::Keyword(Keyword::Const),
RawToken::Static => Self::Keyword(Keyword::Static),
RawToken::Native => Self::Keyword(Keyword::Native),
RawToken::Abstract => Self::Keyword(Keyword::Abstract),
RawToken::Deprecated => Self::Keyword(Keyword::Deprecated),
RawToken::SafeReplace => Self::Keyword(Keyword::SafeReplace),
RawToken::ExportStructs => Self::Keyword(Keyword::ExportStructs),
RawToken::Input => Self::Keyword(Keyword::Input),
RawToken::Final => Self::Keyword(Keyword::Final),
RawToken::Default => Self::Keyword(Keyword::Default),
RawToken::DefaultProperties => Self::Keyword(Keyword::DefaultProperties),
RawToken::Object => Self::Keyword(Keyword::Object),
RawToken::Begin => Self::Keyword(Keyword::Begin),
RawToken::End => Self::Keyword(Keyword::End),
RawToken::Optional => Self::Keyword(Keyword::Optional),
RawToken::Config => Self::Keyword(Keyword::Config),
RawToken::PerObjectConfig => Self::Keyword(Keyword::PerObjectConfig),
RawToken::GlobalConfig => Self::Keyword(Keyword::GlobalConfig),
RawToken::CollapseCategories => Self::Keyword(Keyword::CollapseCategories),
RawToken::DontCollapseCategories => Self::Keyword(Keyword::DontCollapseCategories),
RawToken::HideCategories => Self::Keyword(Keyword::HideCategories),
RawToken::ShowCategories => Self::Keyword(Keyword::ShowCategories),
RawToken::Localized => Self::Keyword(Keyword::Localized),
RawToken::Placeable => Self::Keyword(Keyword::Placeable),
RawToken::NotPlaceable => Self::Keyword(Keyword::NotPlaceable),
RawToken::Instanced => Self::Keyword(Keyword::Instanced),
RawToken::EditConst => Self::Keyword(Keyword::EditConst),
RawToken::EditConstArray => Self::Keyword(Keyword::EditConstArray),
RawToken::EditInline => Self::Keyword(Keyword::EditInline),
RawToken::EditInlineUse => Self::Keyword(Keyword::EditInlineUse),
RawToken::EditInlineNew => Self::Keyword(Keyword::EditInlineNew),
RawToken::NotEditInlineNew => Self::Keyword(Keyword::NotEditInlineNew),
RawToken::EdFindable => Self::Keyword(Keyword::EdFindable),
RawToken::EditInlineNotify => Self::Keyword(Keyword::EditInlineNotify),
RawToken::ParseConfig => Self::Keyword(Keyword::ParseConfig),
RawToken::Automated => Self::Keyword(Keyword::Automated),
RawToken::DynamicRecompile => Self::Keyword(Keyword::DynamicRecompile),
RawToken::Transient => Self::Keyword(Keyword::Transient),
RawToken::Long => Self::Keyword(Keyword::Long),
RawToken::Operator => Self::Keyword(Keyword::Operator),
RawToken::PreOperator => Self::Keyword(Keyword::PreOperator),
RawToken::PostOperator => Self::Keyword(Keyword::PostOperator),
RawToken::Simulated => Self::Keyword(Keyword::Simulated),
RawToken::Exec => Self::Keyword(Keyword::Exec),
RawToken::Latent => Self::Keyword(Keyword::Latent),
RawToken::Iterator => Self::Keyword(Keyword::Iterator),
RawToken::Out => Self::Keyword(Keyword::Out),
RawToken::Skip => Self::Keyword(Keyword::Skip),
RawToken::Singular => Self::Keyword(Keyword::Singular),
RawToken::Coerce => Self::Keyword(Keyword::Coerce),
RawToken::Assert => Self::Keyword(Keyword::Assert),
RawToken::Ignores => Self::Keyword(Keyword::Ignores),
RawToken::Within => Self::Keyword(Keyword::Within),
RawToken::Init => Self::Keyword(Keyword::Init),
RawToken::Export => Self::Keyword(Keyword::Export),
RawToken::NoExport => Self::Keyword(Keyword::NoExport),
RawToken::HideDropdown => Self::Keyword(Keyword::HideDropdown),
RawToken::Travel => Self::Keyword(Keyword::Travel),
RawToken::Cache => Self::Keyword(Keyword::Cache),
RawToken::CacheExempt => Self::Keyword(Keyword::CacheExempt),
RawToken::Reliable => Self::Keyword(Keyword::Reliable),
RawToken::Unreliable => Self::Keyword(Keyword::Unreliable),
RawToken::Replication => Self::Keyword(Keyword::Replication),
RawToken::NativeReplication => Self::Keyword(Keyword::NativeReplication),
RawToken::Goto => Self::Keyword(Keyword::Goto),
RawToken::If => Self::Keyword(Keyword::If),
RawToken::Else => Self::Keyword(Keyword::Else),
RawToken::Switch => Self::Keyword(Keyword::Switch),
RawToken::Case => Self::Keyword(Keyword::Case),
RawToken::For => Self::Keyword(Keyword::For),
RawToken::ForEach => Self::Keyword(Keyword::ForEach),
RawToken::While => Self::Keyword(Keyword::While),
RawToken::Do => Self::Keyword(Keyword::Do),
RawToken::Until => Self::Keyword(Keyword::Until),
RawToken::Break => Self::Keyword(Keyword::Break),
RawToken::Continue => Self::Keyword(Keyword::Continue),
RawToken::Return => Self::Keyword(Keyword::Return),
RawToken::Int => Self::Keyword(Keyword::Int),
RawToken::Float => Self::Keyword(Keyword::Float),
RawToken::Bool => Self::Keyword(Keyword::Bool),
RawToken::Byte => Self::Keyword(Keyword::Byte),
RawToken::String => Self::Keyword(Keyword::String),
RawToken::Array => Self::Keyword(Keyword::Array),
RawToken::Name => Self::Keyword(Keyword::Name),
RawToken::True => Self::Keyword(Keyword::True),
RawToken::False => Self::Keyword(Keyword::False),
RawToken::None => Self::Keyword(Keyword::None),
RawToken::SelfValue => Self::Keyword(Keyword::SelfValue),
RawToken::New => Self::Keyword(Keyword::New),
RawToken::Dot => Self::Keyword(Keyword::Dot),
RawToken::Cross => Self::Keyword(Keyword::Cross),
RawToken::ClockwiseFrom => Self::Keyword(Keyword::ClockwiseFrom),
// Trivial 1-to-1 conversions.
RawToken::ExecDirective => Self::ExecDirective,
RawToken::FloatLiteral => Self::FloatLiteral,
RawToken::IntegerLiteral => Self::IntegerLiteral,
RawToken::StringLiteral => Self::StringLiteral,
RawToken::NameLiteral => Self::NameLiteral,
RawToken::Identifier => Self::Identifier,
RawToken::Exponentiation => Self::Exponentiation,
RawToken::Increment => Self::Increment,
RawToken::Decrement => Self::Decrement,
RawToken::Not => Self::Not,
RawToken::BitwiseNot => Self::BitwiseNot,
RawToken::Multiply => Self::Multiply,
RawToken::Divide => Self::Divide,
RawToken::Modulo => Self::Modulo,
RawToken::Plus => Self::Plus,
RawToken::Minus => Self::Minus,
RawToken::ConcatSpace => Self::ConcatSpace,
RawToken::Concat => Self::Concat,
RawToken::LeftShift => Self::LeftShift,
RawToken::LogicalRightShift => Self::LogicalRightShift,
RawToken::RightShift => Self::RightShift,
RawToken::Less => Self::Less,
RawToken::LessEqual => Self::LessEqual,
RawToken::Greater => Self::Greater,
RawToken::GreaterEqual => Self::GreaterEqual,
RawToken::Equal => Self::Equal,
RawToken::NotEqual => Self::NotEqual,
RawToken::ApproximatelyEqual => Self::ApproximatelyEqual,
RawToken::BitwiseAnd => Self::BitwiseAnd,
RawToken::BitwiseOr => Self::BitwiseOr,
RawToken::BitwiseXor => Self::BitwiseXor,
RawToken::LogicalAnd => Self::LogicalAnd,
RawToken::LogicalXor => Self::LogicalXor,
RawToken::LogicalOr => Self::LogicalOr,
RawToken::Assign => Self::Assign,
RawToken::MultiplyAssign => Self::MultiplyAssign,
RawToken::DivideAssign => Self::DivideAssign,
RawToken::ModuloAssign => Self::ModuloAssign,
RawToken::PlusAssign => Self::PlusAssign,
RawToken::MinusAssign => Self::MinusAssign,
RawToken::ConcatAssign => Self::ConcatAssign,
RawToken::ConcatSpaceAssign => Self::ConcatSpaceAssign,
RawToken::LeftParenthesis => Self::LeftParenthesis,
RawToken::RightParenthesis => Self::RightParenthesis,
RawToken::RightBrace => Self::RightBrace,
RawToken::LeftBracket => Self::LeftBracket,
RawToken::RightBracket => Self::RightBracket,
RawToken::Semicolon => Self::Semicolon,
RawToken::Comma => Self::Comma,
RawToken::Period => Self::Period,
RawToken::Colon => Self::Colon,
RawToken::Hash => Self::Hash,
RawToken::Question => Self::Question,
RawToken::LineComment => Self::LineComment,
RawToken::BlockComment => Self::BlockComment,
RawToken::Newline => Self::Newline,
RawToken::Whitespace => Self::Whitespace,
RawToken::Error => Self::Error,
}
}
}
impl Token {
    /// Reports whether this token is [`Token::Newline`].
    #[must_use]
    pub const fn is_newline(&self) -> bool {
        matches!(*self, Self::Newline)
    }

    /// Reports whether this token is whitespace trivia, i.e. either
    /// [`Token::Whitespace`] or [`Token::Newline`].
    ///
    /// Comments are **not** treated as whitespace.
    #[must_use]
    pub const fn is_whitespace(&self) -> bool {
        matches!(*self, Self::Newline | Self::Whitespace)
    }

    /// Reports whether a token of this kind may contain newline characters
    /// and therefore stretch over several physical lines.
    #[must_use]
    pub const fn can_span_lines(&self) -> bool {
        matches!(*self, Self::Error | Self::CppBlock | Self::BlockComment)
    }

    /// Reports whether this token may stand in type position: an identifier,
    /// or a keyword accepted by [`Keyword::is_valid_type_name`].
    #[must_use]
    pub fn is_valid_type_name(&self) -> bool {
        match self {
            Self::Identifier => true,
            Self::Keyword(keyword) => keyword.is_valid_type_name(),
            _ => false,
        }
    }

    /// Reports whether this token may be used as an identifier.
    ///
    /// Besides [`Token::Identifier`] itself, this covers the keywords that
    /// `UnrealScript` also accepts in identifier position; see
    /// [`Keyword::is_valid_identifier_name`].
    #[must_use]
    pub fn is_valid_identifier_name(&self) -> bool {
        match self {
            Self::Identifier => true,
            Self::Keyword(keyword) => keyword.is_valid_identifier_name(),
            _ => false,
        }
    }

    /// Reports whether this token is a keyword usable as a function modifier.
    #[must_use]
    pub const fn is_valid_function_modifier(&self) -> bool {
        matches!(
            self,
            Self::Keyword(
                Keyword::Final
                    | Keyword::Native
                    | Keyword::Abstract
                    | Keyword::Transient
                    | Keyword::Public
                    | Keyword::Protected
                    | Keyword::Private
                    | Keyword::Static
                    | Keyword::Const
                    | Keyword::Deprecated
                    | Keyword::NoExport
                    | Keyword::Export
                    | Keyword::Simulated
                    | Keyword::Latent
                    | Keyword::Iterator
                    | Keyword::Singular
                    | Keyword::Reliable
                    | Keyword::Unreliable
                    | Keyword::NativeReplication
                    | Keyword::PreOperator
                    | Keyword::Operator
                    | Keyword::PostOperator
                    | Keyword::Config
                    | Keyword::Exec
            )
        )
    }

    /// Reports whether this token can be ruled out as the first token of an
    /// expression.
    ///
    /// The predicate is deliberately conservative and intended for recovery:
    /// - `true`: do not even try to parse an expression starting here;
    /// - `false`: the token may start an expression, or the regular
    ///   expression parser is in a better position to report the error.
    #[must_use]
    pub const fn is_definitely_not_expression_start(self) -> bool {
        // Keywords carry their own rule.
        if let Self::Keyword(keyword) = self {
            return keyword.is_definitely_not_expression_start();
        }
        matches!(
            self,
            // Closing delimiters / separators.
            Self::RightParenthesis
                | Self::RightBrace
                | Self::RightBracket
                | Self::Semicolon
                | Self::Comma
                | Self::Colon
                | Self::Question
                // Tokens that only continue a previous expression.
                | Self::Period
                // Infix / postfix / assignment operators.
                | Self::Exponentiation
                | Self::Multiply
                | Self::Divide
                | Self::Modulo
                | Self::ConcatSpace
                | Self::Concat
                | Self::LeftShift
                | Self::LogicalRightShift
                | Self::RightShift
                | Self::Less
                | Self::LessEqual
                | Self::Greater
                | Self::GreaterEqual
                | Self::Equal
                | Self::NotEqual
                | Self::ApproximatelyEqual
                | Self::BitwiseAnd
                | Self::BitwiseOr
                | Self::BitwiseXor
                | Self::LogicalAnd
                | Self::LogicalXor
                | Self::LogicalOr
                | Self::Assign
                | Self::MultiplyAssign
                | Self::DivideAssign
                | Self::ModuloAssign
                | Self::PlusAssign
                | Self::MinusAssign
                | Self::ConcatAssign
                | Self::ConcatSpaceAssign
                // Non-expression trivia / technical tokens.
                | Self::ExecDirective
                | Self::CppBlock
                | Self::Hash
                | Self::LineComment
                | Self::BlockComment
                | Self::Newline
                | Self::Whitespace
                | Self::Error
        )
    }
}
/// Reserved words of Fermented `UnrealScript`.
///
/// These are represented in [`Token`] as [`Token::Keyword`].
///
/// Each variant is produced from the identically named [`RawToken`] variant
/// by the `From<RawToken>` conversion for [`Token`]. Being reserved does not
/// always exclude use as a name: see [`Keyword::is_valid_identifier_name`]
/// and [`Keyword::is_valid_type_name`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum Keyword {
// C++ blocks
// `cpptext` / `cppstruct` may be followed by an embedded C++ block, which
// is lexed as a single `Token::CppBlock`.
CppText,
CppStruct,
// Top-level declaration
Class,
Struct,
Enum,
State,
Auto,
Function,
Event,
Delegate,
Var,
Local,
// Class modifiers
Extends,
DependsOn,
// Access modifiers
Private,
Protected,
Public,
Const,
// Meta data / specifiers
Static,
Native,
Abstract,
Deprecated,
SafeReplace,
ExportStructs,
Input,
Final,
Default,
DefaultProperties,
Object,
Begin,
End,
Optional,
Config,
PerObjectConfig,
GlobalConfig,
CollapseCategories,
DontCollapseCategories,
HideCategories,
ShowCategories,
Localized,
Placeable,
NotPlaceable,
Instanced,
EditConst,
EditConstArray,
EditInline,
EditInlineUse,
EditInlineNew,
NotEditInlineNew,
EdFindable,
EditInlineNotify,
ParseConfig,
Automated,
DynamicRecompile,
Transient,
Long,
Operator,
PreOperator,
PostOperator,
Simulated,
Exec,
Latent,
Iterator,
Out,
Skip,
Singular,
Coerce,
Assert,
Ignores,
Within,
Init,
Export,
NoExport,
HideDropdown,
Travel,
Cache,
CacheExempt,
// Replication
Reliable,
Unreliable,
Replication,
NativeReplication,
// Control flow
Goto,
If,
Else,
Switch,
Case,
For,
ForEach,
While,
Do,
Until,
Break,
Continue,
Return,
// Built-in types
Int,
Float,
Bool,
/// `byte` (matched case-insensitively).
Byte,
/// `string` (matched case-insensitively).
String,
/// `array` (matched case-insensitively).
Array,
/// `name` (matched case-insensitively).
Name,
// Literals
/// `true` (matched case-insensitively).
True,
/// `false` (matched case-insensitively).
False,
/// `none` (matched case-insensitively).
None,
/// `self` (matched case-insensitively).
SelfValue,
/// `new` (matched case-insensitively).
New,
// Vector math operators
/// `dot` (matched case-insensitively).
Dot,
/// `cross` (matched case-insensitively).
Cross,
/// `clockwisefrom` (matched case-insensitively).
ClockwiseFrom,
}
impl Keyword {
    /// Reports whether this keyword is also accepted in identifier position.
    ///
    /// Apart from the built-in type words, the accepted set consists of
    /// context keywords that were checked directly. The variants below are
    /// grouped in the same order as the sections of the [`Keyword`]
    /// declaration.
    #[must_use]
    pub const fn is_valid_identifier_name(self) -> bool {
        matches!(
            self,
            // C++ blocks
            Self::CppText
                | Self::CppStruct
                // Declarations
                | Self::Class
                | Self::Enum
                | Self::State
                | Self::Auto
                | Self::Function
                | Self::Event
                | Self::Delegate
                | Self::Var
                | Self::Local
                // Class and access modifiers
                | Self::Extends
                | Self::Const
                // Meta data / specifiers
                | Self::Static
                | Self::Native
                | Self::Abstract
                | Self::SafeReplace
                | Self::ExportStructs
                | Self::Input
                | Self::Default
                | Self::DefaultProperties
                | Self::Object
                | Self::End
                | Self::Optional
                | Self::Config
                | Self::PerObjectConfig
                | Self::HideCategories
                | Self::Placeable
                | Self::NotPlaceable
                | Self::Instanced
                | Self::Transient
                | Self::Long
                | Self::Simulated
                | Self::Latent
                | Self::Iterator
                | Self::Out
                | Self::Skip
                | Self::Singular
                | Self::Assert
                | Self::Within
                | Self::Init
                | Self::Export
                | Self::NoExport
                | Self::Travel
                | Self::Cache
                | Self::CacheExempt
                // Replication
                | Self::Reliable
                | Self::Unreliable
                // Control flow
                | Self::Goto
                | Self::Switch
                | Self::For
                | Self::Continue
                // Built-in type words
                | Self::Int
                | Self::Float
                | Self::Bool
                | Self::Byte
                | Self::String
                | Self::Array
                | Self::Name
                // Literals
                | Self::SelfValue
                | Self::New
                // Vector math operators
                | Self::Dot
                | Self::Cross
                | Self::ClockwiseFrom
        )
    }

    /// Reports whether this keyword is accepted in type position.
    #[must_use]
    pub const fn is_valid_type_name(self) -> bool {
        matches!(
            self,
            // Declaration words
            Self::State
                | Self::Function
                | Self::Delegate
                | Self::Object
                // Built-in type words
                | Self::Int
                | Self::Float
                | Self::Bool
                | Self::Byte
                | Self::String
                | Self::Array
                | Self::Name
        )
    }

    /// Reports whether this keyword can be ruled out as the first token of an
    /// expression.
    #[must_use]
    pub const fn is_definitely_not_expression_start(self) -> bool {
        matches!(self, Self::Case | Self::Else | Self::Until)
    }
}

View File

@ -1,48 +1,46 @@
//! Cursor utilities for a token stream.
//!
//! Provides memoized lookahead over significant tokens and attaches
//! trivia to [`TriviaComponent`]. Significant tokens exclude whitespace and
//! comments; see [`crate::parser::TriviaKind`].
//! Provides memoized lookahead over significant tokens and records trivia in
//! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments;
//! see [`parser::TriviaKind`].
use crate::lexer::{Token, TokenLocation};
use crate::parser::trivia::TriviaComponent;
// TODO: need a refactor pass
use std::collections::VecDeque;
// TODO: NO RETURNING EOF
use crate::{
lexer::{self, Keyword, Token, TokenPosition, TokenSpan},
parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder},
};
/// Cursor over a token stream with memoized lookahead and trivia attachment.
#[derive(Clone, Debug)]
pub(crate) struct CursorComponent<'src> {
/// Underlying token stream.
tokens: crate::lexer::Tokens<'src>,
/// Significant-token lookahead buffer.
lookahead_buffer: std::collections::VecDeque<(TokenLocation, crate::lexer::TokenPiece<'src>)>,
/// Location of the last consumed token.
previous_location: Option<TokenLocation>,
/// Location of the last significant token.
///
/// Used to associate following trivia with the correct token.
last_significant_location: Option<TokenLocation>,
/// Scratch space for [`CursorComponent::buffer_next_significant_token`],
/// used to avoid reallocations.
trivia_buffer: Vec<crate::parser::trivia::TriviaToken<'src>>,
pub(crate) struct Cursor<'file, 'src> {
tokens: lexer::Tokens<'file, 'src>,
lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>,
last_consumed_position: Option<TokenPosition>,
}
impl<'src> CursorComponent<'src> {
/// Create a [`CursorComponent`] over the tokens of `file`.
pub(crate) fn new(tokenized_file: &'src crate::lexer::TokenizedFile<'src>) -> Self {
impl<'file, 'src> Cursor<'file, 'src> {
/// Creates a [`Cursor`] over `tokenized_file`.
pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self {
Self {
tokens: tokenized_file.tokens(),
lookahead_buffer: std::collections::VecDeque::new(),
previous_location: None,
last_significant_location: None,
trivia_buffer: Vec::new(),
tokens: tokenized_file.iter(),
lookahead_buffer: VecDeque::new(),
last_consumed_position: None,
}
}
/// Ensure the lookahead buffer contains at least `lookahead + 1`
/// significant tokens.
/// Ensures that the lookahead buffer contains at least `lookahead + 1`
/// significant tokens, if available.
///
/// May consume trivia from the underlying stream.
/// Does not consume significant tokens.
fn ensure_min_lookahead(&mut self, lookahead: usize, trivia: &mut TriviaComponent<'src>) {
/// May consume trivia from the underlying stream without consuming
/// significant tokens.
fn ensure_lookahead_available(
&mut self,
lookahead: usize,
trivia: &mut TriviaIndexBuilder<'src>,
) {
while self.lookahead_buffer.len() <= lookahead {
if !self.buffer_next_significant_token(trivia) {
break;
@ -50,181 +48,368 @@ impl<'src> CursorComponent<'src> {
}
}
/// Scan to the next significant token, recording intervening trivia.
/// Buffers the next significant token and records any preceding trivia.
///
/// Returns `true` if a significant token was buffered,
/// `false` on end of file.
fn buffer_next_significant_token(&mut self, trivia: &mut TriviaComponent<'src>) -> bool {
self.trivia_buffer.clear();
while let Some((token_location, token_piece)) = self.tokens.next() {
if let Ok(trivia_kind) = crate::parser::TriviaKind::try_from(token_piece.token) {
self.trivia_buffer.push(crate::parser::TriviaToken {
/// Returns `true` if a significant token was buffered, or `false` if the
/// stream is exhausted.
fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool {
for (token_position, token_data) in self.tokens.by_ref() {
if let Ok(trivia_kind) = parser::TriviaKind::try_from(token_data.token) {
trivia.record_trivia(parser::TriviaToken {
kind: trivia_kind,
text: token_piece.lexeme,
location: token_location,
text: token_data.lexeme,
position: token_position,
});
} else {
// Attach trivia found after the previous significant token
if !self.trivia_buffer.is_empty() {
trivia.record_between_locations(
self.last_significant_location,
token_location,
&mut self.trivia_buffer,
);
}
trivia.record_significant_token(token_position);
self.lookahead_buffer
.push_back((token_location, token_piece));
self.last_significant_location = Some(token_location);
.push_back((token_position, token_data));
return true;
}
}
// Reached end-of-file: attach trailing trivia
if !self.trivia_buffer.is_empty() {
trivia.record_between_locations(
self.last_significant_location,
TokenLocation::EndOfFile,
&mut self.trivia_buffer,
);
}
false
}
}
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Returns the next token without consuming it.
impl<'src, 'arena> Parser<'src, 'arena> {
fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> {
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
self.cursor.lookahead_buffer.front()
}
/// Returns the next significant token without consuming it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain.
#[must_use]
pub(crate) fn peek_token(&mut self) -> Option<Token> {
self.peek_entry().map(|(_, token_piece)| token_piece.token)
self.peek_buffered_token()
.map(|(_, token_data)| token_data.token)
}
/// Returns the next token, its lexeme, and its location
/// without consuming it.
/// Peeks at the next significant token and returns it if it is a keyword.
///
/// Nothing is consumed, although additional tokens may be buffered and
/// skipped trivia recorded along the way.
///
/// Yields [`None`] when no tokens remain or when the next token is not
/// a keyword.
#[must_use]
pub(crate) fn peek_keyword(&mut self) -> Option<Keyword> {
    if let Some(Token::Keyword(keyword)) = self.peek_token() {
        Some(keyword)
    } else {
        None
    }
}
/// Returns the position of the next significant token without consuming it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain.
#[must_use]
pub(crate) fn peek_token_lexeme_and_location(
&mut self,
) -> Option<(Token, &'src str, TokenLocation)> {
self.peek_entry().map(|(token_location, token_piece)| {
(token_piece.token, token_piece.lexeme, *token_location)
})
pub(crate) fn peek_position(&mut self) -> Option<TokenPosition> {
self.peek_buffered_token()
.map(|(token_position, _)| *token_position)
}
/// Returns the next token and its lexeme without consuming it.
#[must_use]
/// Returns the position of the next significant token without consuming it,
/// or the position given by `self.file.eof()` if no tokens remain.
pub(crate) fn peek_position_or_eof(&mut self) -> TokenPosition {
    let next_position = self
        .peek_buffered_token()
        .map(|(token_position, _)| *token_position);
    match next_position {
        Some(token_position) => token_position,
        None => self.file.eof(),
    }
}
/// Returns the next significant token and its lexeme without consuming it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain.
#[must_use]
pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
self.peek_entry()
.map(|(_, token_piece)| (token_piece.token, token_piece.lexeme))
self.peek_buffered_token()
.map(|(_, token_data)| (token_data.token, token_data.lexeme))
}
/// Returns the next token and its location without consuming it.
/// Returns the next significant token and its position without consuming
/// it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain.
#[must_use]
pub(crate) fn peek_token_and_location(&mut self) -> Option<(Token, TokenLocation)> {
self.peek_entry()
.map(|(token_location, token_piece)| (token_piece.token, *token_location))
pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> {
self.peek_buffered_token()
.map(|(token_position, token_data)| (token_data.token, *token_position))
}
/// Returns the location of the next token, or [`TokenLocation::EndOfFile`]
/// if none remain.
#[must_use]
pub(crate) fn peek_location(&mut self) -> TokenLocation {
self.peek_entry()
.map(|(token_location, _)| *token_location)
.unwrap_or(TokenLocation::EndOfFile)
}
/// Returns the location of the last token that was actually consumed
/// by [`crate::parser::Parser::advance`].
/// Returns the next keyword and its position without consuming it.
///
/// Returns [`None`] if no tokens have been consumed yet.
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if the next token is not a keyword or if no tokens remain.
#[must_use]
pub(crate) fn last_consumed_location(&self) -> Option<TokenLocation> {
self.cursor.previous_location
pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> {
    // Anything other than a keyword (including an exhausted stream)
    // yields `None`.
    match self.peek_token_and_position() {
        Some((Token::Keyword(keyword), keyword_position)) => Some((keyword, keyword_position)),
        _ => None,
    }
}
/// Returns the most recent location the parser is "at".
/// Returns the next significant token, its lexeme, and its position
/// without consuming them.
///
/// If at least one token has been consumed, this is the location of the
/// last consumed token. Otherwise it falls back to the location of the
/// first significant token in the stream (or [`TokenLocation::EndOfFile`]
/// if the stream is empty).
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain.
#[must_use]
pub(crate) fn last_visited_location(&mut self) -> TokenLocation {
// Only has to `unwrap` before *any* characters were consumed
self.last_consumed_location()
.unwrap_or_else(|| self.peek_location())
pub(crate) fn peek_token_lexeme_and_position(
    &mut self,
) -> Option<(Token, &'src str, TokenPosition)> {
    // `?` propagates `None` when no significant token is left.
    let (token_position, token_data) = self.peek_buffered_token()?;
    Some((token_data.token, token_data.lexeme, *token_position))
}
/// Peeks the token at `lookahead` (`0` is the next token)
/// without consuming.
/// Returns the next significant token at `lookahead` without consuming it.
///
/// Returns `None` if the stream ends before that position.
/// `lookahead` counts significant tokens, with `0` referring to the next
/// significant token.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if the stream ends before that position.
#[must_use]
pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
self.cursor
.ensure_min_lookahead(lookahead, &mut self.trivia);
.ensure_lookahead_available(lookahead, &mut self.trivia);
self.cursor
.lookahead_buffer
.get(lookahead)
.map(|(_, token_piece)| token_piece.token)
.map(|(_, token_data)| token_data.token)
}
/// Peeks at the significant token `lookahead` positions ahead and returns it
/// if it is a keyword.
///
/// `lookahead` counts significant tokens only; `0` is the next significant
/// token.
///
/// Nothing is consumed, although additional tokens may be buffered and
/// skipped trivia recorded along the way.
///
/// Yields [`None`] when the stream ends before that position or when
/// the token found there is not a keyword.
#[must_use]
pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option<Keyword> {
    if let Some(Token::Keyword(keyword)) = self.peek_token_at(lookahead) {
        Some(keyword)
    } else {
        None
    }
}
/// Peeks at the position of the next significant token, failing if there
/// is none.
///
/// The token is not consumed. When no tokens remain, returns an error of
/// kind `error_kind` created at `self.file.eof()`.
pub(crate) fn require_position(
    &mut self,
    error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, TokenPosition> {
    match self.peek_position() {
        Some(token_position) => Ok(token_position),
        None => Err(self.make_error_at(error_kind, self.file.eof())),
    }
}
/// Peeks at the next significant token and its position, failing if there
/// is none.
///
/// The token is not consumed. When no tokens remain, returns an error of
/// kind `error_kind` created at `self.file.eof()`.
pub(crate) fn require_token_and_position(
    &mut self,
    error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, (Token, TokenPosition)> {
    match self.peek_token_and_position() {
        Some(token_and_position) => Ok(token_and_position),
        None => Err(self.make_error_at(error_kind, self.file.eof())),
    }
}
/// Peeks at the next significant token together with its lexeme and
/// position, failing if there is none.
///
/// The token is not consumed. When no tokens remain, returns an error of
/// kind `error_kind` created at `self.file.eof()`.
pub(crate) fn require_token_lexeme_and_position(
    &mut self,
    error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> {
    match self.peek_token_lexeme_and_position() {
        Some(peeked) => Ok(peeked),
        None => Err(self.make_error_at(error_kind, self.file.eof())),
    }
}
/// Advances by one significant token.
///
/// Trivia is internally handled and recorded.
/// Does nothing at the end-of-file.
pub(crate) fn advance(&mut self) {
self.cursor.ensure_min_lookahead(0, &mut self.trivia);
if let Some((location, _)) = self.cursor.lookahead_buffer.pop_front() {
self.cursor.previous_location = Some(location);
/// Records any skipped trivia and returns the consumed token position.
/// Returns [`None`] if no significant tokens remain.
pub(crate) fn advance(&mut self) -> Option<TokenPosition> {
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
if let Some((token_position, _)) = self.cursor.lookahead_buffer.pop_front() {
self.cursor.last_consumed_position = Some(token_position);
Some(token_position)
} else {
None
}
}
/// If the next token equals `token`, consumes it and returns `true`.
/// If the next significant token equals `token`, consumes it and
/// returns `true`.
///
/// Otherwise leaves the cursor unchanged and returns `false`.
/// Trivia is recorded automatically.
#[must_use]
pub(crate) fn eat(&mut self, token: Token) -> bool {
let correct_token = self.peek_token() == Some(token);
if correct_token {
if self.peek_token() == Some(token) {
self.advance();
true
} else {
false
}
correct_token
}
/// Centralized peek used by public peekers.
fn peek_entry(&mut self) -> Option<&(TokenLocation, crate::lexer::TokenPiece<'src>)> {
self.cursor.ensure_min_lookahead(0, &mut self.trivia);
self.cursor.lookahead_buffer.front()
}
/// Expects `expected` at the current position.
/// If the next significant token corresponds to the given keyword,
/// consumes it and returns `true`.
///
/// On match consumes the token and returns its [`TokenLocation`].
/// Otherwise returns a [`crate::parser::ParseError`] of
/// the given [`crate::parser::ParseErrorKind`] that carries the current
/// span for diagnostics.
/// Otherwise leaves the cursor unchanged and returns `false`.
#[must_use]
pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool {
    // Keywords are ordinary tokens wrapped in `Token::Keyword`, so the
    // generic `eat` does all the work.
    let keyword_token = Token::Keyword(keyword);
    self.eat(keyword_token)
}
#[must_use]
pub(crate) fn eat_with_position(&mut self, token: Token) -> Option<TokenPosition> {
if let Some((next_token, next_token_position)) = self.peek_token_and_position()
&& next_token == token
{
self.advance();
Some(next_token_position)
} else {
None
}
}
/// Expects `expected` token as the next significant one.
///
/// On match consumes the token and returns its [`TokenPosition`].
/// Otherwise returns an error of `error_kind` anchored at the next
/// significant token, or at the end-of-file position if the stream is
/// exhausted. The error's blame span is set to cover exactly that anchor
/// position.
pub(crate) fn expect(
&mut self,
expected: Token,
error_kind: crate::parser::ParseErrorKind,
) -> crate::parser::ParseResult<'src, 'arena, TokenLocation> {
let token_position = self.peek_location();
// `Token` only includes type information, so comparison is valid
if self.peek_token() == Some(expected) {
error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, TokenPosition> {
// `Token` equality is enough here because lexeme and position
// are stored separately.
if let Some((token, token_position)) = self.peek_token_and_position()
&& token == expected
{
self.advance();
Ok(token_position)
} else {
Err(crate::parser::ParseError {
kind: error_kind,
source_span: crate::ast::AstSpan::new(token_position),
})
let anchor = self.peek_position().unwrap_or_else(|| self.file.eof());
Err(self
.make_error_at(error_kind, anchor)
.blame(TokenSpan::new(anchor)))
}
}
/// Expects `expected` keyword as the next significant token.
///
/// On match consumes the keyword and returns its [`TokenPosition`].
/// Otherwise returns an error of `error_kind`. This is a thin wrapper that
/// forwards `Token::Keyword(expected)` to `expect`, which anchors the error
/// at the next significant token, or at the end-of-file position
/// (`self.file.eof()`) if the stream is exhausted.
pub(crate) fn expect_keyword(
&mut self,
expected: Keyword,
error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, TokenPosition> {
self.expect(Token::Keyword(expected), error_kind)
}
/// Returns the position of the last significant token that was actually
/// consumed by [`parser::Parser::advance`].
///
/// This only reads the value stored in the cursor; it never buffers or
/// consumes tokens.
///
/// Returns [`None`] if no tokens have been consumed yet.
#[must_use]
pub(crate) const fn last_consumed_position(&self) -> Option<TokenPosition> {
self.cursor.last_consumed_position
}
/// Returns the position of the last significant token consumed by
/// [`parser::Parser::advance`], falling back to `TokenPosition(0)` when
/// nothing has been consumed yet.
///
/// Handy for diagnostics that need an anchor even before the first token
/// has been consumed.
#[must_use]
pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition {
    match self.cursor.last_consumed_position {
        Some(consumed_position) => consumed_position,
        None => TokenPosition(0),
    }
}
/// Ensures that parsing has advanced past `old_position`.
// TODO: must be given peeked value!
///
/// This is intended as a safeguard against infinite-loop bugs while
/// recovering from invalid input. In debug builds it asserts that progress
/// was made; in release builds it consumes one significant token when
/// the parser stalls.
///
/// `old_position` is compared against the position of the next significant
/// token (as returned by `peek_position`). If no significant tokens remain,
/// this function does nothing.
///
/// # Panics
///
/// In debug builds, panics if the next significant token is not strictly
/// after `old_position`. In all builds, panics if a token is still available
/// and `self.file.is_eof(&old_position)` holds.
#[track_caller]
pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) {
if let Some(peeked_position) = self.peek_position() {
// Debug builds: fail loudly so the stall is noticed and fixed.
debug_assert!(
peeked_position > old_position,
"parser made no forward progress"
);
// Release builds: skip one significant token to break the stall.
if peeked_position <= old_position {
self.advance();
}
// NOTE(review): this check only runs while a token is still available,
// so it cannot fire when the stream is already exhausted (`peek_position`
// returned `None`) — confirm that this is the intended placement.
if self.file.is_eof(&old_position) {
panic!("parsing stuck at the eof");
}
}
}
/// Finds the first significant token located strictly after `position`
/// without consuming anything.
///
/// The search walks the cursor's lookahead buffer, extending it as needed,
/// so trivia is recorded the same way as for any other peek.
///
/// Returns `None` if the stream ends before such a token is found.
#[must_use]
pub(crate) fn peek_token_after_position(&mut self, position: TokenPosition) -> Option<Token> {
    let mut offset = 0usize;
    loop {
        self.cursor
            .ensure_lookahead_available(offset, &mut self.trivia);
        // `?` ends the search once the buffer cannot be extended any further.
        let (candidate_position, candidate) = self.cursor.lookahead_buffer.get(offset)?;
        if *candidate_position > position {
            return Some(candidate.token);
        }
        offset += 1;
    }
}
}

View File

@ -1,6 +1,8 @@
//! Submodule with parsing related errors.
use crate::ast::AstSpan;
use std::collections::HashMap;
use crate::{lexer::TokenPosition, lexer::TokenSpan};
/// Internal parse error kinds.
///
@ -14,37 +16,107 @@ use crate::ast::AstSpan;
/// `UnexpectedToken`, `MultipleDefaults`, etc.).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum ParseErrorKind {
/// Expression inside `(...)` could not be parsed and no closing `)`
/// was found.
ExpressionMissingClosingParenthesis,
/// A `do` block was not followed by a matching `until`.
/// P0001
ParenthesizedExpressionInvalidStart,
/// P0002
ExpressionExpected,
/// P0003
ParenthesizedExpressionMissingClosingParenthesis,
/// P0004
ClassTypeMissingTypeArgument,
/// P0005
ClassTypeExpectedQualifiedTypeName,
/// P0006
ClassTypeInvalidStart,
/// P0007
ClassTypeMissingClosingAngleBracket,
/// P0008
NewMissingClassSpecifier,
/// P0009
NewTooManyArguments,
/// P0010
NewMissingClosingParenthesis,
/// P0011
NewArgumentMissingComma,
/// P0012
ConditionExpected,
/// P0013
ControlFlowBodyExpected,
/// P0014
DoMissingUntil,
/// P0015
ForEachIteratorExpressionExpected,
/// P0016
ForLoopHeaderInitializerInvalidStart,
/// P0017
ForLoopHeaderMissingSemicolonAfterInitializer,
/// P0018
ForLoopHeaderConditionInvalidStart,
/// P0019
ForLoopHeaderMissingSemicolonAfterCondition,
/// P0020
ForLoopHeaderStepInvalidStart,
/// P0021
ForLoopHeaderMissingClosingParenthesis,
/// P0022
ReturnValueInvalidStart,
/// P0023
BreakValueInvalidStart,
/// P0024
GotoMissingLabel,
/// P0025
BlockMissingSemicolonAfterExpression,
/// P0026
BlockMissingClosingBrace,
/// P0027
BlockExpectedItem,
/// P0028
MemberAccessMissingMemberName,
/// P0029
IndexMissingExpression,
/// P0030
IndexMissingClosingBracket,
/// P0031
FunctionCallArgumentMissingComma,
/// P0032
FunctionCallMissingClosingParenthesis,
/// P0033
FunctionCallUnexpectedTokenInArgumentList,
/// P0034
SwitchMissingBody,
/// P0035
SwitchTopLevelItemNotCase,
/// P0036
SwitchCaseMissingColon,
/// P0037
SwitchDefaultMissingColon,
/// P0038
SwitchDuplicateDefault,
/// P0039
SwitchCasesAfterDefault,
/// P0040
SwitchMissingClosingBrace,
/// P0041
SwitchCaseMissingExpression,
/// P0042
SwitchCaseExpressionInvalidStart,
// ================== Old errors to be thrown away! ==================
/// Found an unexpected token while parsing an expression.
ExpressionUnexpectedToken,
/// A `for` loop is missing its opening `(`.
ForMissingOpeningParenthesis,
/// The first `;` in `for (init; cond; step)` is missing.
ForMissingInitializationSemicolon,
/// The second `;` in `for (init; cond; step)` is missing.
ForMissingConditionSemicolon,
/// The closing `)` of a `for` loop is missing.
ForMissingClosingParenthesis,
/// An expression inside a block is not terminated with `;`.
BlockMissingSemicolonAfterExpression,
/// A statement inside a block is not terminated with `;`.
DeclEmptyVariableDeclarations,
DeclNoSeparatorBetweenVariableDeclarations,
DeclExpectedRightBracketAfterArraySize,
DeclExpectedCommaAfterVariableDeclarator,
TypeSpecExpectedType,
TypeSpecInvalidNamedTypeName,
TypeSpecArrayMissingOpeningAngle,
TypeSpecArrayMissingInnerType,
TypeSpecArrayMissingClosingAngle,
TypeSpecClassMissingInnerType,
TypeSpecClassMissingClosingAngle,
BlockMissingSemicolonAfterStatement,
/// `switch` has no body (missing matching braces).
SwitchMissingBody,
/// The first top-level item in a `switch` body is not a `case`.
SwitchTopLevelItemNotCase,
/// A `case` arm is missing the trailing `:`.
SwitchCaseMissingColon,
/// Found more than one `default` branch.
SwitchDuplicateDefault,
/// Found `case` arms after a `default` branch.
SwitchCasesAfterDefault,
/// A `goto` was not followed by a label.
GotoMissingLabel,
/// Unexpected end of input while parsing.
UnexpectedEndOfFile,
/// Token looked like a numeric literal but could not be parsed as one.
@ -70,26 +142,222 @@ pub enum ParseErrorKind {
DeclMissingIdentifier,
/// Invalid variable name identifier in non-`local` variable definition.
DeclBadVariableIdentifier,
/// Found an unexpected token while parsing a declaration literal.
///
/// Expected one of: integer, float, string, `true`, `false`, `none`
/// or an identifier.
DeclarationLiteralUnexpectedToken,
/// A class name was expected, but the current token is not an identifier.
///
/// Emitted when parsing `class Foo` and the token after `class` is not an
/// identifier (so its string value cannot be extracted).
ClassNameNotIdentifier,
/// A parent class name after `extends` was expected, but the token is not
/// an identifier.
///
/// Emitted when parsing `class Foo extends Bar` and the token after
/// `extends` is not an identifier.
ClassParentNameNotIdentifier,
/// A class declaration was not terminated with `;`.
///
/// Emitted when the parser reaches the end of a class definition but
/// does not encounter the required semicolon.
ClassMissingSemicolon,
/// An identifier was expected inside optional parentheses, but the token
/// is not an identifier.
///
/// Emitted by helpers that parse either `(<Ident>)` or bare `<Ident>`.
ParenthesisedIdentifierNameNotIdentifier,
/// A `(` was seen before an identifier, but the matching `)` was not found.
///
/// Emitted when parsing a parenthesised identifier like `(Foo)`.
ParenthesisedIdentifierMissingClosingParenthesis,
/// `HideCategories` is missing the opening `(` before the category list.
///
/// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`.
HideCategoriesMissingOpeningParenthesis,
/// `HideCategories` is missing the closing `)` after the category list.
HideCategoriesMissingClosingParenthesis,
/// `ShowCategories` is missing the opening `(` before the category list.
///
/// Expected syntax: `ShowCategories(CategoryA, CategoryB, ...)`.
ShowCategoriesMissingOpeningParenthesis,
/// `ShowCategories` is missing the closing `)` after the category list.
ShowCategoriesMissingClosingParenthesis,
/// `Within` must be followed by a class or package name identifier.
///
/// Example: `Within(MyOuterClass)`.
WithinNameNotIdentifier,
/// `operator` modifier is missing the opening `(` before
/// the precedence rank.
///
/// Expected syntax: `operator(<integer>)`.
OperatorMissingOpeningParenthesis,
/// `operator(<...>)` must contain an integer literal precedence rank.
///
/// Emitted when the token inside parentheses is not an integer literal.
OperatorPrecedenceNotIntegerLiteral,
/// `operator(<integer>` is missing the closing `)`.
OperatorMissingClosingParenthesis,
ParamInvalidTypeName,
ParamMissingIdentifier,
FunctionReturnTypeNotTypeName,
FunctionNameNotIdentifier,
FunctionParamsMissingOpeningParenthesis,
FunctionParamsMissingClosingParenthesis,
ClassUnexpectedItem,
EnumMissingLeftBrace,
EnumBadVariant,
StructFieldMissingName,
StructFieldMissingSemicolon,
StructMissingRightBrace,
// Named enum/struct typedefs
EnumMissingKeyword, // class member: expected `enum`
EnumExpectedNameOrBrace, // after `enum`, expected identifier
EnumNoClosingBrace,
EnumEmptyVariants,
EnumNoSeparatorBetweenVariants,
EnumMissingLBrace,
StructMissingKeyword, // class member: expected `struct`
StructExpectedNameOrBrace, // after `struct`, expected identifier
StructExpectedExtendsOrBrace,
StructMissingLeftBrace,
StructExpectedBaseName,
StructBodyUnexpectedItem,
CppDirectiveMissingCppBlock,
// var(...) field decls
VarMissingKeyword, // class member: expected `var`
VarSpecsMissingOpeningParenthesis, // after `var`, expected '('
VarSpecNotIdentifier, // inside var(...), expected identifier
VarSpecsMissingClosingParenthesis, // var(...) missing ')'
// Generic decl end
DeclMissingSemicolon, // class-level declaration missing `;`
// --- Replication ---
ReplicationMissingReliability,
ReplicationIfMissingOpeningParenthesis,
ReplicationIfMissingClosingParenthesis,
ReplicationMemberNotIdentifier,
ReplicationMemberMissingClosingParenthesis,
ReplicationRuleMissingSemicolon,
ReplicationMissingKeyword,
ReplicationMissingLBrace,
ReplicationMissingRBrace,
// --- DefaultProperties ---
DefaultPropPathExpectedIdentifier,
DefaultPropIndexNotIntegerLiteral,
DefaultPropIndexMissingClosingParenthesis,
DefaultPropAssignMissingEq,
DefaultPropsMissingKeyword,
DefaultPropsMissingLBrace,
DefaultPropsMissingRBrace,
// --- Begin/End Object headers ---
ObjectBeginMissingKeyword,
ObjectMissingKeyword,
ObjectHeaderKeyNotIdentifier,
ObjectHeaderMissingEq,
// --- State / ignores ---
IgnoresItemNotIdentifier,
IgnoresMissingSemicolon,
StateMissingKeyword,
StateNameNotIdentifier,
StateParentNameNotIdentifier,
StateMissingLBrace,
StateMissingRBrace,
ClassMissingKeyword,
TypeMissingLT,
TypeMissingGT,
StateParensMissingRParen,
BadTypeInClassTypeDeclaration,
IdentifierExpected,
// --- Generic list diagnostics (comma-separated, closed by `)`) ---
/// Saw `)` immediately after `(`, or closed the list without any items.
/// Use when a construct requires at least one item: e.g. `HideCategories(...)`.
ListEmpty,
/// Parser was positioned where an item was required but found neither an
/// item nor a terminator. Typical triggers:
/// - Leading comma: `(, Foo)`
/// - Double comma: `(Foo,, Bar)`
/// - Garbage in place of an item: `(@@, Foo)`
///
/// Recovery: skip to next comma or `)`.
ListMissingIdentifierBeforeSeparator,
/// Parser was positioned where an item was required but found neither an
/// item nor a terminator. Typical triggers:
/// - Leading comma: `(, Foo)`
/// - Double comma: `(Foo,, Bar)`
/// - Garbage in place of an item: `(@@, Foo)`
///
/// Recovery: skip to next comma or `)`.
ListInvalidIdentifier,
/// Two items without a comma (or some token after an item where a comma
/// was required). Typical triggers:
/// - Adjacent identifiers: `(Foo Bar)`
/// - Token after an item where only `,` or `)` are valid.
///
/// Recovery: behave as if a comma were present; continue with the next item.
ListMissingSeparator,
/// Comma directly before `)`: `(Foo, )`.
/// Treat as a soft error or warning, depending on your policy.
ListTrailingSeparator,
FunctionArgumentMissingComma,
// Expression was required, but none started
MissingExpression,
CallableExpectedHeader,
CallableExpectedKind,
CallableOperatorInvalidPrecedence,
CallableMissingBodyOrSemicolon,
CallableNameNotIdentifier,
CallablePrefixOperatorInvalidSymbol,
CallableInfixOperatorInvalidSymbol,
CallablePostfixOperatorInvalidSymbol,
CallableParamsMissingOpeningParenthesis,
CallableParamsMissingClosingParenthesis,
NativeModifierIdNotIntegerLiteral,
}
/// A parse error: its [`ParseErrorKind`] together with the source location
/// information used to report it.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq)]
#[must_use]
pub struct ParseError {
/// The specific kind of parse error that occurred.
pub kind: ParseErrorKind,
pub anchor: TokenPosition,
/// Where the user should look first.
pub blame_span: TokenSpan,
/// The source span in which the error was detected.
pub source_span: AstSpan,
pub covered_span: TokenSpan,
pub related_spans: HashMap<String, TokenSpan>,
}
pub type ParseResult<'src, 'arena, T> = Result<T, ParseError>;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
#[must_use]
pub(crate) fn make_error_here(&mut self, error_kind: ParseErrorKind) -> ParseError {
impl crate::parser::Parser<'_, '_> {
/// Builds a [`ParseError`] of kind `error_kind` positioned at the last
/// consumed token, or at the start of the stream if nothing has been
/// consumed yet.
pub(crate) fn make_error_at_last_consumed(&self, error_kind: ParseErrorKind) -> ParseError {
    let anchor = self.last_consumed_position_or_start();
    self.make_error_at(error_kind, anchor)
}
pub(crate) fn make_error_at(
&self,
error_kind: ParseErrorKind,
position: TokenPosition,
) -> ParseError {
ParseError {
kind: error_kind,
source_span: AstSpan::new(self.peek_location()),
anchor: position,
blame_span: TokenSpan::new(position),
covered_span: TokenSpan::new(position),
related_spans: HashMap::new(),
}
}
}

View File

@ -1,60 +0,0 @@
use crate::ast::Expression;
use crate::lexer::Token;
use crate::parser::ParseErrorKind;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses a block `{ ... }` after `{`.
///
/// Consumes tokens until the matching `}` and returns
/// an [`Expression::Block`] spanning from the opening `{` to
/// the closing `}`.
/// Returns a best-effort block on premature end-of-file.
///
/// `block_start_location` is used as the start of the resulting block's
/// span; per the description above it is the location of the already
/// consumed opening `{`.
///
/// Errors are reported through `report_error_here` rather than returned:
/// `UnexpectedEndOfFile` when input ends before `}`, and
/// `BlockMissingSemicolonAfterExpression` when a tail expression is followed
/// by another block item.
#[must_use]
pub(crate) fn parse_block_cont(
&mut self,
block_start_location: crate::lexer::TokenLocation,
) -> crate::ast::ExpressionRef<'src, 'arena> {
let mut statements = self.arena.vec();
// The most recent result of `parse_block_item`; it becomes the block's
// tail if `}` (or end-of-file) follows it directly.
let mut tail = None;
loop {
// No more tokens: report it and return whatever was collected so far.
let Some((token, token_location)) = self.peek_token_and_location() else {
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
return self.arena.alloc(
Expression::Block { statements, tail },
crate::ast::AstSpan {
from: block_start_location,
to: self.peek_location(),
},
);
};
// Closing brace: the block is complete and its span ends at the `}`.
if let Token::RightBrace = token {
self.advance(); // '}'
let block_span = crate::ast::AstSpan {
from: block_start_location,
to: token_location,
};
return self
.arena
.alloc(Expression::Block { statements, tail }, block_span);
}
// We know that at this point:
// 1. There is still a token and it is not end-of-file;
// 2. It isn't end of the block.
// So having a tail statement there is a problem!
// Recovery: report the missing `;` and demote the pending tail to
// an ordinary expression statement, keeping its original span.
if let Some(tail_expression) = tail {
self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
let tail_span = *tail_expression.span();
let node = self.arena.alloc(
crate::ast::Statement::Expression(tail_expression),
tail_span,
);
statements.push(node);
}
// NOTE(review): `parse_block_item` is defined elsewhere; presumably it
// pushes completed statements into `statements` and returns a trailing
// expression, if any — confirm against its definition.
tail = self.parse_block_item(&mut statements);
// Ensure forward progress under errors to avoid infinite loops.
if self.peek_location() <= token_location {
self.advance();
}
}
}
}

View File

@ -0,0 +1,959 @@
// rottlib/src/parser/grammar/class.rs
#![allow(clippy::all, clippy::pedantic, clippy::nursery)]
use crate::ast::{
BlockBody, ClassConstDecl, ClassConstDeclRef, ClassDeclaration, ClassDefinition, ClassMember,
ClassModifier, ClassModifierRef, ClassVarDecl, ClassVarDeclRef, DeclarationLiteral,
DeclarationLiteralRef, ExecDirective, ExecDirectiveRef, ExpressionRef, IdentifierToken,
Reliability, ReplicationBlock, ReplicationBlockRef, ReplicationRule, ReplicationRuleRef,
StateDecl, StateDeclRef, StateModifier, VariableDeclarator, VariableDeclaratorRef,
};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt, SyncLevel};
use std::collections::HashMap;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Guarantees that the parser has moved past `before`.
///
/// Returns `true` when the next token already lies after `before`.
/// Otherwise one token is consumed and the result tells whether there was
/// a token to consume; `false` therefore means the caller should stop
/// looping.
#[inline]
pub fn ensure_progress_or_break(&mut self, before: TokenPosition) -> bool {
    let already_advanced = self
        .peek_position()
        .is_some_and(|position| position > before);
    already_advanced || self.advance().is_some()
}
/// Parses a single exec directive token into an [`ExecDirective`] node.
///
/// The directive text is the token's lexeme with trailing `\r`/`\n`
/// characters removed. Fails with
/// [`ParseErrorKind::UnexpectedEndOfFile`] when no token is available.
fn parse_exec_directive(&mut self) -> ParseResult<'src, 'arena, ExecDirectiveRef<'arena>> {
    let (directive_token, raw_line, first_position) =
        self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?;
    debug_assert!(matches!(directive_token, Token::ExecDirective));
    self.advance();
    let text = self
        .arena
        .string(raw_line.trim_end_matches(['\r', '\n']));
    let span = TokenSpan::range(first_position, self.last_consumed_position_or_start());
    Ok(self.arena.alloc_node(ExecDirective { text, span }, span))
}
/// Parses an identifier that may be wrapped in parentheses:
/// `name` or `(name)`.
///
/// A missing closing parenthesis is reported (after synchronising to the
/// closing-parenthesis level) but does not fail the parse; only a missing
/// identifier is returned as an error.
fn parse_parenthesised_identifier(&mut self) -> ParseResult<'src, 'arena, IdentifierToken> {
    let opened = self.eat(Token::LeftParenthesis);
    let identifier =
        self.parse_identifier(ParseErrorKind::ParenthesisedIdentifierNameNotIdentifier)?;
    if !opened {
        return Ok(identifier);
    }
    self.expect(
        Token::RightParenthesis,
        ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
    )
    .sync_error_at(self, SyncLevel::CloseParenthesis)
    .report_error(self);
    Ok(identifier)
}
/// Looks ahead (without consuming) past any run of `auto`/`simulated`
/// keywords and reports whether the keyword right after them is `state`.
#[inline]
fn lookahead_state_after_mods(&mut self) -> bool {
    let mut offset = 0;
    loop {
        let keyword = self.peek_keyword_at(offset);
        if matches!(keyword, Some(Keyword::Auto | Keyword::Simulated)) {
            offset += 1;
            continue;
        }
        return matches!(keyword, Some(Keyword::State));
    }
}
/// Parses an optional array length suffix `[expression]`.
///
/// Returns `Ok(None)` when the next token is not `[`. Otherwise the
/// expression is parsed and a closing `]` is required; when it is missing
/// the error is synchronised to the closing-bracket level and returned.
// NOTE(review): the missing-`]` case is reported with the
// `ParenthesizedExpressionMissingClosingParenthesis` kind; a
// bracket-specific kind may be more accurate - confirm.
pub fn parse_array_len_expr(
    &mut self,
) -> ParseResult<'src, 'arena, Option<ExpressionRef<'src, 'arena>>> {
    if self.eat(Token::LeftBracket) {
        let length_expression = self.parse_expression();
        self.expect(
            Token::RightBracket,
            ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis,
        )
        .sync_error_at(self, SyncLevel::CloseBracket)?;
        Ok(Some(length_expression))
    } else {
        Ok(None)
    }
}
/// Tries to parse one class declaration modifier at the current position.
///
/// Returns:
/// * `Ok(Some(node))` when a modifier keyword was recognised and consumed;
///   the node's span runs from the keyword to the last consumed token;
/// * `Ok(None)` when the next token is not a class modifier (nothing is
///   consumed in that case);
/// * `Err(..)` at end of file, or when a modifier that takes arguments
///   (`config`, `hidecategories`, `showcategories`, `within`, `dependson`)
///   is malformed.
pub fn parse_class_declaration_modifier(
&mut self,
) -> ParseResult<'src, 'arena, Option<ClassModifierRef<'arena>>> {
use ClassModifier::{
Abstract, CacheExempt, CollapseCategories, Config, Const, DependsOn, Deprecated,
DontCollapseCategories, DynamicRecompile, EditConst, EditInline, EditInlineNew, Export,
ExportStructs, Final, GlobalConfig, HideCategories, HideDropdown, Instanced, Localized,
Native, NativeReplication, NoExport, NotEditInlineNew, NotPlaceable, ParseConfig,
PerObjectConfig, Placeable, Private, Protected, Public, SafeReplace, ShowCategories,
Static, Transient, Within,
};
let Some((token, modifier_position)) = self.peek_token_and_position() else {
return Err(self.make_error_at_last_consumed(ParseErrorKind::UnexpectedEndOfFile));
};
// Arms that need to look past the keyword consume it themselves and set
// this flag; plain single-keyword modifiers are consumed after the match.
let mut consumed_inside_match = false;
let mut span = TokenSpan::new(modifier_position);
let modifier = match token {
Token::Keyword(Keyword::Final) => Final,
Token::Keyword(Keyword::Native) => Native,
Token::Keyword(Keyword::Abstract) => Abstract,
Token::Keyword(Keyword::Transient) => Transient,
Token::Keyword(Keyword::Public) => Public,
Token::Keyword(Keyword::Protected) => Protected,
Token::Keyword(Keyword::Private) => Private,
Token::Keyword(Keyword::Static) => Static,
Token::Keyword(Keyword::Const) => Const,
Token::Keyword(Keyword::Deprecated) => Deprecated,
Token::Keyword(Keyword::NoExport) => NoExport,
Token::Keyword(Keyword::Export) => Export,
// `config` takes an optional `(name)` argument.
Token::Keyword(Keyword::Config) => {
self.advance();
consumed_inside_match = true;
let value = if self.peek_token() == Some(Token::LeftParenthesis) {
Some(self.parse_parenthesised_identifier()?)
} else {
None
};
Config(value)
}
Token::Keyword(Keyword::Localized) => Localized,
Token::Keyword(Keyword::Placeable) => Placeable,
Token::Keyword(Keyword::NotPlaceable) => NotPlaceable,
Token::Keyword(Keyword::Instanced) => Instanced,
Token::Keyword(Keyword::EditConst) => EditConst,
Token::Keyword(Keyword::EditInline) => EditInline,
Token::Keyword(Keyword::EditInlineNew) => EditInlineNew,
Token::Keyword(Keyword::NotEditInlineNew) => NotEditInlineNew,
Token::Keyword(Keyword::CollapseCategories) => CollapseCategories,
Token::Keyword(Keyword::DontCollapseCategories) => DontCollapseCategories,
Token::Keyword(Keyword::GlobalConfig) => GlobalConfig,
Token::Keyword(Keyword::PerObjectConfig) => PerObjectConfig,
Token::Keyword(Keyword::DynamicRecompile) => DynamicRecompile,
Token::Keyword(Keyword::CacheExempt) => CacheExempt,
Token::Keyword(Keyword::HideDropdown) => HideDropdown,
Token::Keyword(Keyword::ParseConfig) => ParseConfig,
Token::Keyword(Keyword::NativeReplication) => NativeReplication,
Token::Keyword(Keyword::ExportStructs) => ExportStructs,
Token::Keyword(Keyword::SafeReplace) => SafeReplace,
// `hidecategories(...)`: a mandatory parenthesised identifier list.
Token::Keyword(Keyword::HideCategories) => {
self.advance();
consumed_inside_match = true;
self.expect(
Token::LeftParenthesis,
ParseErrorKind::HideCategoriesMissingOpeningParenthesis,
)?;
let categories = self.parse_identifier_list();
self.expect(
Token::RightParenthesis,
ParseErrorKind::HideCategoriesMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
HideCategories(categories)
}
// `showcategories(...)`: same shape as `hidecategories(...)`.
Token::Keyword(Keyword::ShowCategories) => {
self.advance();
consumed_inside_match = true;
self.expect(
Token::LeftParenthesis,
ParseErrorKind::ShowCategoriesMissingOpeningParenthesis,
)?;
let categories = self.parse_identifier_list();
self.expect(
Token::RightParenthesis,
ParseErrorKind::ShowCategoriesMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
ShowCategories(categories)
}
// `within Name`: a bare identifier follows the keyword.
Token::Keyword(Keyword::Within) => {
self.advance();
consumed_inside_match = true;
Within(self.parse_identifier(ParseErrorKind::WithinNameNotIdentifier)?)
}
// `dependson(Name)`; the parentheses are optional as far as
// `parse_parenthesised_identifier` is concerned.
Token::Keyword(Keyword::DependsOn) => {
self.advance();
consumed_inside_match = true;
DependsOn(self.parse_parenthesised_identifier()?)
}
// Not a modifier: leave the token for the caller.
_ => return Ok(None),
};
if !consumed_inside_match {
self.advance();
}
// Cover the keyword plus any arguments consumed for it.
span.extend_to(self.last_consumed_position_or_start());
Ok(Some(self.arena.alloc_node(modifier, span)))
}
/// Parses the rest of a class header after the `class` keyword:
/// `Name [extends Parent] modifier* ;`.
///
/// A malformed modifier is reported and ends the modifier list; the header
/// is still required to finish with `;`. Only the head of the qualified
/// parent name is stored in the resulting [`ClassDeclaration`].
pub(crate) fn parse_class_header_cont(
    &mut self,
) -> ParseResult<'src, 'arena, ClassDeclaration<'arena>>
where
    'src: 'arena,
{
    let name = self.parse_identifier(ParseErrorKind::ClassNameNotIdentifier)?;
    let qualified_parent = if self.eat_keyword(Keyword::Extends) {
        Some(self.parse_qualified_identifier(ParseErrorKind::ClassParentNameNotIdentifier)?)
    } else {
        None
    };
    let mut modifiers = Vec::new();
    loop {
        let parsed = match self.parse_class_declaration_modifier() {
            Ok(parsed) => parsed,
            Err(error) => {
                self.report_error(error);
                None
            }
        };
        let Some(modifier) = parsed else {
            break;
        };
        modifiers.push(modifier);
    }
    self.expect(Token::Semicolon, ParseErrorKind::ClassMissingSemicolon)?;
    let parent = qualified_parent.map(|identifier| identifier.head());
    Ok(ClassDeclaration {
        name,
        parent,
        modifiers,
    })
}
/// Parses a class-level variable declaration:
/// `var` editor specifiers, modifiers, a type specifier, a list of
/// declarators and the terminating `;`.
///
/// Errors from the `var` keyword, the type specifier and the final `;` are
/// returned to the caller. The node's span runs from `var` to the last
/// consumed token.
fn parse_class_var_decl(&mut self) -> ParseResult<'src, 'arena, ClassVarDeclRef<'src, 'arena>> {
    let var_keyword_position = self.expect(
        Token::Keyword(Keyword::Var),
        ParseErrorKind::VarMissingKeyword,
    )?;
    let paren_specs = self.parse_var_editor_specifier_list();
    let modifiers = self.parse_var_declaration_modifiers();
    let type_spec = self.parse_type_specifier()?;
    let declarators = self.parse_class_var_declarators();
    self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?;
    let span = TokenSpan::range(
        var_keyword_position,
        self.last_consumed_position_or_start(),
    );
    let declaration = ClassVarDecl {
        paren_specs,
        modifiers,
        type_spec,
        declarators,
        span,
    };
    Ok(self.arena.alloc_node(declaration, span))
}
/// Parses one rule of a replication block:
/// `reliable|unreliable [if (condition)] member[()] {, member[()]} ;`.
///
/// A member's empty `()` suffix is accepted; a missing `)` there is
/// reported without failing the rule. All other problems are returned as
/// errors.
fn parse_replication_rule(
    &mut self,
) -> ParseResult<'src, 'arena, ReplicationRuleRef<'src, 'arena>> {
    let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?;
    let reliability = match self.peek_token() {
        Some(Token::Keyword(Keyword::Reliable)) => Reliability::Reliable,
        Some(Token::Keyword(Keyword::Unreliable)) => Reliability::Unreliable,
        _ => {
            return Err(
                self.make_error_at_last_consumed(ParseErrorKind::ReplicationMissingReliability)
            );
        }
    };
    self.advance(); // `reliable` / `unreliable`

    let mut condition = None;
    if self.eat_keyword(Keyword::If) {
        self.expect(
            Token::LeftParenthesis,
            ParseErrorKind::ReplicationIfMissingOpeningParenthesis,
        )?;
        let condition_expression = self.parse_expression();
        self.expect(
            Token::RightParenthesis,
            ParseErrorKind::ReplicationIfMissingClosingParenthesis,
        )
        .sync_error_at(self, SyncLevel::CloseParenthesis)?;
        condition = Some(condition_expression);
    }

    let mut members = self.arena.vec();
    loop {
        members.push(self.parse_identifier(ParseErrorKind::ReplicationMemberNotIdentifier)?);
        // Optional empty call-like suffix after the member name.
        if self.eat(Token::LeftParenthesis) {
            self.expect(
                Token::RightParenthesis,
                ParseErrorKind::ReplicationMemberMissingClosingParenthesis,
            )
            .sync_error_at(self, SyncLevel::CloseParenthesis)
            .report_error(self);
        }
        if !self.eat(Token::Comma) {
            break;
        }
    }
    self.expect(
        Token::Semicolon,
        ParseErrorKind::ReplicationRuleMissingSemicolon,
    )?;
    let span = TokenSpan::range(start_position, self.last_consumed_position_or_start());
    let rule = ReplicationRule {
        reliability,
        condition,
        members,
        span,
    };
    Ok(self.arena.alloc_node(rule, span))
}
/// Parses `replication { rule* }`.
///
/// A rule that fails to parse is reported, the parser recovers to the next
/// statement start (also swallowing a `;` if present) and parsing of the
/// block continues. Reaching end of file inside the block is returned as
/// an [`ParseErrorKind::UnexpectedEndOfFile`] error.
fn parse_replication_block(
    &mut self,
) -> ParseResult<'src, 'arena, ReplicationBlockRef<'src, 'arena>> {
    let start_position = self.expect(
        Token::Keyword(Keyword::Replication),
        ParseErrorKind::ReplicationMissingKeyword,
    )?;
    self.expect(Token::LeftBrace, ParseErrorKind::ReplicationMissingLBrace)?;
    let mut rules = self.arena.vec();
    loop {
        match self.peek_token() {
            Some(Token::RightBrace) => break,
            None => {
                return Err(
                    self.make_error_at_last_consumed(ParseErrorKind::UnexpectedEndOfFile)
                );
            }
            Some(_) => {}
        }
        let rule_start = self
            .peek_position()
            .unwrap_or_else(|| self.last_consumed_position_or_start());
        match self.parse_replication_rule() {
            Ok(rule) => rules.push(rule),
            Err(error) => {
                self.report_error(error);
                self.recover_until(SyncLevel::StatementStart);
                let _ = self.eat(Token::Semicolon);
            }
        }
        // Both the success and the recovery path must move forward.
        if !self.ensure_progress_or_break(rule_start) {
            break;
        }
    }
    self.expect(Token::RightBrace, ParseErrorKind::ReplicationMissingRBrace)?;
    let span = TokenSpan::range(start_position, self.last_consumed_position_or_start());
    Ok(self
        .arena
        .alloc_node(ReplicationBlock { rules, span }, span))
}
/// Parses an optional `ignores A, B, ...;` clause.
///
/// Returns `Ok(None)` when the next keyword is not `ignores`; otherwise
/// returns the comma-separated identifiers, requiring a terminating `;`.
fn parse_ignores_clause(
    &mut self,
) -> ParseResult<'src, 'arena, Option<crate::arena::ArenaVec<'arena, IdentifierToken>>> {
    if !self.eat_keyword(Keyword::Ignores) {
        return Ok(None);
    }
    let mut ignored_names = self.arena.vec();
    ignored_names.push(self.parse_identifier(ParseErrorKind::IgnoresItemNotIdentifier)?);
    while self.eat(Token::Comma) {
        ignored_names.push(self.parse_identifier(ParseErrorKind::IgnoresItemNotIdentifier)?);
    }
    self.expect(Token::Semicolon, ParseErrorKind::IgnoresMissingSemicolon)?;
    Ok(Some(ignored_names))
}
/// Parses a state declaration:
/// `[auto|simulated]* state [auto|simulated]* [()] Name [extends Parent]
/// { [ignores ...;] statements }`.
///
/// Modifiers are accepted both before and after the `state` keyword and
/// collected in source order. An empty `()` after the modifiers is
/// accepted; a missing `)` there is reported without failing the parse.
/// The node's span runs from the first token to the end of the body.
fn parse_state_decl(&mut self) -> ParseResult<'src, 'arena, StateDeclRef<'src, 'arena>> {
    let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?;
    let mut modifiers = self.arena.vec();

    // Modifiers written before `state`.
    loop {
        let modifier = match self.peek_keyword() {
            Some(Keyword::Auto) => StateModifier::Auto,
            Some(Keyword::Simulated) => StateModifier::Simulated,
            _ => break,
        };
        self.advance();
        modifiers.push(modifier);
    }
    self.expect(
        Token::Keyword(Keyword::State),
        ParseErrorKind::StateMissingKeyword,
    )?;
    // Modifiers written after `state`.
    loop {
        let modifier = match self.peek_keyword() {
            Some(Keyword::Auto) => StateModifier::Auto,
            Some(Keyword::Simulated) => StateModifier::Simulated,
            _ => break,
        };
        self.advance();
        modifiers.push(modifier);
    }

    if self.eat(Token::LeftParenthesis) {
        self.expect(
            Token::RightParenthesis,
            ParseErrorKind::StateParensMissingRParen,
        )
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .report_error(self);
    }

    let name = self.parse_identifier(ParseErrorKind::StateNameNotIdentifier)?;
    let mut parent = None;
    if self.eat_keyword(Keyword::Extends) {
        parent = Some(self.parse_identifier(ParseErrorKind::StateParentNameNotIdentifier)?);
    }

    let opening_brace_position =
        self.expect(Token::LeftBrace, ParseErrorKind::StateMissingLBrace)?;
    let ignores = self.parse_ignores_clause()?;
    let block = self.parse_braced_block_statements_tail(opening_brace_position);
    let BlockBody {
        statements: body,
        span: body_span,
    } = block;

    let span = TokenSpan::range(start_position, body_span.end);
    let state = StateDecl {
        name,
        parent,
        modifiers,
        ignores,
        body,
        span,
    };
    Ok(self.arena.alloc_node(state, span))
}
/// Parses a class definition after the `class` keyword: the header followed
/// by class members.
///
/// Member parsing stops at end of file, at `defaultproperties`, or at
/// another `class` keyword; none of those tokens is consumed here.
/// Failures inside individual members are handled locally (fallback nodes
/// or reported errors), so only a header error is returned as `Err`.
pub(crate) fn parse_class_definition_cont(
&mut self,
) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> {
let header = self.parse_class_header_cont()?;
let mut members = self.arena.vec();
loop {
let Some((token, member_start)) = self.peek_token_and_position() else {
break;
};
// Arm order matters: the guarded `_` arms for states and callables are
// tried before the keyword-specific arms below them.
match token {
Token::Keyword(Keyword::DefaultProperties) => break,
// `[auto|simulated]* state ...`
_ if self.lookahead_state_after_mods() => {
let state = self
.parse_state_decl()
.widen_error_span_from(member_start)
.unwrap_or_fallback(self);
let span = *state.span();
members.push(self.arena.alloc_node(ClassMember::State(state), span));
}
_ if self.is_callable_header_ahead() => {
let callable = self.parse_callable_definition();
let span = *callable.span();
members.push(self.arena.alloc_node(ClassMember::Function(callable), span));
}
Token::Keyword(Keyword::Const) => {
let constant = self
.parse_class_const_decl()
.widen_error_span_from(member_start)
.unwrap_or_fallback(self);
let span = *constant.span();
members.push(self.arena.alloc_node(ClassMember::Const(constant), span));
}
// Only an `enum` NOT directly followed by `{` is taken here; `enum {`
// matches none of the remaining keyword arms and ends up in the
// unexpected-item arm at the bottom.
Token::Keyword(Keyword::Enum)
if !matches!(self.peek_token_at(1), Some(Token::LeftBrace)) =>
{
self.advance();
let enum_definition = self.parse_enum_definition_tail(member_start);
let span = *enum_definition.span();
members.push(
self.arena
.alloc_node(ClassMember::TypeDefEnum(enum_definition), span),
);
// The trailing `;` after the definition is optional.
let _ = self.eat(Token::Semicolon);
}
Token::Keyword(Keyword::Struct) => {
self.advance();
let struct_definition = self.parse_struct_definition_tail(member_start);
let span = *struct_definition.span();
members.push(
self.arena
.alloc_node(ClassMember::TypeDefStruct(struct_definition), span),
);
// The trailing `;` after the definition is optional.
let _ = self.eat(Token::Semicolon);
}
Token::Keyword(Keyword::Var) => {
let variable_declaration = self
.parse_class_var_decl()
.widen_error_span_from(member_start)
.unwrap_or_fallback(self);
let span = *variable_declaration.span();
members.push(
self.arena
.alloc_node(ClassMember::Var(variable_declaration), span),
);
}
Token::Keyword(Keyword::Replication) => {
let replication = self
.parse_replication_block()
.widen_error_span_from(member_start)
.unwrap_or_fallback(self);
let span = *replication.span();
members.push(
self.arena
.alloc_node(ClassMember::Replication(replication), span),
);
}
Token::ExecDirective => {
let directive = self
.parse_exec_directive()
.widen_error_span_from(member_start)
.unwrap_or_fallback(self);
let span = *directive.span();
members.push(self.arena.alloc_node(ClassMember::Exec(directive), span));
}
// `cpptext`/`cppstruct` and their block are consumed but no member is
// recorded for them.
Token::Keyword(Keyword::CppText | Keyword::CppStruct) => {
self.advance();
if !self.eat(Token::CppBlock) {
self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock);
}
}
Token::Keyword(Class) => break,
// Stray semicolons between members are skipped silently.
Token::Semicolon => {
self.advance();
}
// Anything else: report once, then skip tokens until a keyword from
// the list below (or end of file) is reached.
_ => {
self.report_error_here(ParseErrorKind::ClassUnexpectedItem);
while let Some(next_token) = self.peek_token() {
match next_token {
Token::Keyword(
Keyword::Function
| Keyword::Event
| Keyword::Enum
| Keyword::Struct
| Keyword::Var
| Keyword::Replication
| Keyword::State
| Keyword::Class
| Keyword::DefaultProperties,
) => break,
_ => {
self.advance();
}
}
}
}
}
// Forces at least one token of progress per iteration; stops when
// nothing is left to consume.
if !self.ensure_progress_or_break(member_start) {
break;
}
}
Ok(ClassDefinition { header, members })
}
/// Parses a whole source file as a single class definition.
///
/// Leading semicolons are skipped and leading exec directives are parsed
/// (their nodes are not kept). The `class` keyword is then required and
/// the class definition parsed. If parsing stopped at `defaultproperties`
/// the definition is returned as is; otherwise trailing semicolons are
/// skipped and any other remaining token is reported once as
/// [`ParseErrorKind::ClassUnexpectedItem`] and the rest of the input is
/// drained.
pub fn parse_source_file(
    &mut self,
) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> {
    // Prologue: `;` and exec directives may precede `class`.
    while let Some(token) = self.peek_token() {
        match token {
            Token::Semicolon => {
                self.advance();
            }
            Token::ExecDirective => {
                if let Err(error) = self.parse_exec_directive() {
                    self.report_error(error);
                }
            }
            _ => break,
        }
    }
    self.expect(
        Token::Keyword(Keyword::Class),
        ParseErrorKind::ClassMissingKeyword,
    )?;
    let class_definition = self.parse_class_definition_cont()?;
    let stopped_at_defaults = matches!(
        self.peek_token(),
        Some(Token::Keyword(Keyword::DefaultProperties))
    );
    if stopped_at_defaults {
        return Ok(class_definition);
    }
    // Epilogue: only semicolons may follow the class body.
    while let Some(token) = self.peek_token() {
        if matches!(token, Token::Semicolon) {
            self.advance();
            continue;
        }
        self.report_error_here(ParseErrorKind::ClassUnexpectedItem);
        while self.peek_token().is_some() {
            self.advance();
        }
        break;
    }
    Ok(class_definition)
}
/// Decodes an integer literal with an optional leading `+` or `-` into
/// an `i128`.
///
/// The magnitude is decoded by `decode_unsigned_integer_magnitude`.
/// `-2^127` is accepted (as `i128::MIN`); any other magnitude that does
/// not fit yields [`ParseErrorKind::InvalidNumericLiteral`].
fn decode_signed_integer_literal(&self, s: &str) -> ParseResult<'src, 'arena, i128> {
    let (negative, body) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s.strip_prefix('+').unwrap_or(s)),
    };
    let magnitude: u128 = self.decode_unsigned_integer_magnitude(body)?;
    match i128::try_from(magnitude) {
        Ok(value) if negative => Ok(-value),
        Ok(value) => Ok(value),
        // `i128::MIN` has no positive counterpart, so it needs its own case.
        Err(_) if negative && magnitude == 1u128 << 127 => Ok(i128::MIN),
        Err(_) => {
            Err(self.make_error_at_last_consumed(ParseErrorKind::InvalidNumericLiteral))
        }
    }
}
/// Decodes the unsigned magnitude of an integer literal (no sign).
///
/// Supported forms: decimal, and `0x`/`0X` (hex), `0b`/`0B` (binary),
/// `0o`/`0O` (octal) prefixes. `_` may be used as a digit separator.
///
/// Returns [`ParseErrorKind::InvalidNumericLiteral`] when:
/// * the literal contains no digits at all - this includes an empty body,
///   a bare prefix such as `0x`, and a body made only of separators such
///   as `0x_` (previously such input was silently decoded as `0`);
/// * a character is not a valid digit for the selected base;
/// * the value does not fit into `u128`.
fn decode_unsigned_integer_magnitude(&self, body: &str) -> ParseResult<'src, 'arena, u128> {
    use ParseErrorKind::InvalidNumericLiteral;
    let (radix, digits) =
        if let Some(rest) = body.strip_prefix("0x").or_else(|| body.strip_prefix("0X")) {
            (16u32, rest)
        } else if let Some(rest) = body.strip_prefix("0b").or_else(|| body.strip_prefix("0B")) {
            (2u32, rest)
        } else if let Some(rest) = body.strip_prefix("0o").or_else(|| body.strip_prefix("0O")) {
            (8u32, rest)
        } else {
            (10u32, body)
        };
    let base = u128::from(radix);
    let mut accumulator: u128 = 0;
    let mut saw_digit = false;
    for character in digits.chars() {
        if character == '_' {
            continue;
        }
        // `to_digit` yields `None` for anything that is not an ASCII digit
        // of the given radix, which covers both "not a digit" and
        // "digit too large for this base".
        let Some(digit_value) = character.to_digit(radix) else {
            return Err(self.make_error_at_last_consumed(InvalidNumericLiteral));
        };
        saw_digit = true;
        accumulator = accumulator
            .checked_mul(base)
            .and_then(|value| value.checked_add(u128::from(digit_value)))
            .ok_or_else(|| self.make_error_at_last_consumed(InvalidNumericLiteral))?;
    }
    if !saw_digit {
        return Err(self.make_error_at_last_consumed(InvalidNumericLiteral));
    }
    Ok(accumulator)
}
/// Parses a simple literal usable in class-level declarations.
///
/// Accepted forms:
/// * integer and float literals, optionally preceded by a `+`/`-` token;
/// * string literals (unescaped) and name literals (delimiters removed,
///   stored as [`DeclarationLiteral::String`]);
/// * `true`, `false`, `none`;
/// * `class` followed by a name literal, stored as
///   [`DeclarationLiteral::TaggedName`];
/// * any token that is a valid identifier name.
///
/// The returned position is that of the first token of the literal.
/// Nothing beyond the literal is consumed; on error the tokens consumed so
/// far stay consumed.
// NOTE(review): the final fallthrough reports `ExpressionUnexpectedToken`
// while the other failure paths in this method use
// `DeclarationLiteralUnexpectedToken` - confirm which kind is intended.
fn parse_declaration_literal_class(
    &mut self,
) -> ParseResult<'src, 'arena, DeclarationLiteralRef<'src, 'arena>> {
    /// Removes the first and last byte (the delimiters) of a quoted lexeme.
    ///
    /// Uses a checked slice so that a lexeme shorter than two bytes, or one
    /// whose delimiters are not single-byte characters, yields `""` instead
    /// of panicking.
    fn strip_delimiters(quoted: &str) -> &str {
        quoted
            .get(1..quoted.len().saturating_sub(1))
            .unwrap_or("")
    }

    let (token, lexeme, token_position) =
        self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?;
    let literal = match token {
        Token::Plus | Token::Minus => {
            let is_negative = matches!(token, Token::Minus);
            self.advance();
            let (next_token, next_lexeme, _) =
                self.require_token_lexeme_and_position(ParseErrorKind::InvalidNumericLiteral)?;
            match next_token {
                Token::IntegerLiteral => {
                    // The sign is folded into the text so that the most
                    // negative value decodes without overflow.
                    let value = if is_negative {
                        self.decode_signed_integer_literal(&format!("-{next_lexeme}"))?
                    } else {
                        self.decode_signed_integer_literal(next_lexeme)?
                    };
                    self.advance();
                    DeclarationLiteral::Integer(value)
                }
                Token::FloatLiteral => {
                    let mut signed_lexeme = String::with_capacity(1 + next_lexeme.len());
                    signed_lexeme.push(if is_negative { '-' } else { '+' });
                    signed_lexeme.push_str(next_lexeme);
                    let value = self.decode_float_literal(&signed_lexeme)?;
                    self.advance();
                    DeclarationLiteral::Float(value)
                }
                _ => {
                    return Err(self.make_error_at_last_consumed(
                        ParseErrorKind::DeclarationLiteralUnexpectedToken,
                    ));
                }
            }
        }
        Token::IntegerLiteral => {
            let value = self.decode_signed_integer_literal(lexeme)?;
            self.advance();
            DeclarationLiteral::Integer(value)
        }
        Token::FloatLiteral => {
            let value = self.decode_float_literal(lexeme)?;
            self.advance();
            DeclarationLiteral::Float(value)
        }
        Token::StringLiteral => {
            let value = self.unescape_string_literal(lexeme);
            self.advance();
            DeclarationLiteral::String(value)
        }
        Token::Keyword(Keyword::True) => {
            self.advance();
            DeclarationLiteral::Bool(true)
        }
        Token::Keyword(Keyword::False) => {
            self.advance();
            DeclarationLiteral::Bool(false)
        }
        Token::Keyword(Keyword::None) => {
            self.advance();
            DeclarationLiteral::None
        }
        Token::NameLiteral => {
            let value = self.arena.string(strip_delimiters(lexeme));
            self.advance();
            DeclarationLiteral::String(value)
        }
        Token::Keyword(Keyword::Class) => {
            self.advance();
            let (next_token, next_lexeme, _) = self.require_token_lexeme_and_position(
                ParseErrorKind::DeclarationLiteralUnexpectedToken,
            )?;
            if !matches!(next_token, Token::NameLiteral) {
                return Err(self.make_error_at_last_consumed(
                    ParseErrorKind::DeclarationLiteralUnexpectedToken,
                ));
            }
            let quoted_name = self.arena.string(strip_delimiters(next_lexeme));
            self.advance();
            DeclarationLiteral::TaggedName {
                tag: IdentifierToken(token_position),
                quoted: quoted_name,
            }
        }
        _ if token.is_valid_identifier_name() => {
            self.advance();
            DeclarationLiteral::Identifier(lexeme)
        }
        _ => {
            return Err(
                self.make_error_at_last_consumed(ParseErrorKind::ExpressionUnexpectedToken)
            );
        }
    };
    Ok(DeclarationLiteralRef {
        literal,
        position: token_position,
    })
}
/// Parses a class-level constant: `const Name = literal;`.
///
/// Every step is mandatory; the first failure is returned as an error.
/// The node's span runs from `const` to the last consumed token.
// NOTE(review): a missing `const` is reported as `ClassUnexpectedItem` and
// a missing `=` as `TypeSpecInvalidNamedTypeName`; both look borrowed from
// other contexts - confirm whether dedicated kinds are intended.
fn parse_class_const_decl(
    &mut self,
) -> ParseResult<'src, 'arena, ClassConstDeclRef<'src, 'arena>> {
    let const_keyword_position = self.expect(
        Token::Keyword(Keyword::Const),
        ParseErrorKind::ClassUnexpectedItem,
    )?;
    let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?;
    self.expect(Token::Assign, ParseErrorKind::TypeSpecInvalidNamedTypeName)?;
    let value = self.parse_declaration_literal_class()?;
    self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?;
    let span = TokenSpan::range(
        const_keyword_position,
        self.last_consumed_position_or_start(),
    );
    let declaration = ClassConstDecl { name, value, span };
    Ok(self.arena.alloc_node(declaration, span))
}
/// Parses the comma-separated declarators of a class variable
/// declaration: `name[ [size] ] {, name[ [size] ]}`.
///
/// Declarators never carry an initializer here. A trailing comma directly
/// before `;` is tolerated. If the very first token is not a valid
/// identifier name, an error is reported and the parser recovers to the
/// next statement start (swallowing a `;` if present). The terminating `;`
/// is otherwise left for the caller.
fn parse_class_var_declarators(
    &mut self,
) -> crate::arena::ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> {
    let mut declarators = self.arena.vec();
    loop {
        let Some((next_token, declarator_start)) = self.peek_token_and_position() else {
            break;
        };
        if !next_token.is_valid_identifier_name() {
            // Only complain when nothing has been parsed yet; otherwise
            // leave the token for the caller to diagnose.
            if declarators.is_empty() {
                self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
                self.recover_until(SyncLevel::StatementStart);
                let _ = self.eat(Token::Semicolon);
            }
            break;
        }
        let name = self
            .parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)
            .unwrap_or(IdentifierToken(declarator_start));
        let array_size = match self.parse_array_len_expr() {
            Ok(size) => size,
            Err(error) => {
                self.report_error(error);
                self.recover_until(SyncLevel::CloseBracket);
                let _ = self.eat(Token::RightBracket);
                None
            }
        };
        let span = TokenSpan::range(name.0, self.last_consumed_position_or_start());
        let declarator = VariableDeclarator {
            name,
            initializer: None,
            array_size,
        };
        declarators.push(self.arena.alloc_node(declarator, span));
        let has_separator = self.eat(Token::Comma);
        if !has_separator || self.peek_token() == Some(Token::Semicolon) {
            break;
        }
    }
    declarators
}
/// Parses a comma-separated identifier list up to (but not including) the
/// closing `)`.
///
/// The method never fails; problems are reported and parsing continues:
/// * a comma with no identifier before it reports
///   [`ParseErrorKind::ListMissingIdentifierBeforeSeparator`];
/// * two items without a comma between them report
///   [`ParseErrorKind::ListMissingSeparator`];
/// * any other token reports [`ParseErrorKind::ListInvalidIdentifier`] and
///   the parser synchronises to the next list separator;
/// * an empty result reports [`ParseErrorKind::ListEmpty`] spanning from
///   the token consumed before the list to the last consumed token.
///
/// If synchronisation after an invalid token does not move past that
/// token, the loop stops and leaves the token for the caller; without this
/// guard the same token would be examined and reported forever.
pub(crate) fn parse_identifier_list(
    &mut self,
) -> crate::arena::ArenaVec<'arena, IdentifierToken> {
    let list_start = self.last_consumed_position_or_start();
    let mut identifiers = self.arena.vec();
    while let Some((token, _lexeme, identifier_position)) =
        self.peek_token_lexeme_and_position()
    {
        match token {
            Token::RightParenthesis => break,
            Token::Comma => {
                self.advance();
                self.report_error_here(ParseErrorKind::ListMissingIdentifierBeforeSeparator);
            }
            _ if token.is_valid_identifier_name() => {
                self.advance();
                identifiers.push(IdentifierToken(identifier_position));
                if !self.eat(Token::Comma)
                    && let Some(next_token) = self.peek_token()
                    && next_token != Token::RightParenthesis
                {
                    self.report_error_here(ParseErrorKind::ListMissingSeparator);
                }
            }
            _ => {
                self.make_error_at_last_consumed(ParseErrorKind::ListInvalidIdentifier)
                    .sync_error_until(self, SyncLevel::ListSeparator)
                    .report_error(self);
                // Recovery may stop on the offending token itself (for
                // example when it is a stronger synchronisation point).
                // Bail out instead of spinning on it.
                if self.peek_position() == Some(identifier_position) {
                    break;
                }
            }
        }
    }
    if identifiers.is_empty() {
        let list_end = self.last_consumed_position_or_start();
        self.report_error(crate::parser::ParseError {
            kind: ParseErrorKind::ListEmpty,
            anchor: list_start,
            blame_span: TokenSpan::range(list_start, list_end),
            covered_span: TokenSpan::range(list_start, list_end),
            related_spans: HashMap::new(),
        });
    }
    identifiers
}
}

View File

@ -1,180 +0,0 @@
use crate::ast::{AstSpan, Expression, ExpressionRef};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses the remainder of an `if` expression; the `if` token itself must
/// already be consumed.
///
/// Reads the condition, the body and an optional `else` arm, and returns
/// an [`Expression::If`] whose span starts at `if_start_location` and ends
/// with the last arm that is present.
#[must_use]
pub(crate) fn parse_if_cont(
    &mut self,
    if_start_location: TokenLocation,
) -> ExpressionRef<'src, 'arena> {
    let condition = self.parse_expression();
    let body = self.parse_expression();
    let else_body = match self.peek_token() {
        Some(Token::Else) => {
            self.advance(); // else
            Some(self.parse_expression())
        }
        _ => None,
    };
    // The span ends with the `else` arm when there is one.
    let if_end_location = else_body
        .as_ref()
        .map_or(body.span().to, |else_arm| else_arm.span().to);
    let span = AstSpan {
        from: if_start_location,
        to: if_end_location,
    };
    let if_expression = Expression::If {
        condition,
        body,
        else_body,
    };
    self.arena.alloc(if_expression, span)
}
/// Parses the remainder of a `while` loop; the `while` token itself must
/// already be consumed.
///
/// Returns an [`Expression::While`] spanning from `while_start_location`
/// to the end of the loop body.
#[must_use]
pub(crate) fn parse_while_cont(
    &mut self,
    while_start_location: TokenLocation,
) -> ExpressionRef<'src, 'arena> {
    let condition = self.parse_expression();
    let body = self.parse_expression();
    let loop_end_location = body.span().to;
    let span = AstSpan {
        from: while_start_location,
        to: loop_end_location,
    };
    let while_expression = Expression::While { condition, body };
    self.arena.alloc(while_expression, span)
}
/// Parses the remainder of a `do ... until ...` loop; the `do` token
/// itself must already be consumed.
///
/// Fails with [`ParseErrorKind::DoMissingUntil`] (its span widened back to
/// `do_start_location`) when `until` does not follow the body. On success
/// returns an [`Expression::DoUntil`] spanning from `do` to the end of the
/// condition.
#[must_use]
pub(crate) fn parse_do_until_cont(
    &mut self,
    do_start_location: TokenLocation,
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
    let body = self.parse_expression();
    self.expect(Token::Until, ParseErrorKind::DoMissingUntil)
        .widen_error_span_from(do_start_location)?;
    let condition = self.parse_expression();
    let loop_end_location = condition.span().to;
    let span = AstSpan {
        from: do_start_location,
        to: loop_end_location,
    };
    let do_until_expression = Expression::DoUntil { condition, body };
    Ok(self.arena.alloc(do_until_expression, span))
}
/// Parses the remainder of a `foreach` loop; the `foreach` token itself
/// must already be consumed.
///
/// Returns an [`Expression::ForEach`] spanning from
/// `foreach_start_location` to the end of the loop body.
#[must_use]
pub(crate) fn parse_foreach_cont(
    &mut self,
    foreach_start_location: TokenLocation,
) -> ExpressionRef<'src, 'arena> {
    let iterator = self.parse_expression();
    let body = self.parse_expression();
    let loop_end_location = body.span().to;
    let span = AstSpan {
        from: foreach_start_location,
        to: loop_end_location,
    };
    let foreach_expression = Expression::ForEach { iterator, body };
    self.arena.alloc(foreach_expression, span)
}
/// Parses the remainder of a `for` loop; the `for` token itself must
/// already be consumed.
///
/// Grammar: `for (init?; condition?; step?) body` - each of the three
/// header parts may be left out. A missing `(`, `;` or `)` is returned as
/// the matching `ParseErrorKind`; for the closing `)` the parser also
/// synchronises to the closing-parenthesis level first.
/// On success returns an [`Expression::For`] spanning from `for` to the
/// end of the body.
#[must_use]
pub(crate) fn parse_for_cont(
    &mut self,
    for_start_location: TokenLocation,
) -> crate::parser::ParseResult<'src, 'arena, ExpressionRef<'src, 'arena>> {
    self.expect(
        Token::LeftParenthesis,
        ParseErrorKind::ForMissingOpeningParenthesis,
    )
    .widen_error_span_from(for_start_location)?;

    let init = if matches!(self.peek_token(), Some(Token::Semicolon)) {
        self.advance(); // `;` of an empty initializer
        None
    } else {
        let init_expression = self.parse_expression();
        self.expect(
            Token::Semicolon,
            ParseErrorKind::ForMissingInitializationSemicolon,
        )?;
        Some(init_expression)
    };

    let condition = if matches!(self.peek_token(), Some(Token::Semicolon)) {
        self.advance(); // `;` of an empty condition
        None
    } else {
        let condition_expression = self.parse_expression();
        self.expect(
            Token::Semicolon,
            ParseErrorKind::ForMissingConditionSemicolon,
        )?;
        Some(condition_expression)
    };

    let step = if matches!(self.peek_token(), Some(Token::RightParenthesis)) {
        self.advance(); // `)` right after an empty step
        None
    } else {
        let step_expression = self.parse_expression();
        self.expect(
            Token::RightParenthesis,
            ParseErrorKind::ForMissingClosingParenthesis,
        )
        .widen_error_span_from(for_start_location)
        .sync_error_until(self, crate::parser::SyncLevel::CloseParenthesis)?;
        Some(step_expression)
    };

    let body = self.parse_expression();
    let loop_end_location = body.span().to;
    let span = AstSpan {
        from: for_start_location,
        to: loop_end_location,
    };
    let for_expression = Expression::For {
        init,
        condition,
        step,
        body,
    };
    Ok(self.arena.alloc(for_expression, span))
}
}

View File

@ -0,0 +1,138 @@
//! Parsing of enum definitions for Fermented `UnrealScript`.
use std::ops::ControlFlow;
use crate::arena::ArenaVec;
use crate::ast::{EnumDefRef, EnumDefinition, IdentifierToken};
use crate::lexer::Token;
use crate::lexer::{TokenSpan, TokenPosition};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// State of the enum variant list parser: what kind of token is expected
/// next while walking the list between the braces.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum EnumParseState {
/// A variant identifier is expected next (initial state, and the state
/// entered after a `,`).
ExpectingVariant,
/// A `,` is expected next (the state entered after a variant).
ExpectingSeparator,
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses the remainder of an `enum` definition; the `enum` keyword itself
/// must already be consumed.
///
/// Reads the name, `{`, the variant list and `}`. Problems with any of
/// these are reported rather than returned, so a definition node is always
/// produced. Its span runs from `enum_keyword_position` to the last
/// consumed token.
pub(crate) fn parse_enum_definition_tail(
    &mut self,
    enum_keyword_position: TokenPosition,
) -> EnumDefRef<'src, 'arena> {
    let name = self
        .parse_identifier(ParseErrorKind::EnumExpectedNameOrBrace)
        .unwrap_or_fallback(self);
    self.expect(Token::LeftBrace, ParseErrorKind::EnumMissingLeftBrace)
        .report_error(self);
    let variants = self.parse_enum_variants();
    self.expect(Token::RightBrace, ParseErrorKind::EnumNoClosingBrace)
        .report_error(self);
    let definition_end = self.last_consumed_position_or_start();
    let span = TokenSpan::range(enum_keyword_position, definition_end);
    let definition = EnumDefinition { name, variants };
    self.arena.alloc_node(definition, span)
}
/// Parses the list of enum variants inside braces, handling commas and
/// errors.
///
/// Returns a vector of successfully parsed variant identifiers.
fn parse_enum_variants(&mut self) -> ArenaVec<'arena, IdentifierToken> {
use EnumParseState::{ExpectingSeparator, ExpectingVariant};
let mut variants = self.arena.vec();
let mut parser_state = ExpectingVariant;
while let Some((next_token, next_token_position)) = self.peek_token_and_position() {
let should_break = match (parser_state, next_token) {
(_, Token::RightBrace) => break,
(ExpectingVariant, Token::Comma) => self
.recover_from_empty_enum_variant(next_token_position)
.is_break(),
(ExpectingVariant, _) => {
parser_state = ExpectingSeparator;
self.parse_and_push_enum_variant(&mut variants).is_break()
}
(ExpectingSeparator, Token::Comma) => {
self.advance(); // `,`
parser_state = ExpectingVariant;
false
}
(ExpectingSeparator, _) => self
.parse_enum_variant_after_missing_separator(next_token_position, &mut variants)
.is_break(),
};
if should_break {
break;
}
self.ensure_forward_progress(next_token_position);
}
variants
}
/// Handles a run of commas found where a variant name was expected.
///
/// Consumes every consecutive `,`, then reports one `EnumEmptyVariants`
/// error widened back to `error_start_position`.
///
/// Returns `Break` when the next token is `}` or the input has ended, and
/// `Continue` otherwise.
fn recover_from_empty_enum_variant(
    &mut self,
    error_start_position: TokenPosition,
) -> ControlFlow<()> {
    // Swallow the whole run so it yields a single diagnostic.
    while matches!(self.peek_token(), Some(Token::Comma)) {
        self.advance();
    }
    let empty_variant_error =
        self.make_error_at_last_consumed(ParseErrorKind::EnumEmptyVariants);
    empty_variant_error
        .widen_error_span_from(error_start_position)
        .report_error(self);
    match self.peek_token() {
        Some(Token::RightBrace) | None => ControlFlow::Break(()),
        Some(_) => ControlFlow::Continue(()),
    }
}
/// Parses a single variant identifier and stores it in `variants`.
///
/// On a bad identifier the error is synchronized to
/// `SyncLevel::StatementStart` and reported, and `Break` is returned.
/// Returns `Continue` once the variant has been pushed.
fn parse_and_push_enum_variant(
    &mut self,
    variants: &mut ArenaVec<'arena, IdentifierToken>,
) -> ControlFlow<()> {
    let recovered_identifier = self
        .parse_identifier(ParseErrorKind::EnumBadVariant)
        .sync_error_until(self, SyncLevel::StatementStart)
        .ok_or_report(self);
    match recovered_identifier {
        Some(variant) => {
            variants.push(variant);
            ControlFlow::Continue(())
        }
        None => ControlFlow::Break(()),
    }
}
/// Parses a variant that follows another variant without a `,` in between.
///
/// When the identifier parses, an `EnumNoSeparatorBetweenVariants` error
/// (widened back to `error_start_position`) is reported, the variant is
/// pushed and `Continue` is returned. When it does not parse, only the
/// bad-variant error is reported and `Break` is returned.
fn parse_enum_variant_after_missing_separator(
    &mut self,
    error_start_position: TokenPosition,
    variants: &mut ArenaVec<'arena, IdentifierToken>,
) -> ControlFlow<()> {
    let recovered_identifier = self
        .parse_identifier(ParseErrorKind::EnumBadVariant)
        .widen_error_span_from(error_start_position)
        .sync_error_until(self, SyncLevel::StatementStart)
        .ok_or_report(self);
    match recovered_identifier {
        // Without a usable identifier the bad-variant error already
        // describes the problem; a missing-comma error would be noise.
        None => ControlFlow::Break(()),
        Some(variant) => {
            self.make_error_at_last_consumed(ParseErrorKind::EnumNoSeparatorBetweenVariants)
                .widen_error_span_from(error_start_position)
                .report_error(self);
            variants.push(variant);
            ControlFlow::Continue(())
        }
    }
}
}

View File

@ -0,0 +1,11 @@
//! Declaration parsing for Fermented `UnrealScript`.
//!
//! Implements recursive-descent parsing for declaration-related grammar:
//! type specifiers, enum and struct definitions, `var(...)` prefixes,
//! and variable declarators.
mod enum_definition;
mod struct_definition;
mod type_specifier; // Type-specifier parsing (variable types).
mod var_specifiers; // `var(...)` editor specifiers and declaration-modifiers.
mod variable_declarators; // Comma-separated declarator lists (variable lists).

View File

@ -0,0 +1,210 @@
//! Parsing of struct definitions for Fermented `UnrealScript`.
//!
//! ## C++ block handling
//!
//! The Fermented `UnrealScript` parser must support parsing several legacy
//! source files that contain `cpptext` or `cppstruct`. Our compiler does not
//! compile C++ code and therefore does not need these blocks in
//! the resulting AST. We treat them the same as trivia and skip them.
//!
//! However, some related tokens are context-sensitive, so handling these
//! blocks in the general trivia-skipping path would complicate the separation
//! between the lexer and the parser.
//!
//! The resulting files will not be compiled, but they can still be used to
//! extract type information.
use crate::arena::ArenaVec;
use crate::ast::{
IdentifierToken, QualifiedIdentifierRef, StructDefRef, StructDefinition, StructField,
StructFieldRef, StructModifier, StructModifierKind, TypeSpecifierRef, VarEditorSpecifierRef,
VarModifier,
};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// Leading parts of a struct field declaration, gathered before its
/// declarator list is parsed.
#[derive(Debug)]
struct ParsedStructFieldPrefix<'src, 'arena> {
/// Result of parsing the optional `var(...)` editor specifier list.
editor_specifiers: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
/// Declaration modifiers parsed ahead of the field's type.
declaration_modifiers: ArenaVec<'arena, VarModifier>,
/// The field's type specifier.
type_specifier: TypeSpecifierRef<'src, 'arena>,
}
/// Result of trying to parse one item inside a struct body.
#[derive(Debug)]
enum StructBodyItemParseOutcome<'src, 'arena> {
/// A field was parsed and should be added to the struct.
Field(StructFieldRef<'src, 'arena>),
/// Nothing usable was produced, but parsing of the body may continue.
Skip,
/// Parsing of the struct body should stop at this level.
Stop,
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses the remainder of a `struct` definition; the `struct` keyword must
/// already be consumed.
///
/// Reads the struct modifiers, the name with optional base type and `{`,
/// then body items until `}` or end of input, and finally expects the
/// closing `}`. The returned node spans from `struct_keyword_position` to
/// the last consumed token.
pub(crate) fn parse_struct_definition_tail(
    &mut self,
    struct_keyword_position: TokenPosition,
) -> StructDefRef<'src, 'arena> {
    let modifiers = self.parse_struct_declaration_modifiers();
    let (name, base_type_name) = self.parse_struct_name_base_and_open_brace();

    let mut fields = self.arena.vec();
    loop {
        let item_start_position = match self.peek_token_and_position() {
            None | Some((Token::RightBrace, _)) => break,
            Some((_, position)) => position,
        };
        match self.parse_or_skip_struct_body_item() {
            StructBodyItemParseOutcome::Field(field) => fields.push(field),
            StructBodyItemParseOutcome::Skip => {}
            StructBodyItemParseOutcome::Stop => break,
        }
        self.ensure_forward_progress(item_start_position);
    }

    self.expect(Token::RightBrace, ParseErrorKind::StructMissingRightBrace)
        .widen_error_span_from(struct_keyword_position)
        .report_error(self);

    let definition_end = self.last_consumed_position_or_start();
    let definition_span = TokenSpan::range(struct_keyword_position, definition_end);
    let definition = StructDefinition {
        name,
        base_type_name,
        modifiers,
        fields,
    };
    self.arena.alloc_node(definition, definition_span)
}
/// Handles the next item of a struct body.
///
/// * `var` starts a field, which is delegated to
///   [`Parser::parse_struct_field_tail`].
/// * `cpptext` / `cppstruct` must be followed by a C++ block; the pair is
///   skipped, and a missing block is reported.
/// * Any other token is reported as unexpected and skipped via recovery.
///
/// Returns [`StructBodyItemParseOutcome::Stop`] only at end of input, which
/// is left for a higher-level parser to diagnose.
fn parse_or_skip_struct_body_item(&mut self) -> StructBodyItemParseOutcome<'src, 'arena> {
    let (item_token, item_position) = match self.peek_token_and_position() {
        Some(peeked) => peeked,
        None => return StructBodyItemParseOutcome::Stop,
    };

    if matches!(item_token, Token::Keyword(Keyword::Var)) {
        self.advance();
        return self.parse_struct_field_tail(item_position);
    }

    if matches!(
        item_token,
        Token::Keyword(Keyword::CppText | Keyword::CppStruct)
    ) {
        self.advance();
        let has_cpp_block = self.eat(Token::CppBlock);
        if !has_cpp_block {
            self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock);
            self.recover_until(SyncLevel::StatementStart);
        }
        return StructBodyItemParseOutcome::Skip;
    }

    self.report_error_here(ParseErrorKind::StructBodyUnexpectedItem);
    self.recover_until(SyncLevel::BlockBoundary);
    StructBodyItemParseOutcome::Skip
}
/// Parses a struct field after the `var` keyword has been consumed.
///
/// Returns [`StructBodyItemParseOutcome::Skip`] if the field cannot be
/// parsed far enough to produce a usable AST node after recovery.
fn parse_struct_field_tail(
&mut self,
var_keyword_position: TokenPosition,
) -> StructBodyItemParseOutcome<'src, 'arena> {
let Some(field_prefix) = self.parse_struct_field_prefix() else {
return StructBodyItemParseOutcome::Skip;
};
let declarators = self.parse_variable_declarators();
if !self.eat(Token::Semicolon) {
self.report_error_here(ParseErrorKind::StructFieldMissingSemicolon);
self.recover_until(SyncLevel::BlockBoundary);
let _ = self.eat(Token::Semicolon);
}
if declarators.is_empty() {
return StructBodyItemParseOutcome::Skip;
}
let span = TokenSpan::range(var_keyword_position, self.last_consumed_position_or_start());
StructBodyItemParseOutcome::Field(self.arena.alloc_node(
StructField {
type_specifier: field_prefix.type_specifier,
declaration_modifiers: field_prefix.declaration_modifiers,
editor_specifiers: field_prefix.editor_specifiers,
declarators,
},
span,
))
}
/// Parses what precedes a struct field's declarators: the optional editor
/// specifier list, the declaration modifiers and the type specifier.
///
/// Returns `None` when the type specifier fails to parse; that error is
/// synchronized to `SyncLevel::BlockBoundary` and reported here.
fn parse_struct_field_prefix(&mut self) -> Option<ParsedStructFieldPrefix<'src, 'arena>> {
    let editor_specifiers = self.parse_var_editor_specifier_list();
    let declaration_modifiers = self.parse_var_declaration_modifiers();
    let type_specifier_result = self.parse_type_specifier();
    type_specifier_result
        .sync_error_until(self, SyncLevel::BlockBoundary)
        .ok_or_report(self)
        .map(|type_specifier| ParsedStructFieldPrefix {
            editor_specifiers,
            declaration_modifiers,
            type_specifier,
        })
}
/// Parses a struct's name, its optional `extends` base type and the
/// opening `{`.
///
/// A struct that starts directly with `{` is treated as anonymous and
/// yields `(None, None)`. Failures on the name, the base name or the brace
/// are reported, and the corresponding element becomes `None`.
fn parse_struct_name_base_and_open_brace(
    &mut self,
) -> (
    Option<IdentifierToken>,
    Option<QualifiedIdentifierRef<'arena>>,
) {
    let is_anonymous = self.eat(Token::LeftBrace);
    if is_anonymous {
        return (None, None);
    }

    let name = self
        .parse_identifier(ParseErrorKind::StructExpectedNameOrBrace)
        .ok_or_report(self);

    let base_type_name = match self.peek_token_and_position() {
        Some((Token::Keyword(Keyword::Extends), extends_keyword_position)) => {
            self.advance(); // `extends`
            self.parse_qualified_identifier(ParseErrorKind::StructExpectedBaseName)
                .widen_error_span_from(extends_keyword_position)
                .ok_or_report(self)
        }
        _ => None,
    };

    self.expect(Token::LeftBrace, ParseErrorKind::StructMissingLeftBrace)
        .report_error(self);
    (name, base_type_name)
}
/// Collects the run of struct modifier keywords that precedes a struct name.
///
/// Stops at the first token that is not one of the recognized modifier
/// keywords, leaving it unconsumed. Each modifier records its kind and the
/// position of its keyword.
fn parse_struct_declaration_modifiers(&mut self) -> ArenaVec<'arena, StructModifier> {
    let mut modifiers = self.arena.vec();
    loop {
        let Some((keyword, position)) = self.peek_keyword_and_position() else {
            break;
        };
        let kind = match keyword {
            Keyword::Native => StructModifierKind::Native,
            Keyword::Init => StructModifierKind::Init,
            Keyword::Export => StructModifierKind::Export,
            Keyword::NoExport => StructModifierKind::NoExport,
            Keyword::Transient => StructModifierKind::Transient,
            Keyword::Deprecated => StructModifierKind::Deprecated,
            Keyword::Long => StructModifierKind::Long,
            _ => break,
        };
        modifiers.push(StructModifier { kind, position });
        self.advance();
    }
    modifiers
}
}

View File

@ -0,0 +1,115 @@
//! Parsing of type specifiers for Fermented `UnrealScript`.
use crate::ast::{TypeSpecifier, TypeSpecifierRef};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, ParseResult, Parser};
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a type specifier as it appears in variable declarations.
///
/// Dispatches on the first token:
/// `enum` and `struct` start inline definitions, `array` and `class` start
/// their dedicated forms, and any token that is a valid type name starts
/// a (possibly qualified) named type.
///
/// # Errors
///
/// Returns an error when no token is available, when the first token cannot
/// start a type, or when the selected sub-parser fails.
pub(crate) fn parse_type_specifier(
    &mut self,
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
    let (starting_token, starting_token_position) =
        self.require_token_and_position(ParseErrorKind::TypeSpecExpectedType)?;

    if matches!(starting_token, Token::Keyword(Keyword::Enum)) {
        self.advance();
        return Ok(self.parse_inline_enum_tail(starting_token_position));
    }
    if matches!(starting_token, Token::Keyword(Keyword::Struct)) {
        self.advance();
        return Ok(self.parse_inline_struct_tail(starting_token_position));
    }
    if matches!(starting_token, Token::Keyword(Keyword::Array)) {
        self.advance();
        return self.parse_array_type_specification_tail(starting_token_position);
    }
    if matches!(starting_token, Token::Keyword(Keyword::Class)) {
        self.advance();
        return self.parse_class_type_specification_tail(starting_token_position);
    }

    if !starting_token.is_valid_type_name() {
        return Err(self.make_error_at_last_consumed(ParseErrorKind::TypeSpecExpectedType));
    }
    let type_name =
        self.parse_qualified_identifier(ParseErrorKind::TypeSpecInvalidNamedTypeName)?;
    let named_type_span = *type_name.span();
    let named_type = TypeSpecifier::Named(type_name);
    Ok(self.arena.alloc_node(named_type, named_type_span))
}
/// Parses an inline `enum` definition used as a type; the `enum` keyword
/// must already be consumed.
///
/// The resulting specifier spans from `starting_token_position` to the end
/// of the enum definition.
fn parse_inline_enum_tail(
    &mut self,
    starting_token_position: TokenPosition,
) -> TypeSpecifierRef<'src, 'arena> {
    let definition = self.parse_enum_definition_tail(starting_token_position);
    let definition_end = definition.span().end;
    let specifier_span = TokenSpan::range(starting_token_position, definition_end);
    let specifier = TypeSpecifier::InlineEnum(definition);
    self.arena.alloc_node(specifier, specifier_span)
}
/// Parses an inline `struct` definition used as a type; the `struct`
/// keyword must already be consumed.
///
/// The resulting specifier spans from `starting_token_position` to the end
/// of the struct definition.
fn parse_inline_struct_tail(
    &mut self,
    starting_token_position: TokenPosition,
) -> TypeSpecifierRef<'src, 'arena> {
    let definition = self.parse_struct_definition_tail(starting_token_position);
    let definition_end = definition.span().end;
    let specifier_span = TokenSpan::range(starting_token_position, definition_end);
    let specifier = TypeSpecifier::InlineStruct(definition);
    self.arena.alloc_node(specifier, specifier_span)
}
/// Parses the `<modifiers element_type>` part of an `array<...>` type; the
/// `array` keyword must already be consumed.
///
/// The resulting specifier spans from `starting_token_position` through the
/// closing `>`.
///
/// # Errors
///
/// Fails if `<` or `>` is missing or if the element type does not parse.
fn parse_array_type_specification_tail(
    &mut self,
    starting_token_position: TokenPosition,
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
    self.expect(
        Token::Less,
        ParseErrorKind::TypeSpecArrayMissingOpeningAngle,
    )?;
    let element_modifiers = self.parse_var_declaration_modifiers();
    let element_type = self.parse_type_specifier()?;
    let array_end_position = self.expect(
        Token::Greater,
        ParseErrorKind::TypeSpecArrayMissingClosingAngle,
    )?;

    let array_type = TypeSpecifier::Array {
        element_type,
        element_modifiers,
    };
    let array_span = TokenSpan::range(starting_token_position, array_end_position);
    Ok(self.arena.alloc_node(array_type, array_span))
}
/// Parses what follows the `class` keyword in a type position.
///
/// A bare `class` yields `TypeSpecifier::Class(None)` spanning only the
/// keyword. `class<Name>` yields `TypeSpecifier::Class(Some(..))` spanning
/// through the closing `>`.
///
/// # Errors
///
/// Fails if, after `<`, the inner type name or the closing `>` is missing.
fn parse_class_type_specification_tail(
    &mut self,
    starting_token_position: TokenPosition,
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
    if !self.eat(Token::Less) {
        let keyword_only_span =
            TokenSpan::range(starting_token_position, starting_token_position);
        return Ok(self
            .arena
            .alloc_node(TypeSpecifier::Class(None), keyword_only_span));
    }

    let inner_type_name =
        self.parse_qualified_identifier(ParseErrorKind::TypeSpecClassMissingInnerType)?;
    let closing_angle_position = self.expect(
        Token::Greater,
        ParseErrorKind::TypeSpecClassMissingClosingAngle,
    )?;
    let class_span = TokenSpan::range(starting_token_position, closing_angle_position);
    Ok(self
        .arena
        .alloc_node(TypeSpecifier::Class(Some(inner_type_name)), class_span))
}
}

View File

@ -0,0 +1,89 @@
//! Parsing of declaration specifiers used in `var(...) ...` syntax for
//! Fermented `UnrealScript`.
use crate::arena::ArenaVec;
use crate::ast::{VarEditorSpecifier, VarEditorSpecifierRef, VarModifier};
use crate::lexer::Token;
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> Parser<'src, 'arena> {
/// Collects consecutive variable declaration modifiers, e.g. the
/// `transient config editconst` part of
/// `var transient config editconst int X;`.
///
/// The first token that does not convert into a [`VarModifier`] ends the
/// run and stays unconsumed for the caller.
///
/// Returns the modifiers in source order; the vector is empty when the
/// current token is not a modifier.
#[must_use]
pub(crate) fn parse_var_declaration_modifiers(&mut self) -> ArenaVec<'arena, VarModifier> {
    let mut modifiers = self.arena.vec();
    loop {
        let Some(peeked_token_and_position) = self.peek_token_and_position() else {
            break;
        };
        match VarModifier::try_from(peeked_token_and_position) {
            Ok(modifier) => {
                self.advance();
                modifiers.push(modifier);
            }
            Err(_) => break,
        }
    }
    modifiers
}
/// Parses the optional `( ... )` editor specifier list that may follow
/// `var`; the `var` keyword must already be consumed.
///
/// Returns `None` when the next token is not `(`. Once `(` has been
/// consumed the result is always `Some`, even for an empty list.
///
/// Each entry is either a string literal or an identifier. Recovery is
/// deliberately shallow: the list ends at the first entry that is not
/// followed by a `,`.
#[must_use]
pub(crate) fn parse_var_editor_specifier_list(
    &mut self,
) -> Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>> {
    if !self.eat(Token::LeftParenthesis) {
        return None;
    }
    let mut specifiers = self.arena.vec();
    loop {
        let Some((token, lexeme, position)) = self.peek_token_lexeme_and_position() else {
            break;
        };
        if token == Token::RightParenthesis {
            break;
        }

        let parsed_specifier = if token == Token::StringLiteral {
            self.advance();
            Some(VarEditorSpecifier::String(
                self.unescape_string_literal(lexeme),
            ))
        } else if let Some(identifier) = Self::identifier_token_from_token(token, position) {
            self.advance();
            Some(VarEditorSpecifier::Identifier(identifier))
        } else {
            self.make_error_at_last_consumed(ParseErrorKind::VarSpecNotIdentifier)
                .sync_error_until(self, SyncLevel::ListSeparator)
                .report_error(self);
            None
        };
        if let Some(specifier) = parsed_specifier {
            specifiers.push(self.arena.alloc_node_at(specifier, position));
        }

        // Once the list structure is unclear, give up on this list
        // rather than attempt detailed repair.
        if !self.eat(Token::Comma) {
            break;
        }
        self.ensure_forward_progress(position);
    }
    self.expect(
        Token::RightParenthesis,
        ParseErrorKind::VarSpecsMissingClosingParenthesis,
    )
    .sync_error_at(self, SyncLevel::CloseParenthesis)
    .report_error(self);
    Some(specifiers)
}
}

View File

@ -0,0 +1,172 @@
//! Parsing of comma-separated variable declarator lists for
//! Fermented `UnrealScript`.
//!
//! Extends original `UnrealScript` by allowing array-size expressions and
//! declarator initializers.
#![allow(clippy::option_if_let_else)]
use std::ops::ControlFlow;
use crate::arena::ArenaVec;
use crate::ast::{OptionalExpression, VariableDeclarator, VariableDeclaratorRef};
use crate::lexer::{Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel};
/// Tracks what the variable declarator-list parser expects to see next.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum VariableDeclaratorParseState {
/// The next token should start a declarator.
ExpectingDeclarator,
/// A declarator has just been parsed; the next token should be `,` or `;`.
ExpectingSeparator,
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a comma-separated list of variable declarators.
///
/// Accepts optional array-size expressions and `=` initializers.
///
/// Runs a two-state machine (declarator expected / separator expected).
/// Parsing ends at a `;` (which is not consumed by this method), at end of
/// input, or when one of the recovery helpers signals a break.
///
/// Returns the declarators parsed so far; the vector may be empty.
#[must_use]
pub(crate) fn parse_variable_declarators(
&mut self,
) -> ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> {
use VariableDeclaratorParseState::{ExpectingDeclarator, ExpectingSeparator};
let mut declarators = self.arena.vec();
let mut parser_state = ExpectingDeclarator;
while let Some((next_token, next_token_position)) = self.peek_token_and_position() {
match (parser_state, next_token) {
// A `;` where a declarator should be: report it and stop.
(ExpectingDeclarator, Token::Semicolon) => {
self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations);
return declarators;
}
// One or more stray commas where a declarator should be.
(ExpectingDeclarator, Token::Comma) => {
if self
.recover_empty_variable_declarator(next_token_position)
.is_break()
{
return declarators;
}
}
(ExpectingDeclarator, _) => {
if self
.parse_variable_declarator_into(&mut declarators)
.is_break()
{
// Breaking means we've failed to parse declarator
self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations);
break;
}
parser_state = ExpectingSeparator;
}
(ExpectingSeparator, Token::Comma) => {
self.advance();
parser_state = ExpectingDeclarator;
}
// End of the list; the `;` is left unconsumed.
(ExpectingSeparator, Token::Semicolon) => break,
// Anything else after a declarator is handled as a missing `,`.
(ExpectingSeparator, _) => {
if self
.recover_missing_variable_declarator_separator(
next_token_position,
&mut declarators,
)
.is_break()
{
break;
}
}
}
self.ensure_forward_progress(next_token_position);
}
// In case of reaching EOF here, it does not matter if we emit
// an additional diagnostic.
// The caller is expected to report the more relevant enclosing error.
declarators
}
/// Handles a run of commas found where a declarator was expected.
///
/// Consumes every consecutive `,`, then reports one
/// `DeclEmptyVariableDeclarations` error widened back to
/// `error_start_position`.
///
/// Returns `Break` when the next token is `;` or the input has ended, and
/// `Continue` otherwise.
fn recover_empty_variable_declarator(
    &mut self,
    error_start_position: TokenPosition,
) -> ControlFlow<()> {
    // Swallow the whole run so it yields a single diagnostic.
    while matches!(self.peek_token(), Some(Token::Comma)) {
        self.advance();
    }
    let empty_declarator_error =
        self.make_error_at_last_consumed(ParseErrorKind::DeclEmptyVariableDeclarations);
    empty_declarator_error
        .widen_error_span_from(error_start_position)
        .report_error(self);
    match self.peek_token() {
        Some(Token::Semicolon) | None => ControlFlow::Break(()),
        Some(_) => ControlFlow::Continue(()),
    }
}
/// Parses one declarator and appends it to `declarators`.
///
/// On failure the error is synchronized to `SyncLevel::StatementStart` and
/// reported, and `Break` is returned. Returns `Continue` once the
/// declarator has been pushed.
fn parse_variable_declarator_into(
    &mut self,
    declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
) -> ControlFlow<()> {
    let recovered_declarator = self
        .parse_variable_declarator()
        .sync_error_until(self, SyncLevel::StatementStart)
        .ok_or_report(self);
    match recovered_declarator {
        Some(declarator) => {
            declarators.push(declarator);
            ControlFlow::Continue(())
        }
        None => ControlFlow::Break(()),
    }
}
/// Parses a declarator that follows another declarator without a `,`.
///
/// When the declarator parses, a
/// `DeclNoSeparatorBetweenVariableDeclarations` error (widened back to
/// `error_start_position`) is reported, the declarator is pushed and
/// `Continue` is returned. When it does not parse, only the declarator's
/// own error is reported and `Break` is returned.
fn recover_missing_variable_declarator_separator(
    &mut self,
    error_start_position: TokenPosition,
    declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
) -> ControlFlow<()> {
    let Some(declarator) = self
        .parse_variable_declarator()
        .widen_error_span_from(error_start_position)
        .sync_error_until(self, SyncLevel::StatementStart)
        .ok_or_report(self)
    else {
        return ControlFlow::Break(());
    };
    self.make_error_at_last_consumed(ParseErrorKind::DeclNoSeparatorBetweenVariableDeclarations)
        .widen_error_span_from(error_start_position)
        .report_error(self);
    declarators.push(declarator);
    ControlFlow::Continue(())
}
/// Parses one declarator: a name, an optional `[size]` and an optional
/// `= initializer`.
///
/// The node spans from the name to the last consumed token.
///
/// # Errors
///
/// Fails if the declarator does not start with a valid identifier.
fn parse_variable_declarator(
    &mut self,
) -> ParseResult<'src, 'arena, VariableDeclaratorRef<'src, 'arena>> {
    let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?;
    let array_size = self.parse_optional_array_size();
    let initializer = self.parse_optional_variable_initializer();

    let declarator_end = self.last_consumed_position_or_start();
    let declarator_span = TokenSpan::range(name.0, declarator_end);
    let declarator = VariableDeclarator {
        name,
        initializer,
        array_size,
    };
    Ok(self.arena.alloc_node(declarator, declarator_span))
}
/// Parses an optional `[expression]` array size.
///
/// Returns `None` when the next token is not `[`. Otherwise parses the size
/// expression and expects `]`; a missing `]` is synchronized at
/// `SyncLevel::CloseBracket` and reported, and the expression is still
/// returned.
fn parse_optional_array_size(&mut self) -> OptionalExpression<'src, 'arena> {
    let has_opening_bracket = self.eat(Token::LeftBracket);
    if !has_opening_bracket {
        return None;
    }
    let size_expression = self.parse_expression();
    let closing_bracket = self.expect(
        Token::RightBracket,
        ParseErrorKind::DeclExpectedRightBracketAfterArraySize,
    );
    closing_bracket
        .sync_error_at(self, SyncLevel::CloseBracket)
        .report_error(self);
    Some(size_expression)
}
/// Parses an optional `= expression` initializer.
///
/// Returns `None`, consuming nothing, when the next token is not `=`.
fn parse_optional_variable_initializer(&mut self) -> OptionalExpression<'src, 'arena> {
    if self.eat(Token::Assign) {
        Some(self.parse_expression())
    } else {
        None
    }
}
}

View File

@ -0,0 +1,200 @@
//! Block-body parsing for Fermented `UnrealScript`.
//!
//! Provides shared routines for parsing `{ ... }`-delimited bodies used in
//! function, loop, state, and similar constructs after the opening `{`
//! has been consumed.
use crate::ast::{BlockBody, Expression, ExpressionRef, Statement, StatementList, StatementRef};
use crate::lexer::{Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a `{ ... }` body and wraps it in an [`Expression::Block`].
///
/// The opening `{` must already be consumed. The statements and the span
/// come from [`Parser::parse_braced_block_statements_tail`], so the span
/// starts at `left_brace_position`.
///
/// If the input ends early, the missing `}` is reported and a best-effort
/// block is returned.
#[must_use]
pub(crate) fn parse_block_body_tail(
    &mut self,
    left_brace_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let body = self.parse_braced_block_statements_tail(left_brace_position);
    let block_span = body.span;
    let block = Expression::Block(body.statements);
    self.arena.alloc_node(block, block_span)
}
/// Parses the statements in a braced block body.
///
/// The opening `{` must already have been consumed. Returns the parsed
/// statements and a span covering the whole block, from
/// `left_brace_position` through the closing `}`.
///
/// If an item fails and cannot be recovered locally, the error is
/// synchronized at the delimiter matching `left_brace_position`, an error
/// statement is appended, and the body ends at the last consumed token.
///
/// On premature end-of-file, reports the missing `}` and returns
/// a best-effort body.
#[must_use]
pub(crate) fn parse_braced_block_statements_tail(
&mut self,
left_brace_position: TokenPosition,
) -> BlockBody<'src, 'arena> {
let mut statements = self.arena.vec();
while let Some((token, token_position)) = self.peek_token_and_position() {
// Normal exit: the closing brace ends the block and its span.
if token == Token::RightBrace {
self.advance(); // '}'
let span = TokenSpan::range(left_brace_position, token_position);
return BlockBody { statements, span };
}
match self.parse_and_append_next_block_item(&mut statements) {
Ok(()) => {
// Guard against parser bugs that would otherwise leave
// block parsing stuck on the same token.
self.ensure_forward_progress(token_position);
}
Err(error) => {
// Item-level recovery failed,
// so escalate to the block boundary.
let error = error.sync_error_at_matching_delimiter(self, left_brace_position);
let error_statement = error.fallback(self);
statements.push(error_statement);
let span = TokenSpan::range(
left_brace_position,
self.last_consumed_position_or_start(),
);
return BlockBody { statements, span };
}
}
}
// The loop only falls through when the input ends before a closing `}`.
let eof_position = self.peek_position_or_eof();
self.make_error_at(ParseErrorKind::BlockMissingClosingBrace, eof_position)
.related_token("left_brace", left_brace_position)
.report(self);
let span = TokenSpan::range(left_brace_position, self.last_consumed_position_or_start());
BlockBody { statements, span }
}
/// Parses the next statement-like item of a `{ ... }` block and pushes it
/// onto `statements`.
///
/// Intended to be called only from inside a block; the closing `}` is never
/// consumed here.
///
/// On success exactly one statement has been appended and at least one
/// token consumed. If the statement cannot be recovered locally, the
/// unreported error is returned so that the enclosing block parser can
/// recover at block level.
pub(crate) fn parse_and_append_next_block_item(
    &mut self,
    statements: &mut StatementList<'src, 'arena>,
) -> ParseResult<'src, 'arena, ()> {
    let mut statement = if let Some(parsed_statement) = self.parse_statement() {
        parsed_statement
    } else {
        // Anything that does not start a statement is treated as
        // an expression statement.
        self.parse_expression_statement_in_block()?
    };

    if !block_item_requires_semicolon(&statement) {
        statements.push(statement);
        return Ok(());
    }

    // With no next token (end-of-file) nothing is reported here: the block
    // parser reports the missing `}`, and a semicolon error on top of it
    // would only cascade.
    if let Some((following_token, following_position)) = self.peek_token_and_position() {
        if following_token == Token::Semicolon {
            statement.span_mut().extend_to(following_position);
            self.advance(); // ';'
        } else if following_token != Token::RightBrace {
            // A final expression directly before `}` may drop its `;`
            // and become the block's tail value; anything else here
            // is a missing semicolon.
            self.make_error_at_last_consumed(
                ParseErrorKind::BlockMissingSemicolonAfterExpression,
            )
            .widen_error_span_from(statement.span().start)
            .sync_error_until(self, SyncLevel::StatementStart)
            .blame_token(following_position)
            .related("expression_span", *statement.span())
            .report(self);
        }
    }

    statements.push(statement);
    Ok(())
}
/// Parses a non-statement starter as an expression statement inside
/// a block.
///
/// On success, returns an expression statement whose expression parser has
/// consumed at least one token. If expression parsing fails and recovery
/// cannot consume anything locally, returns the unreported error instead of
/// producing a zero-width fallback statement.
fn parse_expression_statement_in_block(
&mut self,
) -> ParseResult<'src, 'arena, StatementRef<'src, 'arena>> {
// Remember where the expression starts so that a failed parse can be
// checked for progress afterwards.
let expression_start_position = self.peek_position_or_eof();
// Position handed to the expression parser together with
// `BlockExpectedItem`: the last token consumed before this item.
let expected_block_item_after_position = self.last_consumed_position_or_start();
let expression_result = self.parse_required_expression(
ParseErrorKind::BlockExpectedItem,
expected_block_item_after_position,
);
let expression = match expression_result {
Ok(expression) => expression,
Err(error) => {
let expression_recovery_made_no_progress =
self.peek_position_or_eof() == expression_start_position;
// Without progress, a fallback statement would violate this
// function's success contract and could leave the enclosing
// block loop stuck.
if expression_recovery_made_no_progress {
return self.recover_bad_block_item_start_as_error_statement(error);
}
// Some tokens were consumed: resynchronize and substitute
// a fallback expression.
error
.sync_error_until(self, SyncLevel::StatementStart)
.fallback(self)
}
};
// The statement shares the span of the expression it wraps.
let expression_span = *expression.span();
Ok(self
.arena
.alloc_node(Statement::Expression(expression), expression_span))
}
fn recover_bad_block_item_start_as_error_statement(
&mut self,
error: crate::parser::ParseError,
) -> ParseResult<'src, 'arena, StatementRef<'src, 'arena>> {
let position_before_statement_recovery = self.peek_position_or_eof();
// Recover one damaged block item if possible;
// otherwise let the enclosing block recover at its own boundary.
let error = error.sync_error_at(self, SyncLevel::StatementTerminator);
if self.peek_position_or_eof() != position_before_statement_recovery {
return Ok(error.fallback(self));
}
Err(error)
}
}
fn block_item_requires_semicolon(statement: &Statement) -> bool {
// Control-flow and block expressions do not require a trailing semicolon
// when used as block items.
if let Statement::Expression(expression) = statement {
!matches!(
**expression,
Expression::Block { .. }
| Expression::If { .. }
| Expression::While { .. }
| Expression::DoUntil { .. }
| Expression::ForEach { .. }
| Expression::For { .. }
| Expression::Switch { .. }
| Expression::Error
)
} else {
false
}
}

View File

@ -0,0 +1,308 @@
//! Parser for `for` loop expressions in Fermented `UnrealScript`.
//!
//! ## Disambiguation of `for` as loop vs expression
//!
//! Unlike other control-flow keywords, `for` is disambiguated from functions
//! and variables with the same name. This is done syntactically in
//! [`Parser::peek_for_loop_header_left_parenthesis_position`]: a `for` token
//! followed by a `(` whose contents contain a top-level `;` is unambiguously
//! a loop header.
//!
//! This rule is lightweight, local, and robust, and mirrors the fixed grammar
//! `for (init; condition; step)` without requiring name resolution.
//!
//! ### Why this is not done for `if` / `while` / `do`
//!
//! There is no similarly reliable way to discriminate `if`, `while`, or related
//! keywords at this stage of parsing: their parenthesized forms are
//! indistinguishable from single-argument function calls.
//!
//! Supporting these keywords as identifiers would complicate parsing
//! disproportionately and we always treat them as openers for conditional and
//! loop expressions. This matches common `UnrealScript` usage and
//! intentionally drops support for code that reuses such names for
//! variables or functions (as the author did by declaring
//! a `For` function in Acedia).
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{self, Token, TokenPosition};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// Parsed pieces of a `for (...)` header.
#[derive(Debug)]
struct ParsedForHeader<'src, 'arena> {
/// Optional initializer expression.
initializer: OptionalExpression<'src, 'arena>,
/// Optional condition expression.
condition: OptionalExpression<'src, 'arena>,
/// Optional step expression.
step: OptionalExpression<'src, 'arena>,
/// Position of the header's closing `)`; `None` when the header
/// was not closed.
right_parenthesis_position: Option<TokenPosition>,
}
impl<'src, 'arena> ParsedForHeader<'src, 'arena> {
    /// Creates a header with no components and no closing `)` recorded.
    #[must_use]
    fn new() -> Self {
        let (initializer, condition, step) = (None, None, None);
        Self {
            initializer,
            condition,
            step,
            right_parenthesis_position: None,
        }
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Looks ahead to decide whether the upcoming `(` opens a `for (...)`
/// header, returning that `(`'s position if so.
///
/// A header is recognized by a `;` at the top nesting level before the
/// matching `)`. If the input ends before that can be decided, the group is
/// still treated as a header so that later parsing produces `for`-specific
/// diagnostics.
///
/// Only peeks; no token is consumed. Used to tell a `for` loop apart from
/// an identifier named `for`.
pub(in super::super) fn peek_for_loop_header_left_parenthesis_position(
    &mut self,
) -> Option<TokenPosition> {
    let (first_token, left_parenthesis_position) = self.peek_token_and_position()?;
    if first_token != Token::LeftParenthesis {
        return None;
    }

    let mut open_groups: usize = 1;
    let mut offset: usize = 1;
    loop {
        let Some(token) = self.peek_token_at(offset) else {
            // Incomplete `for (`: prefer header diagnostics over
            // call-like diagnostics.
            return Some(left_parenthesis_position);
        };
        if token == Token::LeftParenthesis {
            open_groups += 1;
        } else if token == Token::RightParenthesis {
            if open_groups <= 1 {
                // The group closed without a top-level `;`,
                // so this is not a loop header.
                return None;
            }
            open_groups -= 1;
        } else if token == Token::Semicolon && open_groups == 1 {
            return Some(left_parenthesis_position);
        }
        offset += 1;
    }
}
/// Parses a `for` expression after `for` and the opening `(` have been
/// consumed.
///
/// Header components may be omitted. Returns [`Expression::Error`] if the
/// header cannot be closed; otherwise returns [`Expression::For`] spanning
/// from `for_keyword_position` through the parsed body.
#[must_use]
pub(in super::super) fn parse_for_tail(
&mut self,
for_keyword_position: TokenPosition,
left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
let header = self.parse_for_header(for_keyword_position, left_parenthesis_position);
if header.right_parenthesis_position.is_none() {
return self.arena.alloc_node(
Expression::Error,
lexer::TokenSpan::range(
for_keyword_position,
self.last_consumed_position_or_start(),
),
);
}
let body = self.parse_branch_body(for_keyword_position);
let span = lexer::TokenSpan::range(for_keyword_position, body.end_anchor_token_position);
self.arena.alloc_node(
Expression::For {
initializer: header.initializer,
condition: header.condition,
step: header.step,
body,
},
span,
)
}
/// Parses a single, possibly absent, `for` header component that is
/// expected to end at `terminator_token`.
///
/// Returns `None` when the input has ended or the next token already is
/// `terminator_token`. Otherwise parses an expression;
/// `component_start_anchor_position` anchors the invalid-start diagnostic
/// at the place where the expression was expected.
fn parse_optional_for_header_expression(
    &mut self,
    invalid_start_error_kind: ParseErrorKind,
    terminator_token: Token,
    for_keyword_position: TokenPosition,
    component_start_anchor_position: TokenPosition,
) -> OptionalExpression<'src, 'arena> {
    match self.peek_token() {
        Some(upcoming_token) if upcoming_token != terminator_token => {}
        _ => return None,
    }
    let component_result = self.parse_required_expression_with_context(
        invalid_start_error_kind,
        for_keyword_position,
        component_start_anchor_position,
    );
    // Recovery must leave the next `;` alone because it belongs to the
    // surrounding `for` header. Syncing by `SyncLevel` is used instead of
    // syncing onto the matching delimiter, which would not stop at
    // a preceding `;`.
    let component = component_result
        .sync_error_until(self, SyncLevel::CloseParenthesis)
        .unwrap_or_fallback(self);
    Some(component)
}
fn optional_expression_is_error(expression: &OptionalExpression<'src, 'arena>) -> bool {
expression
.as_ref()
.is_some_and(|expression| matches!(**expression, Expression::Error))
}
/// Consumes the next header semicolon or recovers at `)`.
///
/// Returns `true` only when a `;` was found and consumed. In every other
/// case the function returns `false`:
///
/// - if the next token is `)`, it is consumed and its position is stored in
/// `right_parenthesis_position`;
/// - for any other token, or at end-of-file, nothing is consumed.
///
/// Suppresses the missing-semicolon diagnostic when the component
/// expression has already failed. Otherwise reports
/// `missing_semicolon_error_kind`, and when a component expression exists
/// attaches its span under `component_diagnostic_label`.
fn consume_for_header_semicolon_or_recover(
&mut self,
for_keyword_position: TokenPosition,
right_parenthesis_position: &mut Option<TokenPosition>,
component_expression: &OptionalExpression<'src, 'arena>,
missing_semicolon_error_kind: ParseErrorKind,
component_diagnostic_label: &'static str,
) -> bool {
// Pick the position the diagnostic will blame; the `;` case returns early.
let diagnostic_position = match self.peek_token_and_position() {
Some((Token::Semicolon, _)) => {
self.advance();
return true;
}
// The header ended early: remember the `)` so the caller knows the
// header was closed, then consume it.
Some((Token::RightParenthesis, position)) => {
*right_parenthesis_position = Some(position);
self.advance();
position
}
Some((_, position)) => position,
None => self.peek_position_or_eof(),
};
// Do not add a missing-semicolon error on top of a component
// parse error.
if Self::optional_expression_is_error(component_expression) {
return false;
}
let mut error = self
.make_error_at(missing_semicolon_error_kind, diagnostic_position)
.widen_error_span_from(for_keyword_position)
.blame_token(diagnostic_position);
// Point back at the component that should have been followed by `;`.
if let Some(expression) = component_expression {
error = error.related(component_diagnostic_label, *expression.span());
}
error.report_error(self);
false
}
/// Completes header recovery when the step expression failed to parse.
///
/// If `header.step` is not an error node this does nothing and returns
/// `false`. Otherwise it returns `true`, telling the caller to stop parsing
/// the header (which skips the missing-`)` check and its cascading
/// diagnostic); an immediately following `)` is consumed and recorded in
/// `header.right_parenthesis_position`.
fn recover_after_invalid_step_expression(
    &mut self,
    header: &mut ParsedForHeader<'src, 'arena>,
) -> bool {
    let step_failed = Self::optional_expression_is_error(&header.step);
    if step_failed {
        if let Some((Token::RightParenthesis, closing_position)) =
            self.peek_token_and_position()
        {
            header.right_parenthesis_position = Some(closing_position);
            self.advance();
        }
    }
    step_failed
}
/// Expects the `)` that closes a `for` header and stores the outcome in
/// `header.right_parenthesis_position`.
///
/// On failure the diagnostic is widened to start at `for_keyword_position`,
/// related to the opening `(` under the `"for_header_start"` label, and
/// synchronized at the delimiter matching `left_parenthesis_position`.
fn consume_for_header_closing_parenthesis(
    &mut self,
    for_keyword_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
    header: &mut ParsedForHeader<'src, 'arena>,
) {
    let closing_parenthesis_position = self
        .expect(
            Token::RightParenthesis,
            ParseErrorKind::ForLoopHeaderMissingClosingParenthesis,
        )
        .widen_error_span_from(for_keyword_position)
        .related_token("for_header_start", left_parenthesis_position)
        .sync_error_at_matching_delimiter(self, left_parenthesis_position)
        .ok_or_report(self);
    header.right_parenthesis_position = closing_parenthesis_position;
}
/// Parses the initializer, condition, and step expressions of a `for`
/// header.
///
/// Expects the opening `(` to be already consumed; `left_parenthesis_position`
/// is used only as a diagnostic anchor.
///
/// Stops after the first unrecovered separator error so later recovery
/// does not produce duplicate header diagnostics. In that case the returned
/// header holds whatever components were parsed so far, and its
/// `right_parenthesis_position` is set only if a `)` was consumed during
/// recovery.
fn parse_for_header(
&mut self,
for_keyword_position: TokenPosition,
left_parenthesis_position: TokenPosition,
) -> ParsedForHeader<'src, 'arena> {
let mut header = ParsedForHeader::new();
// Initializer: everything up to the first `;`, anchored at the `(`.
header.initializer = self.parse_optional_for_header_expression(
ParseErrorKind::ForLoopHeaderInitializerInvalidStart,
Token::Semicolon,
for_keyword_position,
left_parenthesis_position,
);
if !self.consume_for_header_semicolon_or_recover(
for_keyword_position,
&mut header.right_parenthesis_position,
&header.initializer,
ParseErrorKind::ForLoopHeaderMissingSemicolonAfterInitializer,
"for_header_initializer",
) {
return header;
}
// The `;` was just consumed, so the last consumed position is that `;`.
let initializer_semicolon_position = self.last_consumed_position_or_start();
// Condition: everything up to the second `;`.
header.condition = self.parse_optional_for_header_expression(
ParseErrorKind::ForLoopHeaderConditionInvalidStart,
Token::Semicolon,
for_keyword_position,
initializer_semicolon_position,
);
if !self.consume_for_header_semicolon_or_recover(
for_keyword_position,
&mut header.right_parenthesis_position,
&header.condition,
ParseErrorKind::ForLoopHeaderMissingSemicolonAfterCondition,
"for_header_condition",
) {
return header;
}
let condition_semicolon_position = self.last_consumed_position_or_start();
// Step: terminated by the closing `)` rather than by a `;`.
header.step = self.parse_optional_for_header_expression(
ParseErrorKind::ForLoopHeaderStepInvalidStart,
Token::RightParenthesis,
for_keyword_position,
condition_semicolon_position,
);
// A failed step has already been diagnosed; skip the missing-`)` check.
if self.recover_after_invalid_step_expression(&mut header) {
return header;
}
self.consume_for_header_closing_parenthesis(
for_keyword_position,
left_parenthesis_position,
&mut header,
);
header
}
}

View File

@ -0,0 +1,449 @@
//! Control expression parsing for Fermented `UnrealScript`.
//!
//! ## Condition boundary recovery and legacy compatibility
//!
//! Fermented `UnrealScript` allows omitting parentheses `(...)` around
//! condition expressions of `if`/`while`/`until` and similar constructs.
//! Conditions are therefore parsed as ordinary expressions by default.
//!
//! This means that a leading parenthesized expression may still be part of a
//! larger condition:
//!
//! ```unrealscript
//! if (2 + 2) * 2 < 7 { ... }
//! while (Index + 1) < Count DoWork();
//! ```
//!
//! For compatibility with older `UnrealScript` code, we apply one conservative
//! legacy cut-off rule:
//!
//! If the condition begins with a parenthesized expression, and the token after
//! the matching `)` is identifier-like, the parser treats only the
//! parenthesized expression as the condition. The following identifier-like
//! token is left for the branch body.
//!
//! This prevents the parser from accidentally consuming the following
//! statement/body as part of the condition in older code such as:
//!
//! ```unrealscript
//! if ( AIController(Controller) != None ) Cross = vect(0,0,0);
//! ```
//!
//! Without the legacy cut-off, a permissive expression parser could interpret
//! the identifier-like body opener, such as `Cross`, as an operator-like
//! continuation of the parenthesized condition.
//!
//! Operator tokens such as `*`, `+`, `<`, `==`, etc. do not trigger this
//! legacy cut-off. They allow the normal expression parser to continue the
//! condition.
//!
//! Trade-off: if an identifier-like token after the closing `)` was intended as
//! a custom/named operator, the parser prefers the legacy interpretation and
//! ends the condition at the closing `)`. Write the condition with additional
//! parentheses or use an unambiguous operator form.
use crate::ast::{BranchBody, Expression, ExpressionRef};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{
ParseErrorKind, ParseExpressionResult, ParseResult, Parser, ResultRecoveryExt,
diagnostic_labels,
};
pub(super) mod for_loop;
impl<'src, 'arena> Parser<'src, 'arena> {
/// Detects the legacy parenthesized-condition cut-off.
///
/// Yields the position of the opening `(` when the next token is `(`, it
/// has a matching delimiter, and the token right after that delimiter is a
/// valid identifier name. Yields `None` in every other case. Nothing is
/// consumed.
fn find_legacy_parenthesized_condition_opening_position(&mut self) -> Option<TokenPosition> {
    let (Token::LeftParenthesis, opening_position) = self.peek_token_and_position()? else {
        return None;
    };
    let closing_position = self.file().matching_delimiter(opening_position)?;
    let identifier_follows = self
        .peek_token_after_position(closing_position)
        .is_some_and(|token| token.is_valid_identifier_name());
    if identifier_follows {
        Some(opening_position)
    } else {
        None
    }
}
/// Parses the condition of a control-flow construct.
///
/// When the next token definitely cannot start an expression, returns an
/// error of `invalid_start_error_kind` blaming that token and relating it to
/// the last consumed token (the keyword requiring the condition).
///
/// Otherwise the condition is an ordinary expression, except when the
/// legacy parenthesized-condition cut-off applies: then only the leading
/// parenthesized expression is taken as the condition.
fn parse_condition(
    &mut self,
    invalid_start_error_kind: ParseErrorKind,
) -> ParseExpressionResult<'src, 'arena> {
    if self.next_token_definitely_cannot_start_expression() {
        let keyword_position = self.last_consumed_position_or_start();
        let error_position = self.peek_position_or_eof();
        let error = self
            .make_error_at(invalid_start_error_kind, error_position)
            .blame_token(error_position)
            .related_token(diagnostic_labels::EXPRESSION_REQUIRED_BY, keyword_position);
        return Err(error);
    }
    let condition = match self.find_legacy_parenthesized_condition_opening_position() {
        Some(left_parenthesis_position) => {
            self.advance(); // '('
            self.parse_parenthesized_expression_tail(left_parenthesis_position)
        }
        None => self.parse_expression(),
    };
    Ok(condition)
}
/// Parses the body of a control-flow construct into a [`BranchBody`].
///
/// The shape is picked from the next token:
///
/// - end-of-file: missing body, reported as an error;
/// - `;`: empty body with semicolon, as in `if (cond);`
/// - `}`: empty body before a closing brace, as in `{ ... if (cond) }`
/// - anything else: non-empty body, either a block `if (cond) { ... }` or a
///   single expression `if (cond) expr;`
///
/// For non-block bodies a trailing `;` is consumed when present and its
/// position is recorded in the returned [`BranchBody`].
fn parse_branch_body(
    &mut self,
    branch_owner_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    match self.peek_token_and_position() {
        None => self.recover_missing_branch_body(branch_owner_keyword_position),
        Some((Token::Semicolon, semicolon_position)) => {
            self.parse_empty_semicolon_branch_body(semicolon_position)
        }
        Some((Token::RightBrace, _)) => self.make_empty_branch_body_before_closing_brace(),
        Some(_) => self.parse_non_empty_branch_body(branch_owner_keyword_position),
    }
}
/// Reports a missing branch body and returns an empty [`BranchBody`].
///
/// Called by `parse_branch_body` when no token is left to start a body.
/// The [`ParseErrorKind::ControlFlowBodyExpected`] error is created at the
/// last consumed token, blames the end-of-file token, and is related to both
/// the owning keyword and the last consumed token.
fn recover_missing_branch_body(
&mut self,
branch_owner_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
let error = self
.make_error_at_last_consumed(ParseErrorKind::ControlFlowBodyExpected)
.blame_token(self.file.eof())
.related_token(
diagnostic_labels::EXPRESSION_REQUIRED_BY,
branch_owner_keyword_position,
)
.related_token(
diagnostic_labels::EXPRESSION_EXPECTED_AFTER,
self.last_consumed_position_or_start(),
);
// The body has no tokens of its own, so anchor its end at the end of the
// span the error covers. Read it before the error is moved into the report.
let end_anchor_token_position = error.covered_span.end;
self.report_error(error);
BranchBody {
expression: None,
semicolon_position: None,
end_anchor_token_position,
}
}
/// Consumes the `;` of an empty branch body such as `if (cond);`.
///
/// `semicolon_position` must be the position of the `;` that is the next
/// token; it becomes both the recorded semicolon and the end anchor.
fn parse_empty_semicolon_branch_body(
    &mut self,
    semicolon_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    let empty_body = BranchBody {
        expression: None,
        semicolon_position: Some(semicolon_position),
        end_anchor_token_position: semicolon_position,
    };
    self.advance(); // step over the `;`
    empty_body
}
/// Produces an empty branch body for a construct directly followed by `}`.
///
/// Nothing is consumed; the body is anchored at the last consumed token.
fn make_empty_branch_body_before_closing_brace(&mut self) -> BranchBody<'src, 'arena> {
    let end_anchor_token_position = self.last_consumed_position_or_start();
    BranchBody {
        expression: None,
        semicolon_position: None,
        end_anchor_token_position,
    }
}
/// Parses a branch body that consists of an actual expression.
///
/// Block bodies (`if ... {...}`) never take a following `;` - the block owns
/// its own terminator. Single-expression bodies consume one optional
/// trailing `;`, which then also becomes the end anchor of the body.
fn parse_non_empty_branch_body(
    &mut self,
    branch_owner_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    let expression_context_position = self.last_consumed_position_or_start();
    let body_expression = self
        .parse_required_expression_with_context(
            ParseErrorKind::ControlFlowBodyExpected,
            branch_owner_keyword_position,
            expression_context_position,
        )
        .unwrap_or_fallback(self);
    let expression_end_position = body_expression.span().end;
    let body_is_block = matches!(*body_expression, Expression::Block(..));
    let semicolon_position = if body_is_block {
        // A `;` after a block does not belong to the branch body.
        None
    } else if self.eat(Token::Semicolon) {
        self.last_consumed_position()
    } else {
        None
    };
    BranchBody {
        expression: Some(body_expression),
        semicolon_position,
        end_anchor_token_position: semicolon_position.unwrap_or(expression_end_position),
    }
}
/// Parses a condition followed by a branch body.
///
/// Fails with `condition_expected_error_kind` when the condition cannot be
/// parsed, or when the "condition" turned out to be a block that is not
/// followed by anything that could start a body: `if {...}` is diagnosed as
/// a missing condition, with the block related as `"branch_body"`.
fn parse_condition_and_branch_body(
    &mut self,
    branch_owner_keyword_position: TokenPosition,
    condition_expected_error_kind: ParseErrorKind,
) -> ParseResult<'src, 'arena, (ExpressionRef<'src, 'arena>, BranchBody<'src, 'arena>)> {
    let condition_start_position = self.peek_position_or_eof();
    let condition = self.parse_condition(condition_expected_error_kind)?;
    // The block was most likely meant as the body, not as the condition.
    let block_took_condition_place = matches!(*condition, Expression::Block(..))
        && self.next_token_definitely_cannot_start_expression();
    if block_took_condition_place {
        let error = self
            .make_error_at(condition_expected_error_kind, condition_start_position)
            .blame_token(condition_start_position)
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                branch_owner_keyword_position,
            )
            .related("branch_body", *condition.span());
        return Err(error);
    }
    let branch_body = self.parse_branch_body(branch_owner_keyword_position);
    Ok((condition, branch_body))
}
/// Parses the remainder of an `if` expression; the `if` keyword itself has
/// already been consumed.
///
/// The produced [`Expression::If`] starts at `if_keyword_position` and ends
/// at the end of the `else` body when there is one, or at the end of the
/// `if` body otherwise. A failed condition yields the error's fallback
/// expression instead.
#[must_use]
pub(super) fn parse_if_tail(
    &mut self,
    if_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let parsed = self
        .parse_condition_and_branch_body(if_keyword_position, ParseErrorKind::ConditionExpected);
    let (condition, then_body) = match parsed {
        Ok(condition_and_body) => condition_and_body,
        Err(error) => return error.fallback(self),
    };
    let else_body = if self.peek_keyword() == Some(Keyword::Else) {
        self.advance(); // 'else'
        let else_keyword_position = self.last_consumed_position_or_start();
        Some(self.parse_branch_body(else_keyword_position))
    } else {
        None
    };
    let if_expression_end_position = else_body
        .as_ref()
        .map_or(then_body.end_anchor_token_position, |body| {
            body.end_anchor_token_position
        });
    let span = TokenSpan::range(if_keyword_position, if_expression_end_position);
    self.arena.alloc_node(
        Expression::If {
            condition,
            then_body,
            else_body,
        },
        span,
    )
}
/// Parses the remainder of a `while` expression; the `while` keyword itself
/// has already been consumed.
///
/// The produced [`Expression::While`] covers `while_keyword_position`
/// through the end of the loop body. A failed condition yields the error's
/// fallback expression instead.
#[must_use]
pub(super) fn parse_while_tail(
    &mut self,
    while_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let parsed = self.parse_condition_and_branch_body(
        while_keyword_position,
        ParseErrorKind::ConditionExpected,
    );
    let (condition, body) = match parsed {
        Ok(condition_and_body) => condition_and_body,
        Err(error) => return error.fallback(self),
    };
    let loop_end_position = body.end_anchor_token_position;
    let loop_expression = Expression::While { condition, body };
    self.arena.alloc_node(
        loop_expression,
        TokenSpan::range(while_keyword_position, loop_end_position),
    )
}
/// Parses a `do ... until ...` expression after the `do` keyword.
///
/// The body is parsed first, then the `until` keyword is expected. If
/// `until` is missing, [`ParseErrorKind::DoMissingUntil`] is reported and an
/// error expression anchored at the end of the body stands in for the
/// condition; no condition is parsed in that case.
///
/// The resulting [`Expression::DoUntil`] spans from `do_keyword_position`
/// to the end of the condition.
#[must_use]
pub(super) fn parse_do_until_tail(
&mut self,
do_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
let body = self.parse_branch_body(do_keyword_position);
// NOTE(review): `report_error` is used here as returning `true` when an
// error was reported, as the variable name implies - confirm.
let until_keyword_was_missing = self
.expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil)
.widen_error_span_from(do_keyword_position)
.related_token("do_keyword", do_keyword_position)
.report_error(self);
let condition = if until_keyword_was_missing {
// Avoid a second diagnostic: substitute an error node for the condition.
self.make_error_expression_at(body.end_anchor_token_position)
} else {
self.parse_condition(ParseErrorKind::ConditionExpected)
.related_token("do_keyword", do_keyword_position)
.unwrap_or_fallback(self)
};
let span = TokenSpan::range(do_keyword_position, condition.span().end);
self.arena
.alloc_node(Expression::DoUntil { condition, body }, span)
}
/// Parses the remainder of a `foreach` expression; the `foreach` keyword
/// itself has already been consumed.
///
/// The iterator expression has to begin with an identifier-like token; its
/// full shape is left for later stages to validate. If the next token is
/// not identifier-like, a
/// [`ParseErrorKind::ForEachIteratorExpressionExpected`] error is raised and
/// its fallback expression returned.
///
/// The produced [`Expression::ForEach`] covers `foreach_keyword_position`
/// through the end of the body.
#[must_use]
pub(super) fn parse_foreach_tail(
    &mut self,
    foreach_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // End-of-file deliberately does not count as a bad start here: the
    // shared condition parser produces the missing-iterator diagnostic.
    let starts_with_non_identifier = self
        .peek_token()
        .is_some_and(|token| !token.is_valid_identifier_name());
    if starts_with_non_identifier {
        let error_position = self.peek_position_or_eof();
        let error = self
            .make_error_at(
                ParseErrorKind::ForEachIteratorExpressionExpected,
                error_position,
            )
            .blame_token(error_position)
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                foreach_keyword_position,
            );
        return error.fallback(self);
    }
    let parsed_iterator =
        self.parse_condition(ParseErrorKind::ForEachIteratorExpressionExpected);
    let iterator_expression = match parsed_iterator {
        Ok(iterator_expression) => iterator_expression,
        Err(error) => return error.fallback(self),
    };
    let body = self.parse_branch_body(foreach_keyword_position);
    let loop_end_position = body.end_anchor_token_position;
    let loop_expression = Expression::ForEach {
        iterator_expression,
        body,
    };
    self.arena.alloc_node(
        loop_expression,
        TokenSpan::range(foreach_keyword_position, loop_end_position),
    )
}
/// Parses the remainder of a `return` expression; the `return` keyword
/// itself has already been consumed.
///
/// A value expression is parsed unless the next token is `;` or the input
/// has ended. The terminating `;` is never consumed here; it is left for
/// the surrounding expression parser.
#[must_use]
pub(super) fn parse_return_tail(
    &mut self,
    return_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let value_is_absent = matches!(self.peek_token(), None | Some(Token::Semicolon));
    if value_is_absent {
        return self.arena.alloc_node(
            Expression::Return(None),
            TokenSpan::new(return_keyword_position),
        );
    }
    // Every non-`;` follower goes through the required-expression parser so
    // an invalid return value gets a return-specific diagnostic.
    let return_value = self
        .parse_required_expression(
            ParseErrorKind::ReturnValueInvalidStart,
            return_keyword_position,
        )
        .unwrap_or_fallback(self);
    let span = TokenSpan::range(return_keyword_position, return_value.span().end);
    self.arena
        .alloc_node(Expression::Return(Some(return_value)), span)
}
/// Parses the remainder of a `break` expression; the `break` keyword itself
/// has already been consumed.
///
/// A value expression is parsed unless the next token is `;` or the input
/// has ended. The terminating `;` is never consumed here; it is left for
/// the surrounding expression parser.
#[must_use]
pub(super) fn parse_break_tail(
    &mut self,
    break_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let value_is_absent = matches!(self.peek_token(), None | Some(Token::Semicolon));
    if value_is_absent {
        return self.arena.alloc_node(
            Expression::Break(None),
            TokenSpan::new(break_keyword_position),
        );
    }
    // Every non-`;` follower goes through the required-expression parser so
    // an invalid break value gets a break-specific diagnostic.
    let break_value = self
        .parse_required_expression(
            ParseErrorKind::BreakValueInvalidStart,
            break_keyword_position,
        )
        .unwrap_or_fallback(self);
    let span = TokenSpan::range(break_keyword_position, break_value.span().end);
    self.arena
        .alloc_node(Expression::Break(Some(break_value)), span)
}
/// Parses the continuation of a `goto` expression after its keyword.
///
/// Accepts either a name literal or an identifier as the target label.
/// On success the label token is consumed and an [`Expression::Goto`]
/// spanning from the keyword to the label is returned.
///
/// If no label follows, [`ParseErrorKind::GotoMissingLabel`] is reported,
/// nothing is consumed, and an error expression anchored at
/// `goto_keyword_position` is returned.
#[must_use]
pub(super) fn parse_goto_tail(
&mut self,
goto_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
// Labels may be written either as UnrealScript name literals or
// as bare names.
if let Some((label_token, label_position)) = self.peek_token_and_position()
&& (label_token == Token::NameLiteral || label_token.is_valid_identifier_name())
{
self.advance();
return self.arena.alloc_node_between(
Expression::Goto(label_position),
goto_keyword_position,
label_position,
);
}
// The error is created at the last consumed token but blames the token
// (or end-of-file) found where the label was expected.
let error_position = self.peek_position_or_eof();
self.make_error_at_last_consumed(ParseErrorKind::GotoMissingLabel)
.widen_error_span_from(goto_keyword_position)
.blame_token(error_position)
.related_token("goto_keyword", goto_keyword_position)
.report_error(self);
self.make_error_expression_at(goto_keyword_position)
}
}

View File

@ -0,0 +1,80 @@
//! Identifier parsing for Fermented `UnrealScript`.
//!
//! Provides shared routines for parsing both regular and qualified identifiers,
//! e.g. `KFChar.ZombieClot`.
use crate::arena::{self, ArenaVec};
use crate::ast::{IdentifierToken, QualifiedIdentifier, QualifiedIdentifierRef};
use crate::lexer::{self, Token, TokenSpan};
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt};
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a single identifier token.
///
/// Fails with `invalid_identifier_error_kind` when the input has ended or
/// when the next token cannot serve as an identifier; in the latter case
/// the token is left unconsumed.
pub(crate) fn parse_identifier(
    &mut self,
    invalid_identifier_error_kind: ParseErrorKind,
) -> ParseResult<'src, 'arena, IdentifierToken> {
    let (token, token_position) =
        self.require_token_and_position(invalid_identifier_error_kind)?;
    match Parser::identifier_token_from_token(token, token_position) {
        Some(identifier) => {
            self.advance();
            Ok(identifier)
        }
        None => Err(self.make_error_at_last_consumed(invalid_identifier_error_kind)),
    }
}
/// Wraps `token_position` into an [`IdentifierToken`] when `token` is valid
/// as an identifier name, and yields `None` otherwise.
///
/// This is pure validation/wrapping: it is an associated function with no
/// access to the parser, so no input is consumed.
pub(crate) fn identifier_token_from_token(
    token: Token,
    token_position: lexer::TokenPosition,
) -> Option<IdentifierToken> {
    if token.is_valid_identifier_name() {
        Some(IdentifierToken(token_position))
    } else {
        None
    }
}
/// Parses a qualified (dot-separated) identifier path,
/// e.g. `KFChar.ZombieClot`.
///
/// This is used for name paths where each segment must be
/// a valid identifier and segments are separated by `.` tokens.
///
/// The first segment is stored as `head`; further segments are collected
/// into `tail`, which stays `None` for an unqualified identifier. The
/// resulting node spans from the first to the last segment.
///
/// On failure produces an error of specified [`ParseErrorKind`]
/// `invalid_identifier_error_kind`. Once a `.` has been consumed, a missing
/// segment is an error: the error span is widened back to the head and the
/// dot is attached under the `"qualifier_dot"` label.
pub(crate) fn parse_qualified_identifier(
&mut self,
invalid_identifier_error_kind: ParseErrorKind,
) -> ParseResult<'src, 'arena, QualifiedIdentifierRef<'arena>> {
let head = self.parse_identifier(invalid_identifier_error_kind)?;
// Allocated lazily so plain identifiers do not create an arena vector.
let mut tail = None;
let span_start = head.0;
let mut span_end = span_start;
while let Some((Token::Period, dot_position)) = self.peek_token_and_position() {
self.advance(); // '.'
let next_segment = match self
.parse_identifier(invalid_identifier_error_kind)
.widen_error_span_from(head.0)
{
Ok(next_segment) => next_segment,
Err(error) => return Err(error.related_token("qualifier_dot", dot_position)),
};
span_end = next_segment.0;
let tail_vec = tail.get_or_insert_with(|| ArenaVec::new_in(self.arena));
tail_vec.push(next_segment);
}
Ok(arena::ArenaNode::new_in(
QualifiedIdentifier { head, tail },
TokenSpan::range(span_start, span_end),
self.arena,
))
}
}

View File

@ -0,0 +1,123 @@
//! Literal decoding for Fermented `UnrealScript`.
//!
//! This module defines the semantic rules for interpreting literal tokens
//! produced by the lexer. It is responsible only for *decoding* the textual
//! representation of literals into their internal values.
//!
//! The rules implemented here intentionally mirror the quirks of
//! Unreal Engine 2's `UnrealScript`.
use crate::parser::{ParseErrorKind, ParseResult};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Decodes an integer literal string into [`u128`].
///
/// Syntax:
/// - Optional base prefix: `0b` | `0o` | `0x` (case-insensitive).
///   No prefix -> decimal.
/// - Digits must match the base (`0-1`/`0-7`/`0-9A-F`); hexadecimal digits
///   are accepted in either case.
/// - Underscores are allowed and ignored (e.g., `1_000`, `0xDE_AD`).
/// - No leading sign; parsed as a non-negative magnitude. This includes a
///   sign placed after the base prefix (`0x+1F` is rejected).
/// - Must fit within [`u128`].
///
/// Examples: `42`, `0b1010_0011`, `0o755`, `0xDEAD_BEEF`.
///
/// On failure, returns [`ParseErrorKind::InvalidNumericLiteral`] created
/// via `make_error_at_last_consumed`.
pub(crate) fn decode_integer_literal(&self, literal: &str) -> ParseResult<'src, 'arena, u128> {
    let invalid_literal =
        || self.make_error_at_last_consumed(ParseErrorKind::InvalidNumericLiteral);
    let (base, content) = match literal.split_at_checked(2) {
        Some(("0b" | "0B", stripped)) => (2, stripped),
        Some(("0o" | "0O", stripped)) => (8, stripped),
        Some(("0x" | "0X", stripped)) => (16, stripped),
        _ => (10, literal),
    };
    let digits_without_underscores = content.replace('_', "");
    // `from_str_radix` tolerates a leading `+`, which this literal syntax
    // does not allow; reject it explicitly. (`-` is already rejected for
    // unsigned types.)
    if digits_without_underscores.starts_with('+') {
        return Err(invalid_literal());
    }
    u128::from_str_radix(&digits_without_underscores, base).map_err(|_| invalid_literal())
}
/// Decodes a float literal as `f64`, following the permissive and only
/// partially documented behavior of `UnrealScript`.
///
/// Unreal Engine 2 does not define a precise and consistent set of rules
/// for float literals and the original compiler contains several quirks.
/// Because of this, we default to normalizing the text using a small set of
/// UnrealScript-specific rules and then parse the result using rust's
/// `f64` parser.
///
/// Rules implemented here:
/// - Only decimal floats and special literals (e.g. `NaN`, `inf`)
///   are supported (no hex or binary formats).
/// - A single trailing `f` or `F`, if present, is removed before parsing.
///   If the text does not parse without that suffix, the unstripped text is
///   tried as well, so that `inf` (whose last letter is not a suffix) is
///   still accepted.
/// - The literal text is scanned for periods (`.`). If a second period
///   is found, everything from that second `.` onward is discarded.
///
///   Examples:
///   * `1.2.3e4` becomes `1.2`
///   * `1.2e3.4` becomes `1.2e3`
///
/// - After this truncation step, the remaining text is interpreted as a
///   normal rust `f64` literal. This means it may contain digits, at
///   most one decimal point, and an optional exponent part (for example
///   `e3` or `E-2`), but it must otherwise follow rust's `f64` syntax.
///   Underscores, spaces, and other unsupported characters cause a
///   parse error.
///
/// On failure, this function returns
/// [`ParseErrorKind::InvalidNumericLiteral`] created via
/// `make_error_at_last_consumed`.
pub(crate) fn decode_float_literal(&self, literal: &str) -> ParseResult<'src, 'arena, f64> {
    /// Cuts `text` right before its second `.`, matching UnrealScript
    /// behavior; returns `text` unchanged when it has fewer than two.
    fn truncate_at_second_period(text: &str) -> &str {
        text.match_indices('.')
            .nth(1)
            .and_then(|(period_index, _)| text.get(..period_index))
            .unwrap_or(text)
    }
    let without_suffix = literal
        .strip_suffix('f')
        .or_else(|| literal.strip_suffix('F'));
    let primary_text = truncate_at_second_period(without_suffix.unwrap_or(literal));
    primary_text
        .parse::<f64>()
        .or_else(|parse_error| match without_suffix {
            // The trailing `f` was part of the literal itself (`inf`), not
            // a float suffix: retry with the text as written.
            Some(_) => truncate_at_second_period(literal).parse::<f64>(),
            None => Err(parse_error),
        })
        .map_err(|_| self.make_error_at_last_consumed(ParseErrorKind::InvalidNumericLiteral))
}
/// Resolves backslash escapes of a string token and stores the result in
/// the arena.
///
/// Recognized escapes are `\n`, `\t`, `\"` and `\\`. For any other escaped
/// character the backslash is dropped and the character is kept as is
/// (`UnrealScript` behavior). A lone `\` at the very end of `raw_string`
/// (which well-formed tokens should not contain) is silently ignored.
///
/// `raw_string` is expected to be the token text without its surrounding
/// quotes.
pub(crate) fn unescape_string_literal(
    &self,
    raw_string: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut unescaped = String::with_capacity(raw_string.len());
    // Set after a `\` has been seen and until the escaped character arrives.
    // If the input ends while this is still set, the dangling `\` is dropped.
    let mut escape_pending = false;
    for character in raw_string.chars() {
        if escape_pending {
            escape_pending = false;
            unescaped.push(match character {
                'n' => '\n',
                't' => '\t',
                // `"`, `\` and every unrecognized escape stand for
                // themselves.
                verbatim => verbatim,
            });
        } else if character == '\\' {
            escape_pending = true;
        } else {
            unescaped.push(character);
        }
    }
    self.arena.string(&unescaped)
}
}

View File

@ -0,0 +1,32 @@
//! Expression parsing for Fermented `UnrealScript`.
//!
//! This module group implements the language's expression parser around a
//! Pratt-style core. It is split into small submodules by role: precedence,
//! identifiers, literals, selectors, block bodies, keyword-led/control-flow
//! forms, primary-expression dispatch, and the Pratt driver itself.
//!
//! The parser is designed to keep building a best-effort AST on malformed
//! input. Syntax problems are reported through diagnostics, while committed
//! parsers recover locally and return fallback nodes or partial structures when
//! necessary.
//!
//! ## Expression layering
//!
//! The parser distinguishes several layers of expression parsing:
//!
//! - **primaries**: forms that can be parsed directly from the current token,
//! without an already parsed left-hand side;
//! - **selectors**: suffix continuations such as member access, indexing, and
//! calls, which require a left-hand side;
//! - **prefix / postfix / infix operators**: handled by the Pratt parser using
//! precedence ranks.
mod block; // `{ ... }` block-body parsing and block/expression item handling.
mod control_flow; // `if`, `while`, `do`, `foreach`, `for`, `return`, etc.
mod identifier; // Identifier and qualified-name parsing helpers.
mod literals; // Literal decoding and literal-specific parsing utilities.
mod pratt; // Top-level Pratt driver.
mod precedence; // Operator precedence ranks and Pratt binding rules.
mod primary; // Primary-expression parsing and keyword-vs-identifier dispatch.
mod selectors; // Suffix continuations: member access, indexing, and calls.
mod switch; // `switch (...) { ... }` parsing and arm/body recovery.

View File

@ -0,0 +1,276 @@
//! Core of the expression parser for Fermented `UnrealScript`.
//!
//! This module implements a Pratt-style parser for the language's expression
//! grammar, supporting:
//!
//! * Primary expressions (see [`crate::parser::primary`] for details on what
//! we consider to be a primary expression);
//! * Prefix operators;
//! * Postfix operators;
//! * Infix operators with hard-coded precedence and associativity.
//!
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
//! operators bind. Infix parsing uses the pair of binding powers returned by
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
//! The parser infrastructure supports both left- and right-associative
//! operators, but Fermented `UnrealScript` currently defines only
//! left-associative ones.
//!
//! ## Postfix operator vs "selectors"
//!
//! Everywhere here we distinguish *selectors* like field accessor `.`,
//! function call `()` or array indices `[]` from other *postfix operators*
//! as they:
//!
//! 1. Have significantly different semantic meaning;
//! 2. Are not considered operators from `UnrealScript`'s viewpoint
//! (e.g. cannot be overloaded).
//!
//! ## See also
//!
//! - [`parser::Parser::parse_expression`] - main entry point
//! - [`PrecedenceRank`] - operator binding strengths
//! - [`super::precedence`] - operator precedence definitions
use crate::ast::{self, Expression, ExpressionRef};
use crate::lexer::TokenPosition;
use crate::parser::{
self, ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, diagnostic_labels,
};
pub use super::precedence::PrecedenceRank;
/// Tells whether `expression` must not be followed by postfix operators
/// such as `++` and `--`.
///
/// This is the case for control-flow forms (`if`, `while`, `do`/`until`,
/// `for`, `foreach`, `switch`) and blocks. Only postfix operators are
/// restricted: selectors such as field access `.x`, indexing `[i]`, and
/// calls `(args)` stay allowed.
fn forbids_postfix_operators(expression: &ExpressionRef<'_, '_>) -> bool {
    let node = &**expression;
    matches!(
        node,
        Expression::If { .. }
            | Expression::While { .. }
            | Expression::DoUntil { .. }
            | Expression::For { .. }
            | Expression::ForEach { .. }
            | Expression::Switch { .. }
            | Expression::Block { .. }
    )
}
impl<'src, 'arena> Parser<'src, 'arena> {
// TODO: success here guarantees progress
/// Parses an expression.
///
/// Parsing starts at the loosest precedence rank. On error the parser is
/// synchronized to [`parser::SyncLevel::ExpressionStart`] and a fallback
/// expression is produced.
///
/// Always returns some expression node; any syntax errors are reported
/// through the parser's diagnostics.
#[must_use]
pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
.sync_error_until(self, parser::SyncLevel::ExpressionStart)
.unwrap_or_fallback(self)
}
/// Parses an expression in a grammar position where an expression is
/// required.
///
/// This is the checked variant of [`Parser::parse_expression`]. If the next
/// token is known not to be a valid expression starter, this returns an
/// error of `bad_start_error_kind` that blames that token, after
/// synchronizing it to [`crate::parser::SyncLevel::ExpressionStart`].
/// Unlike [`Parser::parse_expression`], the error is returned to the caller
/// rather than converted into a fallback expression here.
///
/// `required_by_position` identifies the token or construct that created
/// the requirement for an expression. It is attached to the diagnostic with
/// the [`diagnostic_labels::EXPRESSION_REQUIRED_BY`] label.
pub(super) fn parse_required_expression(
&mut self,
bad_start_error_kind: ParseErrorKind,
required_by_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
if self.next_token_definitely_cannot_start_expression() {
let error_position = self.peek_position_or_eof();
return Err(self
.make_error_at(bad_start_error_kind, error_position)
.sync_error_until(self, crate::parser::SyncLevel::ExpressionStart)
.blame_token(error_position)
.related_token(
diagnostic_labels::EXPRESSION_REQUIRED_BY,
required_by_position,
));
}
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
}
pub(super) fn parse_required_expression_with_context(
&mut self,
bad_start_error_kind: ParseErrorKind,
required_by_position: TokenPosition,
expression_context_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
if self.next_token_definitely_cannot_start_expression() {
let error_position = self.peek_position_or_eof();
return Err(self
.make_error_at(bad_start_error_kind, error_position)
.sync_error_until(self, crate::parser::SyncLevel::ExpressionStart)
.blame_token(error_position)
.related_token(
diagnostic_labels::EXPRESSION_REQUIRED_BY,
required_by_position,
)
.related_token(
diagnostic_labels::EXPRESSION_EXPECTED_AFTER,
expression_context_position,
));
}
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
}
pub(super) fn make_error_expression_at(
&self,
position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
crate::arena::ArenaNode::new_in(
Expression::Error,
crate::lexer::TokenSpan::new(position),
self.arena,
)
}
/// Parses an expression, accepting only infix operators whose binding is
/// at least as tight as `min_precedence_rank`.
///
/// The steps run in a fixed order: prefix/primary, selectors, postfix
/// operators (unless forbidden for the parsed form), then infix operators.
fn parse_expression_with_min_precedence_rank(
    &mut self,
    min_precedence_rank: PrecedenceRank,
) -> parser::ParseExpressionResult<'src, 'arena> {
    let primary = self.parse_prefix_or_primary()?;
    let with_selectors = self.parse_selectors_after(primary)?;
    // Postfix operators are rejected after expression forms that represent
    // control-flow or block constructs. Selectors were already parsed
    // normally above.
    // This avoids ambiguities in cases like:
    //
    // ```unrealscript
    // if test() { do_it(); }
    // ++ counter;
    // ```
    //
    // This wasn't a problem in UnrealScript, because such constructs were
    // never treated as expressions. And it shouldn't be an issue for us
    // because neither `--` nor `++` (the only existing default postfix
    // operators) makes any sense after such expressions anyway.
    let operand = if forbids_postfix_operators(&with_selectors) {
        with_selectors
    } else {
        self.parse_postfix_after(with_selectors)
    };
    self.parse_infix_after(operand, min_precedence_rank)
}
/// Parses a prefix-operator expression or a primary expression (the Pratt
/// parser's "nud", or null denotation).
///
/// # Errors
///
/// Propagates the error from `require_token_lexeme_and_position`, and
/// returns [`ParseErrorKind::ExpressionExpected`] without consuming the
/// token when it definitely cannot start an expression. Errors from the
/// operand of a prefix operator are tagged with the `"prefix_operator"`
/// label.
fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> {
    let (token, token_lexeme, token_position) =
        self.require_token_lexeme_and_position(ParseErrorKind::ExpressionExpected)?;
    // Do not step over an obviously wrong token;
    // this prevents error cases like `new(Outer, Name, 7 +) SomeClass`.
    if token.is_definitely_not_expression_start() {
        return Err(self.make_error_at(ParseErrorKind::ExpressionExpected, token_position));
    }
    self.advance();
    match ast::PrefixOperator::try_from(token) {
        Ok(operator) => {
            // In UnrealScript, prefix and postfix operators bind tighter
            // than any infix operator, so the operand can safely be parsed
            // at the tightest precedence.
            let operand = self
                .parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST)
                .related_token("prefix_operator", token_position)?;
            Ok(Expression::new_prefix(
                self.arena,
                token_position,
                operator,
                operand,
            ))
        }
        Err(_) => self.parse_primary_from_current_token(token, token_lexeme, token_position),
    }
}
/// Consumes every consecutive postfix operator, wrapping `left_hand_side`
/// in one postfix node per operator.
///
/// Returns `left_hand_side` unchanged when no postfix operator follows.
fn parse_postfix_after(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
) -> ExpressionRef<'src, 'arena> {
    loop {
        let Some((operator, operator_position)) = self.peek_postfix_with_position() else {
            return left_hand_side;
        };
        self.advance();
        left_hand_side =
            Expression::new_postfix(self.arena, left_hand_side, operator, operator_position);
    }
}
/// Folds infix operators onto `left_hand_side` for as long as the next
/// operator binds at least as tight as `min_precedence_rank`.
///
/// Associativity is encoded by
/// [`super::precedence::infix_precedence_ranks`].
///
/// Stops at the first operator that is looser than `min_precedence_rank`.
///
/// # Errors
///
/// Propagates a failure to parse a right-hand side, tagged with the
/// `"infix_operator"` label pointing at the operator.
fn parse_infix_after(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
    min_precedence_rank: PrecedenceRank,
) -> parser::ParseExpressionResult<'src, 'arena> {
    loop {
        let Some((operator, right_precedence_rank)) =
            self.peek_infix_with_min_precedence_rank(min_precedence_rank)
        else {
            return Ok(left_hand_side);
        };
        self.advance();
        // The operator was just consumed, so it is the last consumed token.
        let infix_operator_position = self.last_consumed_position_or_start();
        let right_hand_side = self
            .parse_expression_with_min_precedence_rank(right_precedence_rank)
            .related_token("infix_operator", infix_operator_position)?;
        left_hand_side =
            Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
    }
}
/// Peeks at the next token and, if it converts to a postfix operator,
/// returns that operator together with the token's position.
///
/// Returns [`None`] when there is no next token or it is not a postfix
/// operator. Exists so the postfix loop can peek and map in one step.
fn peek_postfix_with_position(
    &mut self,
) -> Option<(ast::PostfixOperator, crate::lexer::TokenPosition)> {
    let (token, token_position) = self.peek_token_and_position()?;
    ast::PostfixOperator::try_from(token)
        .ok()
        .map(|operator| (operator, token_position))
}
/// Peeks for an infix operator whose left binding rank is not looser than
/// `min_precedence_rank`.
///
/// On a match, returns the operator together with the minimum precedence
/// rank to use when parsing its right-hand side (i.e. the operator's right
/// binding rank).
///
/// Returns [`None`] when there is no next token, when it is not an infix
/// operator, or when the operator is looser than `min_precedence_rank`.
fn peek_infix_with_min_precedence_rank(
    &mut self,
    min_precedence_rank: PrecedenceRank,
) -> Option<(ast::InfixOperator, PrecedenceRank)> {
    let next_token = self.peek_token()?;
    let (left_precedence_rank, operator, right_precedence_rank) =
        super::precedence::infix_precedence_ranks(next_token)?;
    if left_precedence_rank.is_looser_than(min_precedence_rank) {
        None
    } else {
        Some((operator, right_precedence_rank))
    }
}
}

View File

@ -0,0 +1,93 @@
//! Precedence tables for Fermented `UnrealScript` operators.
//!
//! These values don't follow the usual *binding power* convention for
//! a Pratt parser, where tighter binding corresponds to a larger number.\
//! Here, the smaller the number, the tighter the binding power.\
//! For this reason, we use the term *precedence rank* instead.
//!
//! ## Operators sorted by precedence (lowest number = tighter binding)
//!
//! ### Infix operators
//!
//! All infix operators in `UnrealScript` are
//! [left-associative](https://wiki.beyondunreal.com/Operators).
//!
//! 12: `**`
//! 16: `*`, `/`, `Cross`, `Dot`
//! 18: `%`
//! 20: `+`, `-`
//! 22: `<<`, `>>`, `>>>`
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
//! 26: `!=`
//! 28: `&`, `^`, `|`
//! 30: `&&`, `^^`
//! 32: `||`
//! 34: `*=`, `/=`, `+=`, `-=`
//! 40: `$`, `*`, `@`
//! 44: `$=`, `*=`, `@=`
//! 45: `-=`
//!
//! Some operators, such as `*`, appear twice with different precedence
//! ranks because they were defined with different values for different types
//! in separate script source files (as in the Killing Floor sources).\
//! However, `UnrealScript` uses only the first definition it encounters in
//! `Object.uc`, which corresponds to the lower value.
//!
//! ### Prefix operators
//!
//! `!`, `~`, `+`, `-`, `++`, `--`.
//!
//! ### Postfix operators
//!
//! `++`, `--`.
use crate::ast::{InfixOperator, infix_operator_info};
use crate::lexer::Token;
/// Precedence rank consumed by the Pratt parser, stored as a single byte.
///
/// The scale is inverted relative to the usual binding-power convention:
/// a smaller number binds tighter and a larger number binds looser. This
/// matches how the `UnrealScript` tables were recorded.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct PrecedenceRank(u8);

impl PrecedenceRank {
    /// The loosest rank that can be represented.
    ///
    /// Because smaller numbers bind tighter, this is the maximum [`u8`]
    /// value.
    pub const LOOSEST: Self = Self(u8::MAX);

    /// The tightest rank that can be represented.
    ///
    /// Because smaller numbers bind tighter, this is zero.
    pub const TIGHTEST: Self = Self(0);

    /// Returns `true` when `self` binds strictly looser than `other`,
    /// i.e. when `self` holds the larger number.
    pub const fn is_looser_than(self, other: Self) -> bool {
        other.0 < self.0
    }
}
/// Looks up the infix operator for `token` together with its binding ranks,
/// returned as `(left_precedence_rank, operator, right_precedence_rank)`.
///
/// Returns [`None`] when `infix_operator_info` has no entry for `token`,
/// i.e. when `token` is not an infix operator.
pub fn infix_precedence_ranks(
    token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
    let info = infix_operator_info(token)?;
    // All operators are left-associative, so the right rank is the left rank
    // minus one (on our "smaller is tighter" scale this makes an operator of
    // the same rank on the right-hand side stop the nested parse, which
    // yields left associativity in Pratt parsing).
    //
    // The ranks in this module's table are even, except for the shadowed
    // `-=` entry at 45, so subtracting one is not expected to land on
    // another operator group's rank.
    // NOTE(review): the left rank is read from `info.right_precedence_rank`;
    // confirm that field holds the operator's declared precedence.
    // NOTE(review): `- 1` would underflow for a rank of 0; the table's
    // smallest listed rank is 12 - confirm against `infix_operator_info`.
    let left_precedence_rank = PrecedenceRank(info.right_precedence_rank);
    let right_precedence_rank = PrecedenceRank(info.right_precedence_rank - 1);
    Some((left_precedence_rank, info.operator, right_precedence_rank))
}

View File

@ -0,0 +1,359 @@
//! Parser for primary expressions in Fermented UnrealScript.
//!
//! This module implements parsing of primary expressions via
//! [`Parser::parse_primary_from_current_token`] and its helper
//! [`Parser::try_parse_keyword_primary`].
//!
//! ## What is a "primary expression" here?
//!
//! In this module, "primary" is used somewhat more broadly than in a
//! textbook grammar, but it still has one essential property:
//!
//! A primary expression is an expression form that can be parsed
//! directly from the current token, without requiring an already
//! parsed left-hand side.
//!
//! This includes ordinary primaries such as literals, identifiers, and
//! parenthesized expressions, as well as keyword-led forms such as
//! `if`, `while`, `for`, `foreach`, `switch`, `return`, `break`,
//! `continue`, `new`, and `class<...>`.
//!
//! By contrast, selectors, postfix operators, and infix operators are
//! not primaries. They cannot stand on their own here: they are parsed
//! only as continuations of an already parsed expression.
//!
//! So "primary" here does not mean "smallest atomic expression".
//! It means "an expression form that does not need a left-hand side
//! in order to be parsed".
//!
//! ## Keyword-led primaries and identifier fallback
//!
//! Some lexer keywords are always parsed as keyword-led primary expressions
//! in expression position: `if`, `while`, `do`, `foreach`, `return`, `break`,
//! `continue`, `new`, `true`, `false`, and `none`.
//!
//! Other keywords are accepted as keyword-led forms only when the following
//! tokens commit to that syntax. Otherwise they remain available as
//! identifier-like primaries.
//!
//! - `for` is parsed as a loop only when followed by a parenthesized header
//! containing a top-level `;`, matching `for (init; condition; step)`.
//! - `switch` is parsed as a switch expression only when followed by `(`.
//! - `goto` is parsed as a label jump only when it is not followed by `(`.
//! - `class` is parsed as a class type expression only when followed by `<`.
//!
//! These rules are local and syntactic. They avoid name resolution while still
//! supporting existing legacy code that uses some keywords as ordinary names.
//!
//! ### Why is `switch` handled differently?
//!
//! `switch` is handled differently because, in existing `UnrealScript` code,
//! it may appear either as a keyword-led construct or as an identifier.
//!
//! Its disambiguation rule is simpler than for `for`: if the next token is
//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains
//! available as an identifier.
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
mod new;
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a primary expression whose first token is `token`.
///
/// `token` is assumed to be already consumed; `token_lexeme` and
/// `token_position` describe that same token.
///
/// Handles literals, grouped expressions, block expressions, keyword-led
/// forms, and identifier-like tokens. Selectors, postfix operators, and
/// infix operators are not parsed here; they are handled afterwards as
/// continuations of the returned primary.
///
/// # Errors
///
/// Propagates failures from decoding integer and float literals, and
/// returns whatever [`Parser::parse_identifier_like_primary`] returns when
/// the token is not one of the dedicated forms (that is
/// [`ParseErrorKind::ExpressionExpected`] if the token cannot be used as an
/// identifier).
pub(super) fn parse_primary_from_current_token(
    &mut self,
    token: Token,
    token_lexeme: &'src str,
    token_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    let expression = match token {
        Token::IntegerLiteral => {
            let value = self.decode_integer_literal(token_lexeme)?;
            self.arena
                .alloc_node_at(Expression::Integer(value), token_position)
        }
        Token::FloatLiteral => {
            let value = self.decode_float_literal(token_lexeme)?;
            self.arena
                .alloc_node_at(Expression::Float(value), token_position)
        }
        Token::StringLiteral => {
            let value = self.unescape_string_literal(token_lexeme);
            self.arena
                .alloc_node_at(Expression::String(value), token_position)
        }
        Token::NameLiteral => {
            // A bare name literal carries no tag.
            let untagged_name = Expression::NameLiteral {
                tag: None,
                name: token_lexeme,
            };
            self.arena.alloc_node_at(untagged_name, token_position)
        }
        Token::LeftParenthesis => self.parse_parenthesized_expression_tail(token_position),
        Token::LeftBrace => self.parse_block_body_tail(token_position),
        Token::Keyword(keyword) => {
            // A keyword that does not commit to its keyword-led form falls
            // back to being treated as an identifier.
            let Some(keyword_expression) =
                self.try_parse_keyword_primary(keyword, token_position)
            else {
                return self.parse_identifier_like_primary(token, token_position);
            };
            keyword_expression
        }
        _ => return self.parse_identifier_like_primary(token, token_position),
    };
    Ok(expression)
}
/// Tries to parse a keyword-led primary expression for `keyword`, whose
/// token sits at `token_position`.
///
/// Returns `None` when the keyword should instead be interpreted as an
/// identifier in this position. For `class` the `<` is consumed only when
/// it is present; for `for` the `(` is consumed only after the loop-header
/// check succeeds.
fn try_parse_keyword_primary(
    &mut self,
    keyword: Keyword,
    token_position: TokenPosition,
) -> OptionalExpression<'src, 'arena> {
    let expression = match keyword {
        Keyword::True => self
            .arena
            .alloc_node_at(Expression::Bool(true), token_position),
        Keyword::False => self
            .arena
            .alloc_node_at(Expression::Bool(false), token_position),
        Keyword::None => self.arena.alloc_node_at(Expression::None, token_position),
        Keyword::If => self.parse_if_tail(token_position),
        Keyword::While => self.parse_while_tail(token_position),
        Keyword::Do => self.parse_do_until_tail(token_position),
        Keyword::ForEach => self.parse_foreach_tail(token_position),
        Keyword::Return => self.parse_return_tail(token_position),
        Keyword::Break => self.parse_break_tail(token_position),
        Keyword::Continue => self
            .arena
            .alloc_node_at(Expression::Continue, token_position),
        Keyword::New => self.parse_new_expression_tail(token_position),
        // The keywords below remain valid identifiers unless the following
        // tokens commit to the keyword-led form.
        Keyword::For
            if let Some(left_parenthesis_position) =
                self.peek_for_loop_header_left_parenthesis_position() =>
        {
            self.advance(); // `(`
            self.parse_for_tail(token_position, left_parenthesis_position)
        }
        Keyword::Goto if !matches!(self.peek_token(), Some(Token::LeftParenthesis)) => {
            self.parse_goto_tail(token_position)
        }
        // `switch` is only treated as keyword-led when followed by `(`
        // to match the syntax accepted by the existing codebase.
        Keyword::Switch if matches!(self.peek_token(), Some(Token::LeftParenthesis)) => {
            self.parse_switch_tail(token_position)
        }
        Keyword::Class => {
            // Without a following `<`, `class` is left for the identifier
            // fallback and nothing is consumed.
            let left_angle_bracket_position = self.eat_with_position(Token::Less)?;
            self.parse_class_type_tail(token_position, left_angle_bracket_position)
        }
        _ => return None,
    };
    Some(expression)
}
/// Interprets the already-consumed `primary_token` as an identifier, or as
/// the tag of a tagged name literal when a name literal follows directly.
///
/// # Errors
///
/// Returns [`ParseErrorKind::ExpressionExpected`] at
/// `primary_token_position` if the token cannot be used as an identifier
/// in this position.
fn parse_identifier_like_primary(
    &mut self,
    primary_token: Token,
    primary_token_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    let Some(identifier_token) =
        Self::identifier_token_from_token(primary_token, primary_token_position)
    else {
        return Err(
            self.make_error_at(ParseErrorKind::ExpressionExpected, primary_token_position)
        );
    };
    // A token that is valid as an identifier may still start a tagged-name
    // literal such as `Texture'Foo.Bar'`.
    let expression = match self.peek_token_lexeme_and_position() {
        Some((Token::NameLiteral, lexeme, name_position)) => {
            self.advance();
            let tagged_name = Expression::NameLiteral {
                tag: Some(identifier_token),
                name: lexeme,
            };
            self.arena
                .alloc_node_between(tagged_name, primary_token_position, name_position)
        }
        _ => self.arena.alloc_node_at(
            Expression::Identifier(identifier_token),
            primary_token_position,
        ),
    };
    Ok(expression)
}
/// Parses the remainder of a parenthesized expression.
///
/// Assumes the opening `(` at `left_parenthesis_position` has already been
/// consumed. Always returns a node: an invalid start yields the error
/// fallback, and a missing closing `)` is reported and recovered from.
pub(super) fn parse_parenthesized_expression_tail(
    &mut self,
    left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    if self.next_token_definitely_cannot_start_expression() {
        // Nothing inside the parentheses can be an expression: report once
        // and resynchronize on the matching delimiter.
        let invalid_start_error = self
            .make_error_at_last_consumed(ParseErrorKind::ParenthesizedExpressionInvalidStart)
            .widen_error_span_from(left_parenthesis_position)
            .extend_blame_to_next_token(self)
            .related_token("left_parenthesis", left_parenthesis_position)
            .sync_error_at_matching_delimiter(self, left_parenthesis_position);
        return invalid_start_error.fallback(self);
    }
    let inner_expression = self.parse_expression();
    let closing_parenthesis = self.expect(
        Token::RightParenthesis,
        ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis,
    );
    let right_parenthesis_position = closing_parenthesis
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_at_matching_delimiter(self, left_parenthesis_position)
        .extend_blame_start_to_covered_start()
        .related_token("left_parenthesis", left_parenthesis_position)
        .unwrap_or_fallback(self);
    self.arena.alloc_node_between(
        Expression::Parentheses(inner_expression),
        left_parenthesis_position,
        right_parenthesis_position,
    )
}
/// Parses the remainder of a class type expression of the form
/// `class<...>`.
///
/// Assumes the `class` keyword and the following `<` token have already
/// been consumed. Dispatches on the next token: `>` means the type argument
/// is missing, a valid identifier name starts the type argument, and
/// anything else (including end of input) is reported as an invalid start.
fn parse_class_type_tail(
    &mut self,
    class_keyword_position: TokenPosition,
    left_angle_bracket_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let upcoming = self.peek_token_and_position();
    if let Some((Token::Greater, right_angle_bracket_position)) = upcoming {
        return self.report_missing_class_type_argument(
            class_keyword_position,
            left_angle_bracket_position,
            right_angle_bracket_position,
        );
    }
    if let Some((first_token, _)) = upcoming {
        if first_token.is_valid_identifier_name() {
            return self.parse_nonempty_class_type_tail(
                class_keyword_position,
                left_angle_bracket_position,
            );
        }
    }
    // Blame the offending token, or the end of file when input ran out.
    let bad_position = match upcoming {
        Some((_, position)) => position,
        None => self.file.eof(),
    };
    self.report_invalid_class_type_start(
        class_keyword_position,
        left_angle_bracket_position,
        bad_position,
    )
}
/// Parses `QualifiedName>` after `class<`, producing an
/// [`Expression::ClassType`] node spanning from the `class` keyword to the
/// closing `>`.
///
/// A failure to parse the qualified type name is reported and replaced by
/// the fallback from `report_error_with_fallback`. A missing `>` is
/// reported and recovered from.
fn parse_nonempty_class_type_tail(
    &mut self,
    class_keyword_position: TokenPosition,
    left_angle_bracket_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let parsed_type_name = self
        .parse_qualified_identifier(ParseErrorKind::ClassTypeExpectedQualifiedTypeName)
        .widen_error_span_from(class_keyword_position)
        .extend_blame_to_next_token(self)
        .sync_error_at(self, SyncLevel::CloseAngleBracket)
        .related_token("class_keyword", class_keyword_position);
    let class_type = match parsed_type_name {
        Ok(class_type) => class_type,
        Err(error) => return self.report_error_with_fallback(error),
    };
    let closing_angle_bracket = self.expect(
        Token::Greater,
        ParseErrorKind::ClassTypeMissingClosingAngleBracket,
    );
    let right_angle_bracket_position = closing_angle_bracket
        .widen_error_span_from(class_keyword_position)
        .sync_error_at(self, SyncLevel::CloseAngleBracket)
        .related_token("left_angle_bracket", left_angle_bracket_position)
        .related_token("class_keyword", class_keyword_position)
        .unwrap_or_fallback(self);
    self.arena.alloc_node_between(
        Expression::ClassType(class_type),
        class_keyword_position,
        right_angle_bracket_position,
    )
}
/// Handles `class<>`: advances past the next token (the `>` the caller
/// peeked) and produces the fallback for a
/// [`ParseErrorKind::ClassTypeMissingTypeArgument`] error that blames the
/// range from `<` to `>`.
fn report_missing_class_type_argument(
    &mut self,
    class_keyword_position: TokenPosition,
    left_angle_bracket_position: TokenPosition,
    right_angle_bracket_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    self.advance();
    let empty_angle_brackets =
        TokenSpan::range(left_angle_bracket_position, right_angle_bracket_position);
    let error = self
        .make_error_at_last_consumed(ParseErrorKind::ClassTypeMissingTypeArgument)
        .widen_error_span_from(class_keyword_position)
        .blame(empty_angle_brackets)
        .related_token("left_angle_bracket", left_angle_bracket_position)
        .related_token("class_keyword", class_keyword_position);
    error.fallback(self)
}
/// Produces the fallback for a [`ParseErrorKind::ClassTypeInvalidStart`]
/// error blamed on `bad_position`, after synchronizing at
/// [`SyncLevel::CloseAngleBracket`].
fn report_invalid_class_type_start(
    &mut self,
    class_keyword_position: TokenPosition,
    left_angle_bracket_position: TokenPosition,
    bad_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let error = self
        .make_error_at_last_consumed(ParseErrorKind::ClassTypeInvalidStart)
        .widen_error_span_from(class_keyword_position)
        .sync_error_at(self, SyncLevel::CloseAngleBracket)
        .blame_token(bad_position)
        .related_token("left_angle_bracket", left_angle_bracket_position)
        .related_token("class_keyword", class_keyword_position);
    error.fallback(self)
}
/// Returns `true` iff the next token is definitely not a valid start of an
/// expression. A missing next token also counts as "cannot start".
///
/// The check is deliberately conservative:
/// - `true` means attempting to parse an expression here is pointless;
/// - `false` means "might be valid", so the normal expression parser should
///   decide and potentially emit a more specific error.
#[must_use]
pub(super) fn next_token_definitely_cannot_start_expression(&mut self) -> bool {
    match self.peek_token() {
        Some(token) => token.is_definitely_not_expression_start(),
        None => true,
    }
}
}

View File

@ -0,0 +1,355 @@
//! Parser for `new` expressions in Fermented UnrealScript.
use super::super::selectors::{CallArgumentListParseState, ParsedArgumentSlot};
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// Determines how parsing of the class specifier proceeds after the optional
/// `new(...)` argument list has been parsed or recovered.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum NewClassSpecifierParseAction {
/// Go on and parse the class specifier. This is also the action used when
/// `new` has no argument list at all.
Parse,
/// Do not parse a class specifier. Chosen by missing-`)` recovery when the
/// next token definitely cannot start an expression, to avoid an error
/// cascade.
Skip,
}
/// Stores the parsed `new(...)` arguments and the action to take for
/// class-specifier parsing.
#[derive(Debug)]
struct NewArgumentListParseResult<'src, 'arena> {
/// Argument from the first slot, if one was parsed.
outer_argument: OptionalExpression<'src, 'arena>,
/// Argument from the second slot, if one was parsed.
name_argument: OptionalExpression<'src, 'arena>,
/// Argument from the third slot, if one was parsed.
flags_argument: OptionalExpression<'src, 'arena>,
/// Whether the class specifier should be parsed or skipped next.
class_specifier_parse_action: NewClassSpecifierParseAction,
}
impl<'src, 'arena> NewArgumentListParseResult<'src, 'arena> {
/// Returns the parse result for `new` without an argument list.
///
/// All three arguments are `None` and the class specifier is to be parsed.
fn without_argument_list() -> Self {
Self {
outer_argument: None,
name_argument: None,
flags_argument: None,
class_specifier_parse_action: NewClassSpecifierParseAction::Parse,
}
}
}
/// Holds shared state for parsing the optional `new(...)` argument list.
#[derive(Debug)]
struct NewArgumentListParseState<'src, 'arena> {
/// Slot-parsing state shared with the call-argument parser; its
/// `parsed_argument_slot_count` selects which argument field is current.
call_argument_list_parse_state: CallArgumentListParseState,
/// Position of the `new` keyword, used as a related token in diagnostics.
new_keyword_position: TokenPosition,
/// Position of the opening `(`, used for error spans and delimiter matching.
left_parenthesis_position: TokenPosition,
/// Argument stored while the parsed-slot count is `1`.
outer_argument: OptionalExpression<'src, 'arena>,
/// Argument stored while the parsed-slot count is `2`.
name_argument: OptionalExpression<'src, 'arena>,
/// Argument stored while the parsed-slot count is `3`.
flags_argument: OptionalExpression<'src, 'arena>,
}
impl<'src, 'arena> NewArgumentListParseState<'src, 'arena> {
    /// Writes `argument` into the field selected by the current parsed-slot
    /// count: `1` is the outer argument, `2` the name argument, `3` the
    /// flags argument.
    ///
    /// # Panics
    ///
    /// Panics (via `unreachable!`) for any other slot count.
    fn store_argument_in_current_slot(&mut self, argument: OptionalExpression<'src, 'arena>) {
        let slot = match self.call_argument_list_parse_state.parsed_argument_slot_count {
            1 => &mut self.outer_argument,
            2 => &mut self.name_argument,
            3 => &mut self.flags_argument,
            _ => unreachable!("this method cannot be called after parsing three arguments"),
        };
        *slot = argument;
    }

    /// Returns the span of the argument held in the current parsed slot, or
    /// `None` when that slot is empty.
    ///
    /// Assumes the current slot has already been stored. A slot count
    /// outside `1..=3` trips a debug assertion and otherwise yields `None`.
    #[must_use]
    fn current_argument_span(&self) -> Option<TokenSpan> {
        let slot_count = self.call_argument_list_parse_state.parsed_argument_slot_count;
        debug_assert!(
            (1..=3).contains(&slot_count),
            "parsed_slot_count out of range in new-argument parser"
        );
        let slot = match slot_count {
            1 => &self.outer_argument,
            2 => &self.name_argument,
            3 => &self.flags_argument,
            // Diagnostics can fall back to a missing span here.
            _ => return None,
        };
        slot.as_ref().map(|argument| *argument.span())
    }

    /// Returns the span of the last non-empty `new` argument, checking the
    /// flags, name, and outer arguments in that order.
    #[must_use]
    fn last_parsed_allowed_argument_span(&self) -> Option<TokenSpan> {
        self.flags_argument
            .as_ref()
            .or(self.name_argument.as_ref())
            .or(self.outer_argument.as_ref())
            .map(|argument| *argument.span())
    }

    /// Consumes the state and packages the collected arguments together
    /// with `class_specifier_parse_action`.
    #[must_use]
    fn into_result(
        self,
        class_specifier_parse_action: NewClassSpecifierParseAction,
    ) -> NewArgumentListParseResult<'src, 'arena> {
        let Self {
            outer_argument,
            name_argument,
            flags_argument,
            ..
        } = self;
        NewArgumentListParseResult {
            outer_argument,
            name_argument,
            flags_argument,
            class_specifier_parse_action,
        }
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses the remainder of a `new` expression.
///
/// Assumes the `new` keyword has already been consumed. An optional
/// parenthesized argument list is parsed first, followed by the class
/// specifier. The resulting node spans from the `new` keyword to the end
/// of the class specifier.
#[must_use]
pub(super) fn parse_new_expression_tail(
    &mut self,
    new_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let (argument_list, argument_list_end_position) =
        match self.eat_with_position(Token::LeftParenthesis) {
            Some(left_parenthesis_position) => {
                let parsed_argument_list = self
                    .parse_new_argument_list_tail(new_keyword_position, left_parenthesis_position);
                // Whatever was consumed last while parsing or recovering the
                // argument list marks where the list ended.
                (parsed_argument_list, self.last_consumed_position())
            }
            None => (NewArgumentListParseResult::without_argument_list(), None),
        };
    let NewArgumentListParseResult {
        outer_argument,
        name_argument,
        flags_argument,
        class_specifier_parse_action,
    } = argument_list;
    let class_specifier = self.parse_new_class_specifier(
        new_keyword_position,
        argument_list_end_position,
        class_specifier_parse_action,
    );
    let class_specifier_end_position = class_specifier.span().end;
    let new_expression = Expression::New {
        outer_argument,
        name_argument,
        flags_argument,
        class_specifier,
    };
    self.arena.alloc_node_between(
        new_expression,
        new_keyword_position,
        class_specifier_end_position,
    )
}
/// Parses the parenthesized argument list of a `new` expression.
///
/// Assumes the opening `(` has already been consumed. Parses the argument
/// slots, diagnoses extra arguments, and then either consumes the closing
/// `)` or runs missing-parenthesis recovery.
#[must_use]
fn parse_new_argument_list_tail(
    &mut self,
    new_keyword_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
) -> NewArgumentListParseResult<'src, 'arena> {
    let mut state = NewArgumentListParseState {
        call_argument_list_parse_state: CallArgumentListParseState::new(),
        new_keyword_position,
        left_parenthesis_position,
        outer_argument: None,
        name_argument: None,
        flags_argument: None,
    };
    // Slot parsing may already have decided, during recovery, what to do
    // with the class specifier; in that case the list is finished.
    if let Some(decided_action) = self.parse_new_argument_slots(&mut state) {
        return state.into_result(decided_action);
    }
    self.diagnose_extra_new_arguments(&mut state);
    let found_closing_parenthesis = self.eat(Token::RightParenthesis);
    let class_specifier_parse_action = if found_closing_parenthesis {
        NewClassSpecifierParseAction::Parse
    } else {
        self.recover_from_missing_new_closing_parenthesis(&state)
    };
    state.into_result(class_specifier_parse_action)
}
/// Parses up to three positional `new` arguments into `state`.
///
/// Returns [`Some`] only when missing-separator recovery has determined
/// whether the class specifier should be parsed or skipped; otherwise
/// returns [`None`] once three slots are filled or no more arguments
/// follow.
#[must_use]
fn parse_new_argument_slots(
    &mut self,
    state: &mut NewArgumentListParseState<'src, 'arena>,
) -> Option<NewClassSpecifierParseAction> {
    // Only successful slot parses continue the loop,
    // so each iteration makes progress.
    loop {
        if state.call_argument_list_parse_state.parsed_argument_slot_count >= 3 {
            break;
        }
        let ParsedArgumentSlot::Argument(argument) =
            self.parse_next_call_argument_slot(&mut state.call_argument_list_parse_state)
        else {
            break;
        };
        // The comments in this file rely on `parse_next_call_argument_slot`
        // increasing `parsed_argument_slot_count` by 1 whenever it returns
        // `ParsedArgumentSlot::Argument(_)`, so the count is now `1`, `2`
        // or `3` and the call below won't hit `unreachable!`.
        state.store_argument_in_current_slot(argument);
        if state
            .call_argument_list_parse_state
            .last_slot_missing_separator
        {
            if let Some(class_specifier_parse_action) =
                self.recover_from_missing_new_argument_separator(state)
            {
                return Some(class_specifier_parse_action);
            }
        }
    }
    None
}
/// Recovers from a missing separator between `new` arguments.
///
/// When fewer than three slots have been parsed and the next token could
/// start an expression, a [`ParseErrorKind::NewArgumentMissingComma`]
/// diagnostic is reported and [`None`] is returned so that argument parsing
/// continues. Otherwise the boundary is treated as the end of the argument
/// list and the result of missing-`)` recovery is returned in [`Some`].
fn recover_from_missing_new_argument_separator(
    &mut self,
    state: &mut NewArgumentListParseState<'src, 'arena>,
) -> Option<NewClassSpecifierParseAction> {
    let has_parsed_all_allowed_arguments =
        state.call_argument_list_parse_state.parsed_argument_slot_count >= 3;
    let likely_missing_comma = !self.next_token_definitely_cannot_start_expression()
        && !has_parsed_all_allowed_arguments;
    if !likely_missing_comma {
        // If this does not look like another argument,
        // treat the boundary as the missing `)` rather than inventing
        // an extra comma diagnostic.
        return Some(self.recover_from_missing_new_closing_parenthesis(state));
    }
    let next_argument_position = self.peek_position_or_eof();
    let mut error = self
        .make_error_at_last_consumed(ParseErrorKind::NewArgumentMissingComma)
        .widen_error_span_from(state.left_parenthesis_position)
        .blame_token(next_argument_position)
        .related_token("new_keyword", state.new_keyword_position)
        .related_token("left_parenthesis", state.left_parenthesis_position);
    // Point at the argument before the gap when its span is known.
    if let Some(argument_span) = state.current_argument_span() {
        error = error.related("previous_argument", argument_span);
    }
    error.report_error(self);
    None
}
/// Diagnoses and skips any arguments beyond the three accepted by `new`.
///
/// Does nothing unless the next token is a comma. Otherwise one more
/// argument slot is parsed to capture its span, and a
/// [`ParseErrorKind::NewTooManyArguments`] diagnostic blamed on that comma
/// is reported after synchronizing toward the matching delimiter.
fn diagnose_extra_new_arguments(
    &mut self,
    state: &mut NewArgumentListParseState<'src, 'arena>,
) {
    // Require an explicit comma before diagnosing extra arguments so this
    // does not mask a missing `)`.
    let Some((Token::Comma, extra_argument_comma_position)) = self.peek_token_and_position()
    else {
        return;
    };
    // Preserve the first extra argument span for a more precise
    // diagnostic before we do any syncing.
    let extra_slot =
        self.parse_next_call_argument_slot(&mut state.call_argument_list_parse_state);
    let first_extra_argument_span = match extra_slot {
        ParsedArgumentSlot::Argument(Some(argument)) => Some(*argument.span()),
        ParsedArgumentSlot::Argument(None) | ParsedArgumentSlot::NoMoreArguments => None,
    };
    let mut error = self
        .make_error_at_last_consumed(ParseErrorKind::NewTooManyArguments)
        .widen_error_span_from(state.left_parenthesis_position)
        .blame_token(extra_argument_comma_position)
        .related_token("new_keyword", state.new_keyword_position)
        .related_token("left_parenthesis", state.left_parenthesis_position);
    if let Some(last_allowed_span) = state.last_parsed_allowed_argument_span() {
        error = error.related("last_allowed_argument", last_allowed_span);
    }
    if let Some(first_extra_span) = first_extra_argument_span {
        error = error.related("first_extra_argument", first_extra_span);
    }
    error
        .sync_error_until_matching_delimiter(self, state.left_parenthesis_position)
        .report_error(self);
}
/// Reports a missing `)` after a `new(...)` argument list and recovers.
///
/// The returned action tells the caller whether to go on and parse
/// a class specifier or to skip it.
#[must_use]
fn recover_from_missing_new_closing_parenthesis(
    &mut self,
    state: &NewArgumentListParseState<'src, 'arena>,
) -> NewClassSpecifierParseAction {
    let missing_parenthesis_error = self
        .make_error_at_last_consumed(ParseErrorKind::NewMissingClosingParenthesis)
        .widen_error_span_from(state.left_parenthesis_position);
    let blamed_position = self.peek_position_or_eof();
    missing_parenthesis_error
        .blame_token(blamed_position)
        .related_token("new_keyword", state.new_keyword_position)
        .related_token("left_parenthesis", state.left_parenthesis_position)
        .report(self);
    // Usually a missing delimiter is handled by syncing to its matching
    // `)`. `new(...) ClassName` is special: once `)` is missing, the next
    // expression may already be the class specifier rather than another
    // argument, so syncing only happens when the match is still ahead.
    let matching_right_parenthesis = self
        .file
        .matching_delimiter(state.left_parenthesis_position);
    let matching_right_parenthesis_ahead = match matching_right_parenthesis {
        Some(right_parenthesis_position) => {
            self.peek_position_or_eof() <= right_parenthesis_position
        }
        None => false,
    };
    if matching_right_parenthesis_ahead {
        // Once synced through the matched `)`, the argument-list error is
        // contained and the class specifier can be parsed as usual.
        self.recover_at_matching_delimiter_or_sync(state.left_parenthesis_position);
        NewClassSpecifierParseAction::Parse
    } else if self.next_token_definitely_cannot_start_expression() {
        // Nothing here can plausibly be a class specifier; skipping it
        // avoids a cascade of follow-up errors.
        NewClassSpecifierParseAction::Skip
    } else {
        NewClassSpecifierParseAction::Parse
    }
}
/// Produces the class-specifier expression of a `new` expression once the
/// argument list has been parsed (and, if needed, recovered).
///
/// With `Skip`, an error expression is created at the current position
/// without parsing anything. With `Parse`, the specifier is parsed as
/// an expression, unless the next token cannot start one, in which case
/// `NewMissingClassSpecifier` is reported and a fallback is returned.
#[must_use]
fn parse_new_class_specifier(
    &mut self,
    new_keyword_position: TokenPosition,
    argument_list_end: Option<TokenPosition>,
    class_specifier_parse_action: NewClassSpecifierParseAction,
) -> ExpressionRef<'src, 'arena> {
    if let NewClassSpecifierParseAction::Skip = class_specifier_parse_action {
        let error_position = self.peek_position_or_eof();
        return self.make_error_expression_at(error_position);
    }
    if !self.next_token_definitely_cannot_start_expression() {
        return self.parse_expression();
    }
    let mut missing_specifier_error = self
        .make_error_at_last_consumed(ParseErrorKind::NewMissingClassSpecifier)
        .widen_error_span_from(new_keyword_position)
        .sync_error_until(self, SyncLevel::ExpressionStart)
        .extend_blame_to_next_token(self)
        .related_token("new_keyword", new_keyword_position);
    if let Some(list_end_position) = argument_list_end {
        missing_specifier_error =
            missing_specifier_error.related_token("argument_list_end", list_end_position);
    }
    self.report_error_with_fallback(missing_specifier_error)
}
}

View File

@ -0,0 +1,320 @@
//! Parser support for expression selectors.
//!
//! Selectors are suffix forms that require an already parsed left-hand side,
//! such as member access, indexing, and calls.
use crate::ast::{self, ExpressionRef};
use crate::lexer::{self, Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
/// Bookkeeping carried between successive argument-slot parses of one
/// argument list.
// `Copy` is intentionally not derived, so that a state value cannot be
// duplicated and reused by accident.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct CallArgumentListParseState {
    /// How many argument slots have been yielded so far; omitted slots
    /// are counted too.
    pub(super) parsed_argument_slot_count: usize,
    /// Set when the most recently yielded argument expression was not
    /// followed by `,`, `)`, or end-of-file.
    pub(super) last_slot_missing_separator: bool,
}
impl CallArgumentListParseState {
    /// Creates the state for a list in which no slot has been parsed yet.
    #[must_use]
    pub(super) fn new() -> Self {
        Self {
            last_slot_missing_separator: false,
            parsed_argument_slot_count: 0,
        }
    }
    /// Returns `true` once at least one slot (parsed or omitted) was yielded.
    #[must_use]
    fn has_parsed_any_argument_slots(&self) -> bool {
        self.parsed_argument_slot_count != 0
    }
}
/// Represents the result of parsing one call argument slot.
///
/// Produced by `Parser::parse_next_call_argument_slot`; callers keep
/// requesting slots until they receive [`ParsedArgumentSlot::NoMoreArguments`].
#[must_use]
#[derive(Debug, PartialEq)]
pub(super) enum ParsedArgumentSlot<'src, 'arena> {
/// No further slots should be parsed.
NoMoreArguments,
/// A parsed slot. `None` represents an omitted argument.
Argument(ast::OptionalExpression<'src, 'arena>),
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Applies every contiguous postfix selector (`.member`, `[index]`,
/// `(arguments)`) that follows `left_hand_side`.
///
/// Returns the expression built from all selectors found, or
/// `left_hand_side` unchanged when the next token starts none.
pub(super) fn parse_selectors_after(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
    loop {
        let Some((selector_token, selector_position)) = self.peek_token_and_position() else {
            break;
        };
        let with_selector = match selector_token {
            Token::Period => {
                self.advance(); // '.'
                self.parse_member_access_selector_after(left_hand_side, selector_position)?
            }
            Token::LeftBracket => {
                self.advance(); // '['
                self.parse_index_selector_after(left_hand_side, selector_position)?
            }
            Token::LeftParenthesis => {
                self.advance(); // '('
                self.parse_call_selector_after(left_hand_side, selector_position)
            }
            _ => break,
        };
        left_hand_side = with_selector;
        self.ensure_forward_progress(selector_position);
    }
    Ok(left_hand_side)
}
/// Builds a member-access expression for `left_hand_side`.
///
/// The caller must already have consumed the `.`; its position is only
/// used as diagnostic context when the member name is missing.
fn parse_member_access_selector_after(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    period_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    let target_start = left_hand_side.span().start;
    let name_position = self.peek_position_or_eof();
    let name = self
        .parse_identifier(ParseErrorKind::MemberAccessMissingMemberName)
        .blame_token(name_position)
        .related_token("period", period_position)?;
    // The node spans from the start of the target through the name token.
    let member_access_span = lexer::TokenSpan::range(target_start, name.0);
    let member_access = ast::Expression::Member {
        target: left_hand_side,
        name,
    };
    Ok(self.arena.alloc_node(member_access, member_access_span))
}
/// Builds an index expression for `left_hand_side`.
///
/// The caller must already have consumed the `[`. Both the index
/// expression and the closing `]` are required; failures are synchronized
/// against the bracket matching `left_bracket_position` and returned.
fn parse_index_selector_after(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    left_bracket_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    let target_span = *left_hand_side.span();
    let index = self
        .parse_required_expression_with_context(
            ParseErrorKind::IndexMissingExpression,
            target_span.end,
            left_bracket_position,
        )
        .sync_error_at_matching_delimiter(self, left_bracket_position)?;
    let closing_bracket_position = self
        .expect(
            Token::RightBracket,
            ParseErrorKind::IndexMissingClosingBracket,
        )
        .widen_error_span_from(left_bracket_position)
        .sync_error_at_matching_delimiter(self, left_bracket_position)
        .related_token("left_bracket", left_bracket_position)?;
    let index_expression = ast::Expression::Index {
        target: left_hand_side,
        index,
    };
    Ok(self.arena.alloc_node_between(
        index_expression,
        target_span.start,
        closing_bracket_position,
    ))
}
/// Builds a call expression for `left_hand_side`.
///
/// The caller must already have consumed the `(`. Problems in the
/// argument list or a missing `)` are reported here, and a call node is
/// returned regardless.
fn parse_call_selector_after(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let callee_span = *left_hand_side.span();
    let arguments = self.parse_call_argument_list(callee_span.end, left_parenthesis_position);
    let closing_parenthesis_position = self
        .expect(
            Token::RightParenthesis,
            ParseErrorKind::FunctionCallMissingClosingParenthesis,
        )
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_at_matching_delimiter(self, left_parenthesis_position)
        .related_token("callee", callee_span.end)
        .related_token("left_parenthesis", left_parenthesis_position)
        .unwrap_or_fallback(self);
    let call_expression = ast::Expression::Call {
        callee: left_hand_side,
        arguments,
    };
    self.arena.alloc_node_between(
        call_expression,
        callee_span.start,
        closing_parenthesis_position,
    )
}
/// Collects the argument slots of a call whose `(` was already consumed.
///
/// Omitted arguments are kept as `None` entries. The closing `)` is left
/// for the caller to consume.
fn parse_call_argument_list(
    &mut self,
    callee_end_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
) -> ast::ArgumentList<'src, 'arena> {
    let mut slots = crate::arena::ArenaVec::new_in(self.arena);
    let mut list_state = CallArgumentListParseState::new();
    // Position observed after the previous slot; `None` before the first.
    let mut previous_slot_checkpoint = None;
    loop {
        let ParsedArgumentSlot::Argument(slot) =
            self.parse_next_call_argument_slot(&mut list_state)
        else {
            break;
        };
        if let Some(checkpoint) = previous_slot_checkpoint {
            self.ensure_forward_progress(checkpoint);
        }
        let slot_span = slot.as_ref().map(|expression| *expression.span());
        slots.push(slot);
        // Stop when the missing-separator recovery says the list cannot
        // be continued from the recovered position.
        if list_state.last_slot_missing_separator
            && !self.recover_after_missing_function_call_argument_separator(
                callee_end_position,
                left_parenthesis_position,
                slot_span,
            )
        {
            break;
        }
        previous_slot_checkpoint = self.peek_position();
    }
    slots
}
/// Parses the next logical call-argument slot.
///
/// In UnrealScript, commas introduce follow-up argument slots, so `f(x,)`
/// means `f(x, <omitted>)`, not a call with a tolerated trailing separator.
///
/// Returns [`ParsedArgumentSlot::NoMoreArguments`] when the argument list
/// has ended or no safe recovery can continue it.
/// Returns [`ParsedArgumentSlot::Argument`] for a parsed slot, including
/// omitted slots.
///
/// Repeated calls with the same `state` are guaranteed to eventually return
/// [`ParsedArgumentSlot::NoMoreArguments`], even for malformed input.
///
/// Records per-slot status in `state`: `parsed_argument_slot_count` is
/// incremented for every yielded slot, and `last_slot_missing_separator` is
/// reset on entry and set only when a parsed expression is not followed by
/// an argument boundary.
pub(super) fn parse_next_call_argument_slot(
&mut self,
state: &mut CallArgumentListParseState,
) -> ParsedArgumentSlot<'src, 'arena> {
state.last_slot_missing_separator = false;
// A comma belongs to the next slot because a final comma represents an
// omitted final argument, not a tolerated trailing separator.
match self.peek_token() {
None | Some(Token::RightParenthesis) => {
return ParsedArgumentSlot::NoMoreArguments;
}
Some(Token::Comma) => {
// In `f(,x)`, the leading comma both creates the omitted first
// slot and separates it from `x`, so the first slot must not
// consume it.
if state.has_parsed_any_argument_slots() {
self.advance();
}
// A boundary right here (`,`, `)`, or end-of-file) means this slot
// has no expression: yield it as omitted.
if self.is_at_call_argument_boundary() {
state.parsed_argument_slot_count += 1;
return ParsedArgumentSlot::Argument(None);
}
}
_ => (),
}
// Remember where the argument starts so stalled parsing can be detected.
let position_before_argument = self.peek_position_or_eof();
let mut argument = self.parse_expression();
let expression_recovery_made_no_progress =
self.peek_position_or_eof() == position_before_argument;
if expression_recovery_made_no_progress {
// Expression parsing consumed nothing; fall back to list-level recovery.
self.recover_until(SyncLevel::ListSeparator);
let list_level_recovery_made_no_progress =
self.peek_position_or_eof() == position_before_argument;
if list_level_recovery_made_no_progress {
// Still stuck on the same token: end the list instead of looping.
return ParsedArgumentSlot::NoMoreArguments;
} else {
// Make the argument's span cover the tokens skipped by recovery.
argument
.span_mut()
.extend_to(self.last_consumed_position_or_start());
}
}
state.parsed_argument_slot_count += 1;
state.last_slot_missing_separator = !self.is_at_call_argument_boundary();
ParsedArgumentSlot::Argument(Some(argument))
}
/// Reports and recovers from a missing call-argument separator.
///
/// Two cases are distinguished by the next token:
/// - it cannot start an expression: reports
///   `FunctionCallUnexpectedTokenInArgumentList` and syncs to the list
///   separator level;
/// - otherwise: reports `FunctionCallArgumentMissingComma` at the next
///   token without consuming anything.
///
/// `previous_argument_span`, when present, is attached to the diagnostic as
/// related context.
///
/// Returns whether argument-list parsing can continue at
/// the recovered position.
#[must_use]
fn recover_after_missing_function_call_argument_separator(
&mut self,
callee_end_position: TokenPosition,
left_parenthesis_position: TokenPosition,
previous_argument_span: Option<lexer::TokenSpan>,
) -> bool {
if self.next_token_definitely_cannot_start_expression() {
let unexpected_token_position = self.peek_position_or_eof();
let mut error = self
.make_error_at(
ParseErrorKind::FunctionCallUnexpectedTokenInArgumentList,
unexpected_token_position,
)
.widen_error_span_from(left_parenthesis_position)
.sync_error_until(self, SyncLevel::ListSeparator)
.blame_token(unexpected_token_position)
.related_token("callee", callee_end_position)
.related_token("left_parenthesis", left_parenthesis_position);
if let Some(previous_argument_span) = previous_argument_span {
error = error.related("argument", previous_argument_span);
}
error.report(self);
// Parsing can continue only if the sync stopped at `,`, `)`, or end-of-file.
self.is_at_call_argument_boundary()
} else {
// The next token may begin another argument, so treat this as
// a missing comma and let the caller parse that argument next.
let next_argument_position = self.peek_position_or_eof();
let mut error = self
.make_error_at(
ParseErrorKind::FunctionCallArgumentMissingComma,
next_argument_position,
)
.blame_token(next_argument_position)
.related_token("callee", callee_end_position)
.related_token("left_parenthesis", left_parenthesis_position);
// A missing separator is only flagged after a parsed expression, so
// a previous argument span is expected here.
debug_assert!(previous_argument_span.is_some());
if let Some(previous_argument_span) = previous_argument_span {
error = error.related("previous_argument", previous_argument_span);
}
error.report(self);
true
}
}
/// Reports whether the parser sits at the end of an argument slot:
/// a `,`, a `)`, or end-of-file.
#[must_use]
fn is_at_call_argument_boundary(&mut self) -> bool {
    match self.peek_token() {
        None | Some(Token::Comma) | Some(Token::RightParenthesis) => true,
        Some(_) => false,
    }
}
}

View File

@ -0,0 +1,540 @@
//! Parsing for `switch (...) { ... }` expressions in Fermented UnrealScript.
//!
//! Dispatch into this module happens only after `primary.rs` has committed to
//! keyword-led `switch` syntax. That commitment is purely syntactic: `switch`
//! followed by `(` is treated as a switch expression; otherwise `switch` may
//! still be parsed as an identifier-like primary.
//!
//! This module owns parsing of the selector, body braces, `case` labels,
//! `default`, duplicate-default diagnostics, cases-after-default diagnostics,
//! and recovery for invalid top-level switch items.
use crate::arena::ArenaVec;
use crate::ast::{self, ExpressionRef, StatementList, StatementRef, SwitchCaseRef};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{
ParseError, ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel,
};
/*
Parser structure:
switch body = everything between `{` and `}`
switch section = one labeled part: `case ...:` body or `default:` body
case labels = stacked `case <expr>:` labels
section body = statements until `case`, `default`, or `}`
preamble = invalid statements before the first section label
parse_switch_tail
parse_switch_header_tail
parse_switch_sections_tail
parse_case_section_into_state
parse_case_labels
expect_case_label_colon
parse_switch_section_body
parse_default_section_into_state
parse_switch_section_body
parse_invalid_switch_preamble
parse_switch_section_body
*/
/// Accumulates the pieces of one `switch` expression while its body is
/// being parsed.
#[derive(Debug)]
struct SwitchParseState<'src, 'arena> {
    switch_keyword_position: TokenPosition,
    left_brace_position: TokenPosition,
    selector: ExpressionRef<'src, 'arena>,
    cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
    default_arm: Option<StatementList<'src, 'arena>>,
    // Both position lists are kept until the whole switch has been parsed,
    // so that a single diagnostic can list every duplicate `default` and
    // every `case` that appears after the first `default`.
    default_keyword_positions: Vec<TokenPosition>,
    case_keyword_positions_after_default: Vec<TokenPosition>,
    span: TokenSpan,
}
impl<'src, 'arena> SwitchParseState<'src, 'arena> {
    /// Creates a state with no `default` seen yet and a span that starts at
    /// the `switch` keyword.
    #[must_use]
    fn new(
        switch_keyword_position: TokenPosition,
        left_brace_position: TokenPosition,
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
    ) -> Self {
        let span = TokenSpan::new(switch_keyword_position);
        Self {
            span,
            selector,
            cases,
            switch_keyword_position,
            left_brace_position,
            default_arm: None,
            default_keyword_positions: Vec::new(),
            case_keyword_positions_after_default: Vec::new(),
        }
    }
    /// Returns `true` once at least one `default` keyword was recorded.
    #[must_use]
    fn has_default(&self) -> bool {
        self.first_default_keyword_position().is_some()
    }
    /// Position of the first recorded `default` keyword, if any.
    #[must_use]
    fn first_default_keyword_position(&self) -> Option<TokenPosition> {
        self.default_keyword_positions.first().copied()
    }
    /// Snapshots the locations that switch diagnostics attach as context.
    #[must_use]
    fn diagnostic_context(&self) -> SwitchDiagnosticContext {
        let selector_span = *self.selector.span();
        SwitchDiagnosticContext {
            selector_span,
            switch_keyword_position: self.switch_keyword_position,
            left_brace_position: self.left_brace_position,
        }
    }
}
/// Source locations that every switch diagnostic attaches as related
/// context: the `switch` keyword, the selector, and the opening `{`.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
struct SwitchDiagnosticContext {
    switch_keyword_position: TokenPosition,
    selector_span: TokenSpan,
    left_brace_position: TokenPosition,
}
impl SwitchDiagnosticContext {
    /// Adds the switch locations to `error` as related entries.
    #[must_use]
    fn attach_to_error(self, error: ParseError) -> ParseError {
        let with_keyword = error.related_token("switch_keyword", self.switch_keyword_position);
        let with_selector = with_keyword.related("selector", self.selector_span);
        with_selector.related_token("left_brace", self.left_brace_position)
    }
    /// Adds the switch locations to the error of `result`, if there is one;
    /// successful results pass through unchanged.
    #[must_use]
    fn attach_to_result<'src, 'arena, T>(
        self,
        result: ParseResult<'src, 'arena, T>,
    ) -> ParseResult<'src, 'arena, T> {
        match result {
            Ok(value) => Ok(value),
            Err(error) => Err(self.attach_to_error(error)),
        }
    }
}
/// Describes why parsing of one switch section body stopped.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum SwitchSectionBodyExit {
/// Parsing stopped in front of `case`, `default`, `}`, or at end-of-file.
AtSectionBoundary,
/// A block item failed and recovery synchronized at the switch's matching
/// closing brace, so the whole switch is finished.
RecoveredAtSwitchBoundary,
}
/// Describes how parsing of the whole switch body ended.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum SwitchSectionsExit {
/// The closing `}` was found and consumed.
ClosedByRightBrace,
/// A section body reported [`SwitchSectionBodyExit::RecoveredAtSwitchBoundary`].
ClosedByRecovery,
/// The token stream ended before the switch body was closed.
EndOfFile,
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses the remainder of a `switch` expression; the `switch` keyword has
/// already been consumed by the caller.
///
/// For a normally closed switch the resulting [`ast::Expression::Switch`]
/// spans from `switch_start_position` through the closing `}`.
///
/// Errors are reported as diagnostics and a best-effort switch node is
/// still returned.
#[must_use]
pub(crate) fn parse_switch_tail(
    &mut self,
    switch_start_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let mut state = match self.parse_switch_header_tail(switch_start_position) {
        Err(bodiless_switch) => return bodiless_switch,
        Ok(state) => state,
    };
    let sections_exit = self.parse_switch_sections_tail(&mut state);
    if sections_exit == SwitchSectionsExit::EndOfFile {
        // The body was never closed: report the missing `}` and end the
        // switch span at the last consumed token.
        let eof_position = self.peek_position_or_eof();
        let missing_brace_error = self
            .make_error_at(ParseErrorKind::SwitchMissingClosingBrace, eof_position)
            .sync_error_at_matching_delimiter(self, state.left_brace_position);
        state
            .diagnostic_context()
            .attach_to_error(missing_brace_error)
            .report(self);
        state.span.extend_to(self.last_consumed_position_or_start());
    }
    self.report_delayed_switch_errors(&state);
    self.alloc_switch_node_from_state(state)
}
/// Parses the selector and the opening `{` of a switch.
///
/// Returns the initial parse state on success. When the `{` is missing,
/// the error is reported and `Err` carries a switch node that has
/// the selector but no cases and no default arm.
fn parse_switch_header_tail(
    &mut self,
    switch_start_position: TokenPosition,
) -> Result<SwitchParseState<'src, 'arena>, ExpressionRef<'src, 'arena>> {
    // `switch` was already accepted as expression syntax by the caller, so
    // the selector goes through ordinary expression parsing and recovery
    // without re-checking the legacy `switch (` disambiguation.
    let selector = self.parse_expression();
    let selector_span = *selector.span();
    let header_span = TokenSpan::new(switch_start_position).extended(selector_span.end);
    let left_brace = self
        .expect(Token::LeftBrace, ParseErrorKind::SwitchMissingBody)
        .related("selector", selector_span)
        .related_token("switch_keyword", switch_start_position)
        .ok_or_report(self);
    match left_brace {
        Some(left_brace_position) => Ok(SwitchParseState::new(
            switch_start_position,
            left_brace_position,
            selector,
            self.arena.vec(),
        )),
        None => Err(self.alloc_switch_node(selector, self.arena.vec(), None, header_span)),
    }
}
/// Parses everything between the switch's `{` and its end, filling `state`.
///
/// Consumes the closing `}` when it is found. The return value says
/// whether the body ended at `}`, through recovery, or at end-of-file.
fn parse_switch_sections_tail(
    &mut self,
    state: &mut SwitchParseState<'src, 'arena>,
) -> SwitchSectionsExit {
    loop {
        let Some((token, token_position)) = self.peek_token_and_position() else {
            return SwitchSectionsExit::EndOfFile;
        };
        let body_exit = match token {
            Token::RightBrace => {
                self.advance(); // '}'
                state.span.extend_to(token_position);
                return SwitchSectionsExit::ClosedByRightBrace;
            }
            Token::Keyword(Keyword::Case) => {
                self.parse_case_section_into_state(state, token_position)
            }
            Token::Keyword(Keyword::Default) => {
                self.parse_default_section_into_state(state, token_position)
            }
            // Anything else is not a valid switch-level item. It is still
            // parsed (and then dropped) so the usual statement diagnostics
            // are produced.
            _ => self.parse_invalid_switch_preamble(state.diagnostic_context(), token_position),
        };
        if let SwitchSectionBodyExit::RecoveredAtSwitchBoundary = body_exit {
            state.span.extend_to(self.last_consumed_position_or_start());
            return SwitchSectionsExit::ClosedByRecovery;
        }
        // Safety net against parser bugs that would keep this loop
        // spinning on one and the same token.
        self.ensure_forward_progress(token_position);
    }
}
/// Parses one `case` section and pushes it onto `state.cases`.
///
/// A section consists of one or more stacked `case <expr>:` labels followed
/// by statements up to the next section boundary. If a `default` was
/// already seen, the section's first `case` position is recorded for
/// the delayed cases-after-default diagnostic.
///
/// Returns the boundary that stopped body parsing.
fn parse_case_section_into_state(
    &mut self,
    state: &mut SwitchParseState<'src, 'arena>,
    first_case_position: TokenPosition,
) -> SwitchSectionBodyExit {
    let switch_context = state.diagnostic_context();
    if state.has_default() {
        state
            .case_keyword_positions_after_default
            .push(first_case_position);
    }
    let labels = self.parse_case_labels(switch_context);
    let mut body = self.arena.vec();
    let body_exit = self.parse_switch_section_body(switch_context.left_brace_position, &mut body);
    let section_span = compute_switch_case_span(first_case_position, &labels, &body);
    let section = ast::SwitchCase { labels, body };
    state.cases.push(self.arena.alloc_node(section, section_span));
    body_exit
}
/// Parses a `default:` section into `state`.
///
/// Expects the parser to be positioned at the `default` keyword, which is
/// consumed here; `default_position` is that keyword's position.
///
/// Duplicate `default` sections contribute statements to the first one so
/// recovery preserves their bodies while diagnostics are delayed.
///
/// Returns the boundary that stopped body parsing.
fn parse_default_section_into_state(
&mut self,
state: &mut SwitchParseState<'src, 'arena>,
default_position: TokenPosition,
) -> SwitchSectionBodyExit {
self.advance(); // 'default'
// Every `default` keyword is recorded; duplicates are diagnosed later.
state.default_keyword_positions.push(default_position);
let switch_context = state.diagnostic_context();
// Reuse the existing default arm if there is one, otherwise start it.
let default_arm = state.default_arm.get_or_insert_with(|| self.arena.vec());
// A missing `:` is reported with switch context after syncing to the next
// statement start; body parsing then proceeds either way.
switch_context
.attach_to_result(
self.expect(Token::Colon, ParseErrorKind::SwitchDefaultMissingColon)
.widen_error_span_from(default_position)
.related_token("default_keyword", default_position),
)
.sync_error_until(self, SyncLevel::StatementStart)
.report_error(self);
self.parse_switch_section_body(switch_context.left_brace_position, default_arm)
}
/// Consumes switch-level items that are neither `case` nor `default`, up to
/// the next section boundary, and reports them as
/// `SwitchTopLevelItemNotCase`.
///
/// The parsed statements are dropped afterwards; they do not become part
/// of any switch arm.
fn parse_invalid_switch_preamble(
    &mut self,
    switch_context: SwitchDiagnosticContext,
    preamble_start_position: TokenPosition,
) -> SwitchSectionBodyExit
where
    'src: 'arena,
{
    // The statements are built only for the sake of the regular statement
    // diagnostics and recovery; the switch AST has no place for them.
    let mut discarded_statements = self.arena.vec();
    let body_exit = self.parse_switch_section_body(
        switch_context.left_brace_position,
        &mut discarded_statements,
    );
    let preamble_end_position = self.last_consumed_position_or_start();
    let preamble_span = TokenSpan::range(preamble_start_position, preamble_end_position);
    let preamble_error = switch_context.attach_to_error(
        self.make_error_at_last_consumed(ParseErrorKind::SwitchTopLevelItemNotCase)
            .widen_error_span_from(preamble_start_position)
            .blame(preamble_span),
    );
    // The related label tells the renderer whether one or several items
    // were discarded.
    let item_count_label = if discarded_statements.len() > 1 {
        "multiple_items"
    } else {
        "single_item"
    };
    preamble_error
        .related(item_count_label, preamble_span)
        .report(self);
    body_exit
}
/// Parses a run of stacked `case <expr>:` labels.
///
/// Consumes labels for as long as the next token is the `case` keyword and
/// returns the label expressions in source order. A `case` immediately
/// followed by `:` is reported as `SwitchCaseMissingExpression` and
/// contributes no label. Errors are reported with `switch_context`
/// attached; nothing is returned as an error.
fn parse_case_labels(
&mut self,
switch_context: SwitchDiagnosticContext,
) -> ArenaVec<'arena, ExpressionRef<'src, 'arena>> {
let mut labels = self.arena.vec();
while let Some((Keyword::Case, case_position)) = self.peek_keyword_and_position() {
self.advance(); // 'case'
// Span of this label's expression, used as context if `:` is missing.
let mut case_expression_span = None;
let mut should_expect_colon = true;
if let Some((Token::Colon, colon_position)) = self.peek_token_and_position() {
// `case :` has no expression; report it and let the colon be consumed below.
switch_context
.attach_to_error(
self.make_error_at(
ParseErrorKind::SwitchCaseMissingExpression,
colon_position,
)
.blame_token(colon_position)
.related_token("case_keyword", case_position),
)
.report(self);
} else {
// Recover only to the label delimiter here;
// `expect_case_label_colon` will consume it and avoid
// a duplicate missing-colon diagnostic.
// If the next token cannot start an expression, the expression error
// below is reported and a separate missing-colon error is suppressed.
should_expect_colon = !self.next_token_definitely_cannot_start_expression();
let expression = switch_context
.attach_to_result(self.parse_required_expression(
ParseErrorKind::SwitchCaseExpressionInvalidStart,
case_position,
))
.related_token("case_keyword", case_position)
.sync_error_at(self, SyncLevel::ColonDelimiter)
.unwrap_or_fallback(self);
case_expression_span = Some(*expression.span());
labels.push(expression);
}
// Expression recovery may still leave a valid colon to consume
if self.peek_token() == Some(Token::Colon) {
should_expect_colon = true;
}
if should_expect_colon {
self.expect_case_label_colon(switch_context, case_position, case_expression_span);
}
}
labels
}
/// Expects the `:` that ends a `case` label.
///
/// A missing colon is reported as `SwitchCaseMissingColon` with the switch
/// context, the `case` keyword and, when available, the label expression
/// attached.
fn expect_case_label_colon(
    &mut self,
    switch_context: SwitchDiagnosticContext,
    case_position: TokenPosition,
    case_expression_span: Option<TokenSpan>,
) {
    let mut colon_result = self
        .expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
        .widen_error_span_from(case_position)
        .related_token("case_keyword", case_position);
    if let Some(expression_span) = case_expression_span {
        colon_result = colon_result.related("case_expression", expression_span);
    }
    // Without a colon, skip ahead to a statement-or-stronger boundary so
    // the broken label is not mistaken for the beginning of the arm body.
    switch_context
        .attach_to_result(colon_result)
        .sync_error_until(self, SyncLevel::StatementStart)
        .report_error(self);
}
/// Parses the statements belonging to the current switch section.
///
/// Parsed items are appended to `statements`. `left_brace_position` is the
/// switch's opening `{`, used to find the matching closing brace when
/// a block item fails to parse.
///
/// Returns [`SwitchSectionBodyExit::AtSectionBoundary`] when parsing stops
/// at `case`, `default`, `}`, or end-of-file. Returns
/// [`SwitchSectionBodyExit::RecoveredAtSwitchBoundary`] when statement
/// recovery has to synchronize to the switch's closing delimiter.
fn parse_switch_section_body(
&mut self,
left_brace_position: TokenPosition,
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
) -> SwitchSectionBodyExit {
while let Some((token, token_position)) = self.peek_token_and_position() {
match token {
// Section boundaries are not consumed here; the caller handles them.
Token::Keyword(Keyword::Case | Keyword::Default) | Token::RightBrace => {
return SwitchSectionBodyExit::AtSectionBoundary;
}
// Boundaries outside this switch are left to block-item parsing
// so it can attach the more specific item-level diagnostic.
_ => match self.parse_and_append_next_block_item(statements) {
Ok(()) => {
// Guard against parser bugs that would otherwise leave
// switch parsing stuck on the same token.
self.ensure_forward_progress(token_position);
}
Err(error) => {
// Item recovery could not find a local boundary, so
// recover at the switch's matching closing brace.
let error =
error.sync_error_at_matching_delimiter(self, left_brace_position);
// Keep a fallback statement in the body in place of the failed item.
let error_statement = error.fallback(self);
statements.push(error_statement);
return SwitchSectionBodyExit::RecoveredAtSwitchBoundary;
}
},
}
}
// End-of-file: reported as a section boundary, as documented above.
SwitchSectionBodyExit::AtSectionBoundary
}
/// Reports the diagnostics that are delayed until the whole switch has
/// been parsed: duplicate `default` sections first, then `case` sections
/// that follow the first `default`.
fn report_delayed_switch_errors(&mut self, state: &SwitchParseState<'src, 'arena>) {
self.report_duplicate_switch_defaults(state);
self.report_switch_cases_after_default(state);
}
/// Reports a single `SwitchDuplicateDefault` error when the switch has more
/// than one `default`.
///
/// The error is placed at the first duplicate and lists the first
/// `default` plus every duplicate (`duplicate_default_1`, ...) as related
/// tokens. Does nothing for zero or one `default`.
fn report_duplicate_switch_defaults(&mut self, state: &SwitchParseState<'src, 'arena>) {
    let Some((&first_default_position, duplicate_positions)) =
        state.default_keyword_positions.split_first()
    else {
        return;
    };
    let Some(&first_duplicate_position) = duplicate_positions.first() else {
        return;
    };
    let base_error = state.diagnostic_context().attach_to_error(
        self.make_error_at(
            ParseErrorKind::SwitchDuplicateDefault,
            first_duplicate_position,
        )
        .related_token("first_default", first_default_position),
    );
    duplicate_positions
        .iter()
        .copied()
        .enumerate()
        .fold(base_error, |error, (index, duplicate_position)| {
            error.related_token(
                format!("duplicate_default_{}", index + 1),
                duplicate_position,
            )
        })
        .report(self);
}
/// Reports a single `SwitchCasesAfterDefault` error when any `case` section
/// follows the first `default`.
///
/// The error is placed at the first such `case` and lists the first
/// `default` plus every offending `case` (`case_after_default_1`, ...) as
/// related tokens. Does nothing when there is no `default` or no `case`
/// after it.
fn report_switch_cases_after_default(&mut self, state: &SwitchParseState<'src, 'arena>) {
    let Some(first_default_position) = state.first_default_keyword_position() else {
        return;
    };
    let late_case_positions = &state.case_keyword_positions_after_default;
    let Some(&first_case_position) = late_case_positions.first() else {
        return;
    };
    let base_error = state.diagnostic_context().attach_to_error(
        self.make_error_at(ParseErrorKind::SwitchCasesAfterDefault, first_case_position)
            .related_token("first_default", first_default_position),
    );
    late_case_positions
        .iter()
        .copied()
        .enumerate()
        .fold(base_error, |error, (index, case_position)| {
            error.related_token(format!("case_after_default_{}", index + 1), case_position)
        })
        .report(self);
}
/// Allocates an [`ast::Expression::Switch`] node with the given parts and
/// span in the parser's arena.
#[must_use]
fn alloc_switch_node(
    &self,
    selector: ExpressionRef<'src, 'arena>,
    cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
    default_arm: Option<StatementList<'src, 'arena>>,
    span: TokenSpan,
) -> ExpressionRef<'src, 'arena> {
    let switch_expression = ast::Expression::Switch {
        selector,
        cases,
        default_arm,
    };
    self.arena.alloc_node(switch_expression, span)
}
#[must_use]
fn alloc_switch_node_from_state(
&self,
state: SwitchParseState<'src, 'arena>,
) -> ExpressionRef<'src, 'arena> {
self.alloc_switch_node(state.selector, state.cases, state.default_arm, state.span)
}
}
/// Determines the span covered by one `case` section.
///
/// The span always begins at the first `case` keyword. It ends at the last
/// body statement when there is one, otherwise at the last label; with
/// neither, it covers only the `case` keyword position.
#[must_use]
fn compute_switch_case_span(
    first_case_position: TokenPosition,
    labels: &[ExpressionRef],
    statements: &[StatementRef],
) -> TokenSpan {
    let section_end = match (statements.last(), labels.last()) {
        (Some(last_statement), _) => Some(last_statement.span().end),
        (None, Some(last_label)) => Some(last_label.span().end),
        (None, None) => None,
    };
    let mut section_span = TokenSpan::new(first_case_position);
    if let Some(end_position) = section_end {
        section_span.extend_to(end_position);
    }
    section_span
}

View File

@ -1,99 +0,0 @@
use crate::ast::{AstSpan, Expression};
use crate::lexer::{Token, TokenLocation};
use crate::parser::ParseErrorKind;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses what follows an already consumed `return` keyword.
///
/// The terminating `;` is left unconsumed. Unless the next token is `;`,
/// an expression is parsed as the return value. The resulting
/// [`Expression::Return`] spans from the `return` keyword to the end of
/// the value, or covers just the keyword when there is no value.
#[must_use]
pub(crate) fn parse_return_cont(
    &mut self,
    return_start_location: TokenLocation,
) -> crate::ast::ExpressionRef<'src, 'arena> {
    let value = if self.peek_token() == Some(Token::Semicolon) {
        None
    } else {
        Some(self.parse_expression())
    };
    let span_end = match &value {
        Some(returned_value) => returned_value.span().to,
        None => return_start_location,
    };
    let span = AstSpan {
        from: return_start_location,
        to: span_end,
    };
    self.arena.alloc(Expression::Return(value), span)
}
/// Parses what follows an already consumed `break` keyword.
///
/// The terminating `;` is left unconsumed. Unless the next token is `;`,
/// an expression is parsed as the optional value. The resulting
/// [`Expression::Break`] spans from `break` to the end of the value, or
/// covers just the keyword when there is no value.
#[must_use]
pub(crate) fn parse_break_cont(
    &mut self,
    break_start_location: TokenLocation,
) -> crate::ast::ExpressionRef<'src, 'arena> {
    let value = if self.peek_token() == Some(Token::Semicolon) {
        None
    } else {
        Some(self.parse_expression())
    };
    let span_end = match &value {
        Some(break_value) => break_value.span().to,
        None => break_start_location,
    };
    let span = AstSpan {
        from: break_start_location,
        to: span_end,
    };
    self.arena.alloc(Expression::Break(value), span)
}
/// Parses the label of a `goto` whose keyword was already consumed.
///
/// The next token must be an identifier, which becomes the label.
/// Fails with [`ParseErrorKind::UnexpectedEndOfFile`] when no token is
/// left, and with [`ParseErrorKind::GotoMissingLabel`] when the next token
/// is not an identifier (the token is not consumed in that case).
/// On success, yields an [`Expression::Goto`] spanning from `goto` to
/// the label token.
#[must_use]
pub(crate) fn parse_goto_cont(
    &mut self,
    goto_start_location: TokenLocation,
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
    let Some((token, label_text, label_location)) = self.peek_token_lexeme_and_location()
    else {
        return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
    };
    if token != Token::Identifier {
        return Err(self.make_error_here(ParseErrorKind::GotoMissingLabel));
    }
    self.advance();
    let span = AstSpan {
        from: goto_start_location,
        to: label_location,
    };
    let label = self.arena.string(label_text);
    Ok(self.arena.alloc(Expression::Goto(label), span))
}
}

View File

@ -0,0 +1,297 @@
//! Parsing of callable definitions for Fermented `UnrealScript`
//! (functions, events, delegates, operators).
use crate::arena::ArenaVec;
use crate::ast::{
CallableDefinition, CallableDefinitionRef, CallableKind, CallableModifier,
CallableModifierKind, CallableName, IdentifierToken, InfixOperator, InfixOperatorName,
ParameterRef, PostfixOperator, PostfixOperatorName, PrefixOperator, PrefixOperatorName,
TypeSpecifierRef,
};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{
ParseError, ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel,
recovery::RecoveryFallback,
};
/// Temporary parsed representation of a callable header without its body.
#[derive(Debug)]
pub(super) struct ParsedCallableHeader<'src, 'arena> {
pub start_position: TokenPosition,
pub modifiers: crate::arena::ArenaVec<'arena, CallableModifier>,
pub kind: CallableKind,
pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
pub name: CallableName,
pub parameters: crate::arena::ArenaVec<'arena, ParameterRef<'src, 'arena>>,
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for ParsedCallableHeader<'src, 'arena> {
    /// Builds a placeholder header for a callable whose header failed
    /// to parse.
    ///
    /// The placeholder is a plain `function` with no modifiers, no return
    /// type and no parameters; both its start position and its name token
    /// point at the start of the span covered by `error`.
    fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
        let anchor = error.covered_span.start;
        Self {
            start_position: anchor,
            modifiers: parser.arena.vec(),
            kind: CallableKind::Function,
            return_type_specifier: None,
            name: CallableName::Identifier(IdentifierToken(anchor)),
            parameters: parser.arena.vec(),
        }
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a callable definition.
///
/// Assumes [`Parser::is_callable_header_ahead`] has already confirmed that
/// a callable declaration begins at the current position. This affects
/// the diagnostics produced for malformed input.
#[must_use]
pub(crate) fn parse_callable_definition(&mut self) -> CallableDefinitionRef<'src, 'arena> {
let header = self.parse_callable_header().unwrap_or_fallback(self);
let body = if self.eat(Token::LeftBrace) {
Some(self.parse_braced_block_statements_tail(self.last_consumed_position_or_start()))
} else {
self.expect(
Token::Semicolon,
ParseErrorKind::CallableMissingBodyOrSemicolon,
)
.report_error(self);
None
};
let span = TokenSpan::range(
header.start_position,
self.last_consumed_position_or_start(),
);
self.arena.alloc_node(
CallableDefinition {
name: header.name,
kind: header.kind,
return_type_specifier: header.return_type_specifier,
modifiers: header.modifiers,
parameters: header.parameters,
body,
},
span,
)
}
/// Parses a callable header without the body.
///
/// The header is read in this order: modifiers, the callable kind keyword,
/// more modifiers, an optional return type, the callable name and
/// the parenthesized parameter list.
///
/// # Errors
///
/// Returns early (via `?`) when the start position cannot be obtained, or
/// when the callable kind, the return type or the name fails to parse.
/// A missing `(` or `)` around the parameters is only reported; parsing
/// continues and a header is still returned.
fn parse_callable_header(
&mut self,
) -> ParseResult<'src, 'arena, ParsedCallableHeader<'src, 'arena>> {
let start_position = self.require_position(ParseErrorKind::CallableExpectedHeader)?;
let mut modifiers = self.arena.vec();
// Modifiers may appear on either side of the kind keyword; both runs are
// appended to the same vector.
self.collect_callable_modifiers(&mut modifiers);
let kind = self.parse_callable_kind()?;
self.collect_callable_modifiers(&mut modifiers);
// `(` cannot appear inside a return type in this grammar, so if the token
// *after* the next one (lookahead offset 1) is `(`, the next token is
// treated as the callable's name and there is no return type specifier.
let return_type_specifier = match self.peek_token_at(1) {
Some(Token::LeftParenthesis) => None,
_ => Some(self.parse_type_specifier()?),
};
let name = self.parse_callable_name(kind)?;
// A missing `(` is reported, but the parameter list is still attempted.
self.expect(
Token::LeftParenthesis,
ParseErrorKind::CallableParamsMissingOpeningParenthesis,
)
.report_error(self);
let parameters = self.parse_parameter_list();
// On a missing `)`, synchronize at the close-parenthesis level before
// reporting the error.
self.expect(
Token::RightParenthesis,
ParseErrorKind::CallableParamsMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.report_error(self);
Ok(ParsedCallableHeader {
start_position,
modifiers,
kind,
return_type_specifier,
name,
parameters,
})
}
/// Parses the keyword that determines the kind of a callable.
///
/// `operator` is handled separately from the other kind keywords because
/// only infix operators can carry an optional parenthesized precedence,
/// which a plain keyword-to-kind conversion cannot express.
///
/// # Errors
///
/// Returns [`ParseErrorKind::CallableExpectedKind`] (without consuming
/// anything) when the next token is not a keyword or is a keyword that
/// does not denote a callable kind.
fn parse_callable_kind(&mut self) -> ParseResult<'src, 'arena, CallableKind> {
    let Some(keyword) = self.peek_keyword() else {
        return Err(self.make_error_at_last_consumed(ParseErrorKind::CallableExpectedKind));
    };
    if keyword == Keyword::Operator {
        self.advance();
        let precedence = self.parse_optional_parenthesized_integer(
            ParseErrorKind::CallableOperatorInvalidPrecedence,
        );
        return Ok(CallableKind::InfixOperator(precedence));
    }
    match CallableKind::try_from(keyword) {
        Ok(kind) => {
            // Consume the keyword only once it is known to be a kind.
            self.advance();
            Ok(kind)
        }
        Err(_) => Err(self.make_error_at_last_consumed(ParseErrorKind::CallableExpectedKind)),
    }
}
/// Parses the name of a callable of the given `kind`.
///
/// Functions, events and delegates are named by an identifier; prefix,
/// infix and postfix operators are named by an operator token that must
/// convert into the matching operator type.
///
/// # Errors
///
/// Returns the `Callable*` error kind that corresponds to `kind` when
/// the name token is missing or is not valid for that kind. For operator
/// kinds the offending token is not consumed.
fn parse_callable_name(
&mut self,
kind: CallableKind,
) -> ParseResult<'src, 'arena, CallableName> {
match kind {
CallableKind::Function | CallableKind::Event | CallableKind::Delegate => self
.parse_identifier(ParseErrorKind::CallableNameNotIdentifier)
.map(CallableName::Identifier),
CallableKind::PrefixOperator => {
let (token, operator_position) = self.require_token_and_position(
ParseErrorKind::CallablePrefixOperatorInvalidSymbol,
)?;
// NOTE(review): the conversion error is created at the last consumed
// token rather than at the offending one - confirm this is intended.
let operator = PrefixOperator::try_from(token).map_err(|()| {
self.make_error_at_last_consumed(ParseErrorKind::CallablePrefixOperatorInvalidSymbol)
})?;
// Consume the operator token only after it has been validated.
self.advance();
Ok(CallableName::PrefixOperator(PrefixOperatorName {
kind: operator,
position: operator_position,
}))
}
CallableKind::InfixOperator(_) => {
let (token, operator_position) = self.require_token_and_position(
ParseErrorKind::CallableInfixOperatorInvalidSymbol,
)?;
let operator = InfixOperator::try_from(token).map_err(|()| {
self.make_error_at_last_consumed(ParseErrorKind::CallableInfixOperatorInvalidSymbol)
})?;
// Consume the operator token only after it has been validated.
self.advance();
Ok(CallableName::InfixOperator(InfixOperatorName {
kind: operator,
position: operator_position,
}))
}
CallableKind::PostfixOperator => {
let (token, operator_position) = self.require_token_and_position(
ParseErrorKind::CallablePostfixOperatorInvalidSymbol,
)?;
let operator = PostfixOperator::try_from(token).map_err(|()| {
self.make_error_at_last_consumed(ParseErrorKind::CallablePostfixOperatorInvalidSymbol)
})?;
// Consume the operator token only after it has been validated.
self.advance();
Ok(CallableName::PostfixOperator(PostfixOperatorName {
kind: operator,
position: operator_position,
}))
}
}
}
/// Appends every callable modifier found at the current position to
/// `modifiers`, stopping at the first token that does not start one.
pub(crate) fn collect_callable_modifiers(
    &mut self,
    modifiers: &mut ArenaVec<'arena, CallableModifier>,
) {
    loop {
        let Some(modifier) = self.parse_function_modifier() else {
            break;
        };
        modifiers.push(modifier);
    }
}
/// Parses a single callable modifier if one starts at the current position.
///
/// Returns [`None`], consuming nothing, when the next token is not
/// a keyword or the keyword is not a callable modifier. `native` may be
/// followed by an optional parenthesized integer and `config` expects
/// a parenthesized identifier; every other modifier is a single keyword.
/// The returned span runs from the modifier keyword to the last consumed
/// token.
fn parse_function_modifier(&mut self) -> Option<CallableModifier> {
let (keyword, start) = self.peek_keyword_and_position()?;
let kind = match keyword {
Keyword::Native => {
self.advance();
let native_id = self.parse_optional_parenthesized_integer(
ParseErrorKind::NativeModifierIdNotIntegerLiteral,
);
CallableModifierKind::Native(native_id)
}
Keyword::Config => {
self.advance();
// NOTE(review): the same error kind is passed for both the missing `)`
// and the missing/invalid identifier cases - confirm whether a dedicated
// identifier error kind should be used for the second argument.
let ident = self
.parse_required_parenthesized_identifier(
ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
)
// When no identifier could be parsed, fall back to a token that points at
// the `config` keyword itself.
.unwrap_or(IdentifierToken(start));
CallableModifierKind::Config(ident)
}
_ => {
let simple = CallableModifierKind::try_from(keyword).ok()?;
// Only advance after confirming it is the modifier
self.advance();
simple
}
};
let span = TokenSpan::range(start, self.last_consumed_position_or_start());
Some(CallableModifier { kind, span })
}
/// Parses an optional `( <integer literal> )` group.
///
/// Returns [`None`] without consuming anything when the next token is not
/// `(`. Otherwise consumes the group and returns the decoded integer, or
/// [`None`] when the content is not a decodable integer literal (in which
/// case an error is reported). `close_err` is the error kind reported when
/// the closing `)` is missing.
///
/// NOTE(review): a non-integer (or missing) token inside the parentheses is
/// always reported as `OperatorPrecedenceNotIntegerLiteral`, including when
/// this is called for `native(...)`; callers also pass "not an integer" /
/// "invalid precedence" kinds as `close_err`. Confirm which error kind is
/// intended for which failure.
fn parse_optional_parenthesized_integer(&mut self, close_err: ParseErrorKind) -> Option<u128> {
if !self.eat(Token::LeftParenthesis) {
return None;
}
let value = match self.peek_token_and_lexeme() {
Some((Token::IntegerLiteral, lex)) => {
self.advance();
self.decode_integer_literal(lex).ok_or_report(self)
}
Some(_) => {
self.report_error_here(ParseErrorKind::OperatorPrecedenceNotIntegerLiteral);
// Skip the offending token so that the `)` check below looks at what
// follows it.
self.advance();
None
}
None => {
// End of input: there is nothing to skip.
self.report_error_here(ParseErrorKind::OperatorPrecedenceNotIntegerLiteral);
None
}
};
self.expect(Token::RightParenthesis, close_err)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.report_error(self);
value
}
/// Parses a mandatory `( <identifier> )` group.
///
/// `ident_err` is reported when the opening `(` is absent, when the token
/// inside the parentheses is not a valid identifier name, or when input
/// ends; `close_err` is reported when the closing `)` is missing.
/// Returns the identifier token on success and [`None`] otherwise.
fn parse_required_parenthesized_identifier(
    &mut self,
    close_err: ParseErrorKind,
    ident_err: ParseErrorKind,
) -> Option<IdentifierToken> {
    if !self.eat(Token::LeftParenthesis) {
        self.report_error_here(ident_err);
        return None;
    }
    let identifier = match self.peek_token_lexeme_and_position() {
        None => {
            self.report_error_here(ident_err);
            None
        }
        Some((candidate, _, candidate_position)) => {
            if candidate.is_valid_identifier_name() {
                self.advance();
                Some(IdentifierToken(candidate_position))
            } else {
                self.report_error_here(ident_err);
                // Skip the offending token before looking for `)`.
                self.advance();
                None
            }
        }
    };
    self.expect(Token::RightParenthesis, close_err)
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .report_error(self);
    identifier
}
}

View File

@ -0,0 +1,50 @@
//! Lookahead for callable headers in Fermented `UnrealScript`.
use crate::lexer::{Keyword, Token};
use crate::parser::Parser;
impl Parser<'_, '_> {
/// Reports whether the upcoming tokens look like the start of a callable
/// header.
///
/// The answer is `true` when zero or more callable modifiers are followed
/// by a keyword that denotes a callable kind. Scanning stops with `false`
/// at the first token that is neither.
///
/// Does not check whether any parenthesized arguments are valid.
#[must_use]
pub(crate) fn is_callable_header_ahead(&mut self) -> bool {
    let mut offset = 0;
    loop {
        let Some(keyword) = self.peek_keyword_at(offset) else {
            return false;
        };
        if keyword.is_callable_kind_keyword() {
            return true;
        }
        match self.callable_modifier_width_at(keyword, offset) {
            // Step over the whole modifier, including any `(...)` part.
            Some(modifier_width) => offset += modifier_width,
            None => return false,
        }
    }
}
/// Returns how many tokens the callable modifier `keyword`, located at
/// `lookahead_token_offset`, occupies, or [`None`] if `keyword` is not
/// a callable modifier.
///
/// `native` and `config` occupy four tokens when followed by `(`, one
/// arbitrary token and `)`; every other case occupies a single token.
fn callable_modifier_width_at(
    &mut self,
    keyword: Keyword,
    lookahead_token_offset: usize,
) -> Option<usize> {
    if !keyword.is_callable_modifier() {
        return None;
    }
    // `native(...)` and `config(...)` consume a parenthesized specifier
    // in modifier position, so lookahead must skip the whole modifier.
    let has_parenthesized_specifier = matches!(keyword, Keyword::Native | Keyword::Config)
        && self.peek_token_at(lookahead_token_offset + 1) == Some(Token::LeftParenthesis)
        && self.peek_token_at(lookahead_token_offset + 3) == Some(Token::RightParenthesis);
    Some(if has_parenthesized_specifier { 4 } else { 1 })
}
}

View File

@ -0,0 +1,3 @@
mod definition;
mod lookahead;
mod params;

View File

@ -0,0 +1,107 @@
use crate::arena::ArenaVec;
use crate::ast::{Parameter, ParameterModifier, ParameterModifierKind, ParameterRef};
use crate::lexer::{Keyword, Token, TokenSpan};
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses a comma-separated list of callable parameters.
///
/// Expects the opening `(` to be handled by the caller and never consumes
/// the closing `)`. An immediately following `)` yields an empty list, and
/// a trailing comma right before `)` is accepted.
///
/// Each parameter consists of optional modifiers (`optional`, `out`,
/// `coerce`, `skip`), a type specifier, a name, an optional array length
/// and an optional `= default` expression. Errors are reported and
/// recovered from in place; a parameter whose type cannot be parsed is
/// dropped.
// allowed to switch to result returning
pub(crate) fn parse_parameter_list(&mut self) -> ArenaVec<'arena, ParameterRef<'src, 'arena>> {
    let mut parameters = self.arena.vec();
    if matches!(self.peek_token(), Some(Token::RightParenthesis)) {
        return parameters;
    }
    loop {
        // Position of the last token consumed before this parameter; it
        // serves both as the span start and as the progress marker.
        let iteration_start = self.last_consumed_position_or_start();
        let mut modifiers = self.arena.vec();
        while let Some((keyword, position)) = self.peek_keyword_and_position() {
            let kind = match keyword {
                Keyword::Optional => ParameterModifierKind::Optional,
                Keyword::Out => ParameterModifierKind::Out,
                Keyword::Coerce => ParameterModifierKind::Coerce,
                Keyword::Skip => ParameterModifierKind::Skip,
                _ => break,
            };
            modifiers.push(ParameterModifier { kind, position });
            self.advance();
        }
        let type_specifier = match self.parse_type_specifier() {
            Ok(parsed_type) => parsed_type,
            Err(error) => {
                // Give up on this parameter: skip to the next list
                // separator and resume only if it is a comma.
                self.report_error(error);
                self.recover_until(SyncLevel::ListSeparator);
                if self.eat(Token::Comma) {
                    continue;
                }
                break;
            }
        };
        let name = self
            .parse_identifier(ParseErrorKind::ParamMissingIdentifier)
            .unwrap_or_fallback(self);
        let array_size = match self.parse_array_len_expr() {
            Ok(length) => length,
            Err(error) => {
                self.report_error(error);
                self.recover_until(SyncLevel::CloseBracket);
                let _ = self.eat(Token::RightBracket);
                None
            }
        };
        let default_value = if self.eat(Token::Assign) {
            Some(self.parse_expression())
        } else {
            None
        };
        let span = TokenSpan::range(iteration_start, self.last_consumed_position_or_start());
        let parameter = Parameter {
            modifiers,
            type_specifier,
            name,
            array_size,
            default_value,
        };
        parameters.push(self.arena.alloc_node(parameter, span));
        if !self.eat(Token::Comma) {
            break;
        }
        if matches!(self.peek_token(), Some(Token::RightParenthesis)) {
            break;
        }
        self.ensure_forward_progress(iteration_start);
    }
    parameters
}
}

View File

@ -1,7 +1,15 @@
mod block;
mod control;
mod flow;
mod pratt;
mod precedence;
mod statements;
mod switch;
//! ## Naming conventions
//!
//! Some naming conventions that might not be obvious:
//!
//! - `*_tail` means the opening token or keyword has already been consumed.
//! Tail parsers build the rest of the construct and usually return a total,
//! recovered result.
//! - `*_into` means the method extends an already parsed value or appends into
//! an existing output container.
mod class;
mod declarations;
pub(super) mod expression;
mod function;
mod statement;

View File

@ -1,406 +0,0 @@
//! Expression parsing for the language front-end.
//!
//! This module implements a Pratt-style parser for the language's expression
//! grammar, supporting:
//!
//! * Primary expressions (literals, identifiers, parenthesized expressions)
//! * Prefix operators
//! * Postfix operators
//! * Infix operators with precedence and associativity
//!
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
//! operators bind. Infix parsing uses the pair of binding powers returned by
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
//! The parser infrastructure supports both left- and right-associative
//! operators, but Fermented UnrealScript currently defines only
//! left-associative ones.
//!
//! ## See also
//!
//! - [`crate::parser::Parser::parse_expression`] - main entry point
//! - [`PrecedenceRank`] - operator binding strengths
//! - [`super::precedence`] - operator precedence definitions
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{
ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel,
};
pub(crate) use super::precedence::PrecedenceRank;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses an expression.
///
/// Entry point of the Pratt parser: delegates to
/// `parse_expression_with_precedence` with [`PrecedenceRank::LOOSEST`], so
/// infix operators of every precedence are accepted.
pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
self.parse_expression_with_precedence(PrecedenceRank::LOOSEST)
}
/// Parses an expression, accepting only infix operators that bind at least
/// as tightly as `min_precedence_rank`.
///
/// A failure to parse the leading prefix/primary part is synchronized at
/// the expression level and replaced by the fallback expression.
fn parse_expression_with_precedence(
    &mut self,
    min_precedence_rank: PrecedenceRank,
) -> ExpressionRef<'src, 'arena> {
    // Intentional order: (1) prefix/primary, (2) postfix (tighter than
    // any infix), (3) infix. There is no second postfix pass;
    // `(a+b)!` works because the parenthesized sub-expression had its own
    // postfix pass before returning.
    let operand = self
        .parse_prefix_or_primary()
        .sync_error_until(self, SyncLevel::Expression)
        .unwrap_or_fallback(self);
    let operand_with_postfix = self.parse_postfix_into(operand);
    self.parse_infix_into(operand_with_postfix, min_precedence_rank)
}
/// Parses either a prefix-operator expression or a primary expression
/// (the "nud", or null denotation, of the Pratt parser).
///
/// The operand of a prefix operator is parsed with
/// [`PrecedenceRank::TIGHTEST`].
///
/// Errors with [`ParseErrorKind::UnexpectedEndOfFile`] if the stream ends
/// before a valid prefix/primary.
fn parse_prefix_or_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
    let Some((token, token_location)) = self.peek_token_and_location() else {
        return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
    };
    match crate::ast::PrefixOperator::try_from(token) {
        Ok(operator) => {
            self.advance();
            let operand = self.parse_expression_with_precedence(PrecedenceRank::TIGHTEST);
            Ok(Expression::new_prefix(
                self.arena,
                token_location,
                operator,
                operand,
            ))
        }
        // Not a prefix operator: the token has to start a primary.
        Err(_) => self.parse_primary(),
    }
}
/// Parses a primary expression: a literal, an identifier, a parenthesized
/// sub-expression, a `{ ... }` block, or one of the keyword-introduced
/// constructs (`if`, `while`, `do`, `foreach`, `for`, `return`, `break`,
/// `continue`, `goto`, `switch`).
///
/// For keyword-introduced constructs the keyword is consumed here and
/// the rest is delegated to the matching `*_cont` parser, which receives
/// the keyword's location.
///
/// # Errors
///
/// [`ParseErrorKind::ExpressionUnexpectedToken`] if the next token
/// cannot start a primary; [`ParseErrorKind::UnexpectedEndOfFile`]
/// at end of input; [`ParseErrorKind::InvalidNumericLiteral`] for
/// a numeric literal that fails to parse. Errors of the delegated parsers
/// are propagated.
fn parse_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
// For diagnostics, we only advance *after* fully parsing the current
// literal/token.
let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
else {
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
};
match token {
Token::IntegerLiteral => {
// Convert before advancing so a conversion error is created while
// the literal is still the current token.
let value = self.parse_integer_literal(token_text)?;
self.advance();
Ok(self
.arena
.alloc_at(Expression::Integer(value), token_location))
}
Token::FloatLiteral => {
let value = self.parse_float_literal(token_text)?;
self.advance();
Ok(self
.arena
.alloc_at(Expression::Float(value), token_location))
}
Token::StringLiteral => {
let value = unescape_string_literal(self.arena, token_text);
self.advance();
Ok(self
.arena
.alloc_at(Expression::String(value), token_location))
}
Token::True => {
self.advance();
Ok(self.arena.alloc_at(Expression::Bool(true), token_location))
}
Token::False => {
self.advance();
Ok(self.arena.alloc_at(Expression::Bool(false), token_location))
}
Token::None => {
self.advance();
Ok(self.arena.alloc_at(Expression::None, token_location))
}
Token::Identifier => {
self.advance();
Ok(self
.arena
.alloc_at(Expression::Identifier(token_text), token_location))
}
Token::LeftParenthesis => {
self.advance();
self.parse_parenthesized_expression_cont(token_location)
}
Token::If => {
self.advance();
Ok(self.parse_if_cont(token_location))
}
Token::While => {
self.advance();
Ok(self.parse_while_cont(token_location))
}
Token::Do => {
self.advance();
self.parse_do_until_cont(token_location)
}
Token::ForEach => {
self.advance();
Ok(self.parse_foreach_cont(token_location))
}
Token::For => {
self.advance();
self.parse_for_cont(token_location)
}
// A block is itself a primary expression.
Token::Brace(crate::lexer::BraceKind::Normal) => {
self.advance();
Ok(self.parse_block_cont(token_location))
}
Token::Return => {
self.advance();
Ok(self.parse_return_cont(token_location))
}
Token::Break => {
self.advance();
Ok(self.parse_break_cont(token_location))
}
Token::Continue => {
self.advance();
Ok(self.arena.alloc_at(Expression::Continue, token_location))
}
Token::Goto => {
self.advance();
self.parse_goto_cont(token_location)
}
Token::Switch => {
self.advance();
self.parse_switch_cont(token_location)
}
_ => {
// Unexpected token in expression; it is left unconsumed.
Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken))
}
}
}
/// Parses the remainder of a parenthesized expression.
///
/// The opening `(` must already be consumed; `left_parenthesis_location`
/// is where it was found. On success an [`Expression::Parentheses`] node
/// is allocated with a span running from `(` to `)`.
///
/// Errors with [`ParseErrorKind::ExpressionMissingClosingParenthesis`] if
/// a closing `)` is missing; the error span is widened to start at
/// the opening `(` and the parser is synchronized at the close-parenthesis
/// level before the error is returned.
fn parse_parenthesized_expression_cont(
    &mut self,
    left_parenthesis_location: TokenLocation,
) -> ParseExpressionResult<'src, 'arena> {
    let inner_expression = self.parse_expression();
    let closing_parenthesis = self.expect(
        Token::RightParenthesis,
        ParseErrorKind::ExpressionMissingClosingParenthesis,
    );
    let right_parenthesis_location = closing_parenthesis
        .widen_error_span_from(left_parenthesis_location)
        .sync_error_at(self, SyncLevel::CloseParenthesis)?;
    let parenthesized = Expression::Parentheses(inner_expression);
    Ok(self.arena.alloc_between(
        parenthesized,
        left_parenthesis_location,
        right_parenthesis_location,
    ))
}
/// Consumes every postfix operator that follows and wraps
/// `left_hand_side` in one postfix node per operator, innermost first.
fn parse_postfix_into(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
) -> ExpressionRef<'src, 'arena> {
    loop {
        // One peek yields both the operator and its location, so each
        // iteration advances exactly once.
        let Some((operator, operator_location)) = self.peek_postfix_with_location() else {
            return left_hand_side;
        };
        self.advance();
        left_hand_side =
            Expression::new_postfix(self.arena, left_hand_side, operator, operator_location);
    }
}
/// Extends `left_hand_side` with every following infix operator that binds
/// at least as tightly as `min_precedence_rank`.
///
/// Associativity is encoded by
/// [`super::precedence::infix_precedence_ranks`]: the right-hand side of
/// each operator is parsed with that operator's right precedence rank,
/// which decides whether an operator of equal precedence is absorbed into
/// the right operand or handled by this loop.
///
/// Stops at the first token that is not an infix operator or whose
/// operator is looser than `min_precedence_rank`.
fn parse_infix_into(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
    min_precedence_rank: PrecedenceRank,
) -> ExpressionRef<'src, 'arena> {
    loop {
        let Some((operator, right_precedence_rank)) =
            self.peek_infix_at_least(min_precedence_rank)
        else {
            return left_hand_side;
        };
        self.advance();
        let right_hand_side = self.parse_expression_with_precedence(right_precedence_rank);
        left_hand_side =
            Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
    }
}
/// Converts the text of an integer literal into an [`i128`].
///
/// The wide type is chosen to cover FerUS's integer range so constant
/// folding remains precise.
///
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`], created at
/// the current position, if `text` is not a valid integer.
fn parse_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
    match text.parse::<i128>() {
        Ok(parsed_value) => Ok(parsed_value),
        Err(_) => Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)),
    }
}
/// Converts the text of a float literal into an [`f64`].
///
/// The type is chosen to cover FerUS's float range so constant folding
/// remains precise.
///
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`], created at
/// the current position, if `text` is not a valid float.
fn parse_float_literal(&mut self, text: &str) -> ParseResult<f64> {
    text.parse::<f64>()
        .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
}
/// Peeks at the next token and, if it is a postfix operator, returns
/// the operator together with the token's location.
///
/// Returns [`None`] at end of input or when the token is not a postfix
/// operator. Nothing is consumed.
fn peek_postfix_with_location(
    &mut self,
) -> Option<(crate::ast::PostfixOperator, TokenLocation)> {
    let (token, token_location) = self.peek_token_and_location()?;
    let operator = crate::ast::PostfixOperator::try_from(token).ok()?;
    Some((operator, token_location))
}
/// Peeks at the next token and, if it is an infix operator whose left
/// precedence rank is not looser than `min_precedence_rank`, returns
/// the operator together with its *right* precedence rank (the rank to
/// parse its right-hand side with).
///
/// Otherwise returns [`None`]. Nothing is consumed.
fn peek_infix_at_least(
    &mut self,
    min_precedence_rank: PrecedenceRank,
) -> Option<(crate::ast::InfixOperator, PrecedenceRank)> {
    let next_token = self.peek_token()?;
    let (left_precedence_rank, operator, right_precedence_rank) =
        super::precedence::infix_precedence_ranks(next_token)?;
    if left_precedence_rank.is_looser_than(min_precedence_rank) {
        None
    } else {
        Some((operator, right_precedence_rank))
    }
}
/// Parses one item inside a `{ ... }` block.
///
/// The item can be a statement (e.g. a variable declaration) or an
/// expression. If the item is an expression without a following
/// semicolon, it is returned as the block's current tail expression
/// - the value considered to be the block's result. In well-formed
/// code such a tail expression appears only at the very end of the block.
///
/// Statements, and expressions followed by `;`, are pushed into
/// `statements` and [`None`] is returned.
///
/// This method never consumes the closing `}` and is only meant to be
/// called while parsing inside a block.
pub(crate) fn parse_block_item(
&mut self,
statements: &mut crate::arena::ArenaVec<'arena, crate::ast::StatementRef<'src, 'arena>>,
) -> Option<crate::ast::ExpressionRef<'src, 'arena>> {
if let Some(mut next_statement) = self.parse_statement() {
if next_statement.needs_semicolon() {
// For statements we immediately know if lack of
// semicolon is an issue
if let Some(Token::Semicolon) = self.peek_token() {
// Extend the statement's span to include the `;` before consuming it.
next_statement.span_mut().to = self.peek_location();
self.advance(); // ';'
} else {
// The statement is still kept; only the missing `;` is reported.
self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterStatement);
}
}
statements.push(next_statement);
} else {
let mut next_expression = self.parse_expression();
if let Expression::Error = *next_expression {
// Skip to the next statement-level synchronization point and let
// the error node's span end at the token reached there.
self.recover_until(SyncLevel::Statement);
next_expression.span_mut().to = self.peek_location();
}
if let Some((Token::Semicolon, semicolon_location)) = self.peek_token_and_location() {
self.advance(); // ;
// The expression statement spans from the expression's start to the `;`.
let span = crate::ast::AstSpan {
from: next_expression.span().from,
to: semicolon_location,
};
let expression_statement_node = self
.arena
.alloc(crate::ast::Statement::Expression(next_expression), span);
statements.push(expression_statement_node);
} else {
// No `;`: hand the expression back as the candidate tail expression.
return Some(next_expression);
}
}
None
}
}
/// Resolves the escape sequences of a tokenized string literal and stores
/// the result as an arena string.
///
/// Recognized escapes: `\n` (newline), `\t` (tab), `\"` and `\\`.
/// For any other escaped character the backslash is dropped and
/// the character itself is kept unchanged (UnrealScript behavior).
/// A backslash that is the very last character of `raw` is dropped.
///
/// Note: this function assumes `raw` is the token text without surrounding
/// quotes.
fn unescape_string_literal<'arena>(
    arena: &'arena crate::arena::Arena,
    raw: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut unescaped = String::with_capacity(raw.len());
    let mut remaining = raw.chars();
    while let Some(character) = remaining.next() {
        if character != '\\' {
            unescaped.push(character);
            continue;
        }
        // The lexer never produces a trailing backslash in a string token,
        // so there is normally a following character to inspect.
        let Some(escaped_character) = remaining.next() else {
            continue;
        };
        unescaped.push(match escaped_character {
            'n' => '\n',
            't' => '\t',
            // `"`, `\` and every unknown escape resolve to the escaped
            // character itself, as UnrealScript does.
            other => other,
        });
    }
    arena.string(&unescaped)
}

View File

@ -1,185 +0,0 @@
//! Precedence tables for Fermented UnrealScript operators.
//!
//! These values don't follow the usual *binding power* convention for
//! a Pratt parser, where tighter binding corresponds to a larger number.
//! Here, the smaller the number, the tighter the binding power.
//! For this reason, we use the term *precedence rank* instead.
//!
//! ## Operators sorted by precedence (lowest number = tighter binding)
//!
//! ### Infix operators
//!
//! All infix operators in UnrealScript are
//! [left-associative](https://wiki.beyondunreal.com/Operators).
//!
//! 12: `**`
//! 16: `*`, `/`, `Cross`, `Dot`
//! 18: `%`
//! 20: `+`, `-`
//! 22: `<<`, `>>`, `>>>`
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
//! 26: `!=`
//! 28: `&`, `^`, `|`
//! 30: `&&`, `^^`
//! 32: `||`
//! 34: `*=`, `/=`, `+=`, `-=`
//! 40: `$`, `*`, `@`
//! 44: `$=`, `*=`, `@=`
//! 45: `-=`
//!
//! Some operators, such as `*`, appear twice with different precedence
//! ranks because they were defined with different values for different types
//! in separate script source files (as in the Killing Floor sources).
//! However, UnrealScript uses only the first definition it encounters in
//! `Object.uc`, which corresponds to the lower value.
//!
//! ### Prefix operators
//!
//! `!`, `~`, `-`, `++`, `--`.
//!
//! ### Postfix operators
//!
//! `++`, `--`.
use crate::ast::{InfixOperator, PostfixOperator, PrefixOperator};
use crate::lexer::Token;
/// Compact precedence rank used by the Pratt parser.
///
/// The scale is inverted compared to usual binding powers: a smaller
/// number means tighter binding and a larger number means looser binding.
/// This matches how UnrealScript tables were recorded.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct PrecedenceRank(u8);

impl PrecedenceRank {
    /// The loosest possible precedence rank.
    ///
    /// On the inverted scale (smaller number = tighter binding) this is
    /// the maximum [`u8`] value.
    pub const LOOSEST: Self = Self(u8::MAX);
    /// The tightest possible precedence rank.
    ///
    /// On the inverted scale (smaller number = tighter binding) this is
    /// zero.
    pub const TIGHTEST: Self = Self(0);

    /// Returns `true` if `self` binds more loosely than `other`, i.e. if
    /// `self` has the strictly larger numeric rank.
    ///
    /// # Examples
    ///
    /// ```
    /// # use crate::parser::expressions::PrecedenceRank;
    /// let a = PrecedenceRank(40);
    /// let b = PrecedenceRank(34);
    /// assert!(a.is_looser_than(b)); // 40 is looser than 34
    ///
    /// let c = PrecedenceRank(20);
    /// let d = PrecedenceRank(24);
    /// assert!(!c.is_looser_than(d)); // 20 is tighter than 24
    /// ```
    pub fn is_looser_than(self, other: Self) -> bool {
        other.0 < self.0
    }
}
impl TryFrom<Token> for PrefixOperator {
type Error = ();
fn try_from(token: Token) -> Result<Self, Self::Error> {
use PrefixOperator::*;
Ok(match token {
Token::Not => Not,
Token::Minus => Minus,
Token::BitwiseNot => BitwiseNot,
Token::Increment => Increment,
Token::Decrement => Decrement,
_ => return Err(()),
})
}
}
impl TryFrom<Token> for PostfixOperator {
type Error = ();
fn try_from(token: Token) -> Result<Self, Self::Error> {
use PostfixOperator::*;
Ok(match token {
Token::Increment => Increment,
Token::Decrement => Decrement,
_ => return Err(()),
})
}
}
/// Maps a token to its infix operator along with its left and right binding
/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`.
///
/// The right rank is always one less (i.e. one step tighter) than the left
/// rank, which makes every operator left-associative.
///
/// Returns [`None`] if and only if `token` is not an infix operator.
pub(crate) fn infix_precedence_ranks(
token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
use crate::ast::InfixOperator::*;
let (left_precedence_rank, operator) = match token {
// 12: `**`
Token::Exponentiation => (12, Exponentiation),
// 16: `*`, `/`, `Cross`, `Dot` (left-assoc)
Token::Multiply => (16, Multiply),
Token::Divide => (16, Divide),
Token::Cross => (16, Cross),
Token::Dot => (16, Dot),
// 18: `%`
Token::Modulo => (18, Modulo),
// 20: `+`, `-`
Token::Plus => (20, Plus),
Token::Minus => (20, Minus),
// 22: `<<`, `>>`, `>>>`
Token::LeftShift => (22, LeftShift),
Token::RightShift => (22, RightShift),
Token::LogicalRightShift => (22, LogicalRightShift),
// 24: comparison operators
Token::Less => (24, Less),
Token::LessEqual => (24, LessEqual),
Token::Greater => (24, Greater),
Token::GreaterEqual => (24, GreaterEqual),
Token::Equal => (24, Equal),
Token::ApproximatelyEqual => (24, ApproximatelyEqual),
Token::ClockwiseFrom => (24, ClockwiseFrom),
// 26: `!=`
Token::NotEqual => (26, NotEqual),
// 28: bit-wise `&`, `^`, `|`
Token::BitwiseAnd => (28, BitwiseAnd),
Token::BitwiseXor => (28, BitwiseXor),
Token::BitwiseOr => (28, BitwiseOr),
// 30: logical `&&`, `^^`
Token::And => (30, And),
Token::Xor => (30, Xor),
// 32: logical `||`
Token::Or => (32, Or),
// 34: `*=`, `/=`, `+=`, `-=`
Token::MultiplyAssign => (34, MultiplyAssign),
Token::DivideAssign => (34, DivideAssign),
Token::PlusAssign => (34, PlusAssign),
Token::MinusAssign => (34, MinusAssign),
// Simple `=` and `%=` are treated with the same precedence
Token::Assign => (34, Assign),
Token::ModuloAssign => (34, ModuloAssign),
// 40: `$`, `@`
Token::Concat => (40, Concat),
Token::ConcatSpace => (40, ConcatSpace),
// 44: `$=`, `@=`
Token::ConcatAssign => (44, ConcatAssign),
Token::ConcatSpaceAssign => (44, ConcatSpaceAssign),
_ => return None,
};
// All operators are left-associative, so `right_precedence_rank` is set to
// `left_precedence_rank - 1` (with our "smaller is tighter" scale, this
// enforces left associativity in Pratt parsing: a following operator of
// the same rank is looser than the right-hand side's minimum and is
// therefore left for the enclosing infix loop).
//
// Since all precedences in the table above are even, subtracting one
// won't actually cross any boundary between operator groups.
Some((
PrecedenceRank(left_precedence_rank),
operator,
PrecedenceRank(left_precedence_rank - 1),
))
}

View File

@ -0,0 +1,86 @@
//! Statement parsing for the language front-end.
//!
//! Implements a simple recursive-descent parser for
//! *Fermented `UnrealScript` statements*.
use crate::ast::{Statement, StatementRef};
use crate::lexer::{Keyword, Token, TokenSpan};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses a single statement, if the upcoming tokens start one.
///
/// Recognized forms, tried in this order:
/// - `;`: an empty statement ([`Statement::Empty`]); the `;` is consumed;
/// - `local <type> <declarators>`: a local variable declaration;
/// - `<identifier | begin | end> :`: a label;
/// - `function`, `event` or any token accepted by
///   `is_valid_function_modifier`: a nested callable definition.
///
/// Returns [`None`] when the input has ended (after reporting
/// [`ParseErrorKind::UnexpectedEndOfFile`]) or when the next token starts
/// none of the forms above. Expression statements are deliberately
/// not handled here.
///
/// NOTE(review): the code in this method consumes `;` only for
/// [`Statement::Empty`]. According to the author's note the older contract
/// ("the caller handles semicolons") is out of date and semicolon handling
/// is meant to move into this method - confirm before relying on either.
#[must_use]
pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
let Some((token, lexeme, position)) = self.peek_token_lexeme_and_position() else {
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
return None;
};
match token {
// Empty statement: a lone `;` whose span is just that token.
Token::Semicolon => {
self.advance(); // `;`
Some(
self.arena
.alloc_node(Statement::Empty, TokenSpan::new(position)),
)
}
// UnrealScript `local` declaration.
// A failed type specifier is replaced by a fallback value, so this arm
// always produces a node.
Token::Keyword(Keyword::Local) => {
self.advance(); // `local`
let start = position;
let type_spec = self.parse_type_specifier().unwrap_or_fallback(self);
let declarators = self.parse_variable_declarators();
// TODO: parse (unfinished in the original; presumably the trailing `;` -
// confirm with the author)
let span = TokenSpan::range(start, self.last_consumed_position_or_start());
Some(self.arena.alloc_node(
Statement::LocalVariableDeclaration {
type_spec,
declarators,
},
span,
))
}
// Label: Ident ':' (also tolerate Begin:/End:).
// The one-token lookahead keeps plain identifiers from being taken
// as labels.
Token::Identifier | Token::Keyword(Keyword::Begin | Keyword::End)
if matches!(self.peek_token_at(1), Some(Token::Colon)) =>
{
self.advance(); // ident/begin/end
self.advance(); // :
Some(self.arena.alloc_node(
Statement::Label(self.arena.string(lexeme)),
TokenSpan::range(position, self.last_consumed_position_or_start()),
))
}
// Nested function/event/operator inside blocks.
// The statement node reuses the span of the parsed callable.
t if t == Token::Keyword(Keyword::Function)
|| t == Token::Keyword(Keyword::Event)
|| t.is_valid_function_modifier() =>
{
let f = self.parse_callable_definition();
let span = *f.span();
Some(self.arena.alloc_node(Statement::Function(f), span))
}
// C-like variable declaration starting with a TypeSpec
// (currently disabled; kept for reference).
/*token if self.looks_like_variable_declaration_start(token) => Some(
self.parse_variable_declaration_start()
.sync_error_until(self, SyncLevel::Statement)
.unwrap_or_fallback(self),
),*/
// Not a statement
_ => None,
}
}
}

View File

@ -1,185 +0,0 @@
//! Statement parsing for the language front-end.
//!
//! Implements a simple recursive-descent parser for
//! *Fermented UnrealScript statements*.
use crate::ast::{AstSpan, Statement, StatementRef};
use crate::lexer::Token;
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Attempts to parse one statement starting at the current token.
///
/// The forms are tried in order: an empty statement (`;`), a `local`
/// declaration, a label (`identifier :`) and a C-like declaration
/// (`TYPE_NAME identifier ...`).
///
/// Only [`Statement::Empty`] consumes its `;`; for every other form
/// the trailing `;` is left to the caller. Yields [`None`] when the input
/// has ended (an error is reported in that case) or when the next token
/// does not begin a statement.
#[must_use]
pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
    let Some((first_token, first_lexeme, start)) = self.peek_token_lexeme_and_location() else {
        self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
        return None;
    };
    let statement = match first_token {
        // A lone `;`
        Token::Semicolon => {
            self.advance(); // `;`
            self.arena.alloc(Statement::Empty, AstSpan::new(start))
        }
        // Classic UnrealScript `local` declaration
        Token::Local => {
            self.advance(); // `local`
            self.parse_local_variable_declaration_cont()
                .widen_error_span_from(start)
                .sync_error_until(self, SyncLevel::Statement)
                .unwrap_or_fallback(self)
        }
        // `name:` label
        Token::Identifier if matches!(self.peek_token_at(1), Some(Token::Colon)) => {
            self.advance(); // the identifier
            self.advance(); // `:`
            Some(()).map(|()| {
                self.arena.alloc(
                    Statement::Label(self.arena.string(first_lexeme)),
                    AstSpan::range(start, self.last_visited_location()),
                )
            })?
        }
        // `TYPE_NAME identifier ...` declaration
        type_token
            if type_token.is_valid_type_name_token()
                && Some(Token::Identifier) == self.peek_token_at(1) =>
        {
            self.advance(); // `TYPE_NAME`
            // The guard above guarantees that a declarator follows.
            self.parse_variable_declaration_cont(first_lexeme)
        }
        // Anything else does not start a statement.
        _ => return None,
    };
    Some(statement)
}
/// Parses the remainder of a `local` declaration; the `local` keyword must
/// already be consumed.
///
/// The next token has to be a valid type name, otherwise an error is
/// returned without consuming anything. Initializers are rejected by
/// the identifier-list parser. A declaration without any identifier is
/// reported but still produces a node (with an empty identifier list).
fn parse_local_variable_declaration_cont(
    &mut self,
) -> crate::parser::ParseResult<'src, 'arena, StatementRef<'src, 'arena>> {
    let type_lexeme = match self.peek_token_and_lexeme() {
        None => return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)),
        Some((candidate, _)) if !candidate.is_valid_type_name_token() => {
            return Err(self.make_error_here(ParseErrorKind::LocalInvalidTypeName));
        }
        Some((_, lexeme)) => lexeme,
    };
    // Taken before the type name is consumed, so the span starts at
    // the last token visited so far.
    let span_start = self.last_visited_location();
    self.advance(); // `TYPE_NAME`
    let type_name = self.arena.string(type_lexeme);
    let identifiers = self.parse_local_identifier_list();
    if identifiers.is_empty() {
        self.make_error_here(ParseErrorKind::LocalMissingIdentifier)
            .widen_error_span_from(span_start)
            .report_error(self);
    }
    let declaration = Statement::LocalVariableDeclaration {
        type_name,
        identifiers,
    };
    let span = AstSpan::range(span_start, self.last_visited_location());
    Ok(self.arena.alloc(declaration, span))
}
/// Collects the comma-separated variable names of a `local` declaration.
///
/// Errors are reported and recovered from on a best-effort basis:
/// a non-identifier element and a forbidden `=` initializer both trigger
/// recovery up to the next list separator. The result is empty when no
/// valid identifier was found.
fn parse_local_identifier_list(
    &mut self,
) -> crate::arena::ArenaVec<'arena, crate::arena::ArenaString<'arena>> {
    let mut names = self.arena.vec();
    loop {
        // Running out of tokens simply ends the list.
        let Some((token, lexeme)) = self.peek_token_and_lexeme() else {
            break;
        };
        match token {
            Token::Identifier => {
                names.push(self.arena.string(lexeme));
                self.advance(); // the identifier
            }
            _ => {
                self.report_error_here(ParseErrorKind::LocalBadVariableIdentifier);
                // Skip ahead to where the next name could begin.
                self.recover_until(SyncLevel::ListSeparator);
            }
        }
        // `local` declarations must not carry initializers.
        if matches!(self.peek_token(), Some(Token::Assign)) {
            self.report_error_here(ParseErrorKind::LocalInitializerNotAllowed);
            self.recover_until(SyncLevel::ListSeparator);
        }
        // Without a `,` the list is over; with one, the comma itself was
        // consumed, so the loop always makes progress.
        let list_continues = self.eat(Token::Comma);
        if !list_continues {
            break;
        }
    }
    names
}
/// Parses the rest of a C-like variable declaration; the type name token
/// must already be consumed and is passed in as `type_name`.
///
/// Callers have to ensure that at least one declarator follows (this is
/// checked with a debug assertion). Declarators may have `=` initializers.
fn parse_variable_declaration_cont(
    &mut self,
    type_name: &'src str,
) -> StatementRef<'src, 'arena> {
    // The type name was the last token consumed, so the span starts there.
    let span_start = self.last_visited_location();
    let type_name = self.arena.string(type_name);
    let declarations = self.parse_variable_declaration_list();
    // The caller's lookahead promised an identifier.
    debug_assert!(!declarations.is_empty());
    let declaration = Statement::VariableDeclaration {
        type_name,
        declarations,
    };
    let span = AstSpan::range(span_start, self.last_visited_location());
    self.arena.alloc(declaration, span)
}
/// Collects comma-separated declarators, each with an optional
/// `= <expression>` initializer.
///
/// Should be called when the next token begins a declarator. A
/// non-identifier element is reported and skipped via recovery to the next
/// list separator.
fn parse_variable_declaration_list(
    &mut self,
) -> crate::arena::ArenaVec<'arena, crate::ast::VariableDeclarator<'src, 'arena>> {
    let mut declarators = self.arena.vec();
    loop {
        // Running out of tokens simply ends the list.
        let Some((token, lexeme)) = self.peek_token_and_lexeme() else {
            break;
        };
        match token {
            Token::Identifier => {
                self.advance(); // the identifier
                let name = self.arena.string(lexeme);
                let initializer = self.eat(Token::Assign).then(|| self.parse_expression());
                declarators.push(crate::ast::VariableDeclarator { name, initializer });
            }
            _ => {
                self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
                // Skip ahead to where the next declarator could begin.
                self.recover_until(SyncLevel::ListSeparator);
            }
        }
        // Without a `,` the list is over; with one, the comma itself was
        // consumed, so the loop always makes progress.
        let list_continues = self.eat(Token::Comma);
        if !list_continues {
            break;
        }
    }
    declarators
}
}

View File

@ -1,227 +0,0 @@
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, ExpressionRef, StatementRef};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses a `switch` expression after the `switch` keyword was consumed.
///
/// `switch_start_location` is the location at which the resulting node's span
/// begins.
///
/// Arm bodies accept statements and expressions. A trailing expression
/// without `;` in the last arm becomes the switch's tail value if none was
/// captured yet.
/// Only one `default` case arm is allowed; a duplicate one and `case` arms
/// following `default` are reported, but still parsed.
/// Returns a best-effort switch node on premature EOF; every path visible
/// here returns [`Ok`].
#[must_use]
pub(crate) fn parse_switch_cont(
&mut self,
switch_start_location: TokenLocation,
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
let selector = self.parse_expression();
// A missing opening brace is reported, but parsing of the body is still
// attempted.
self.expect(
Token::Brace(crate::lexer::BraceKind::Normal),
ParseErrorKind::SwitchMissingBody,
)
.report_error(self);
let (mut cases, mut default_arm, mut tail) = (self.arena.vec(), None, None);
let mut span = AstSpan::new(switch_start_location);
loop {
let Some((token, token_location)) = self.peek_token_and_location() else {
// Premature end of input: report it and return what was collected so far.
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
span.extend_to(self.peek_location());
return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
};
match token {
// End of the switch body: the span reaches up to this `}`.
Token::RightBrace => {
self.advance(); // '}'
span.extend_to(token_location);
return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
}
Token::Case => {
// `case` arms are expected before `default`; complain, but keep the arm.
if default_arm.is_some() {
self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault);
}
let case_node = self.parse_switch_case_group(token_location, &mut tail);
cases.push(case_node);
}
Token::Default => {
if default_arm.is_some() {
self.report_error_here(ParseErrorKind::SwitchDuplicateDefault);
}
// We still parse a duplicate default to surface all errors.
// Bodies are effectively fused for error reporting;
// compilation stops anyway, so this trades AST correctness
// for diagnostics.
self.parse_switch_default_arm(
token_location,
default_arm.get_or_insert_with(|| self.arena.vec()),
&mut tail,
);
}
// This can only be triggered before parsing any `case` or
// `default` arms, since they stop either at the start of
// another arm declaration (e.g. at `case`/`default`) or
// at the `}` that ends switch body.
_ => self.parse_switch_preamble_items(&mut tail),
}
// Ensure forward progress under errors to avoid infinite loops:
// if nothing was consumed during this iteration, skip one token.
if self.peek_location() <= token_location {
self.advance();
}
}
}
/// Parses a run of stacked `case` labels followed by their shared body:
/// `case <expr>: (case <expr>:)* <arm-body-until-boundary>`.
///
/// `first_case_location` is where the span of the group starts; `tail` is
/// forwarded to the arm body parser. Returns the allocated
/// [`crate::ast::CaseRef`] node.
#[must_use]
fn parse_switch_case_group(
    &mut self,
    first_case_location: TokenLocation,
    tail: &mut Option<ExpressionRef<'src, 'arena>>,
) -> crate::ast::CaseRef<'src, 'arena> {
    let mut labels = self.arena.vec();
    loop {
        // Keep going only while another `case` immediately follows.
        let Some((Token::Case, case_location)) = self.peek_token_and_location() else {
            break;
        };
        self.advance(); // 'case'
        let label = self.parse_expression();
        labels.push(label);
        // Each label must end with `:`; on failure recover at statement level.
        let colon = self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon);
        colon
            .widen_error_span_from(case_location)
            .sync_error_until(self, crate::parser::SyncLevel::Statement)
            .report_error(self);
    }
    let mut body = self.arena.vec();
    self.parse_switch_arm_body(&mut body, tail);
    let span = compute_case_span(first_case_location, &labels, &body);
    let case = crate::ast::SwitchCase { labels, body };
    self.arena.alloc(case, span)
}
/// Parses a `default :` arm, appending its body to `statements`.
///
/// Expects the current token to be `default`. The boundary token that ends
/// the body is left unconsumed.
fn parse_switch_default_arm(
    &mut self,
    default_location: TokenLocation,
    statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
    tail: &mut Option<ExpressionRef<'src, 'arena>>,
) {
    self.advance(); // 'default'
    // `default` must be followed by `:`; on failure recover at statement level.
    let colon = self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon);
    colon
        .widen_error_span_from(default_location)
        .sync_error_until(self, crate::parser::SyncLevel::Statement)
        .report_error(self);
    self.parse_switch_arm_body(statements, tail);
}
/// Parses items of a single switch arm body until a boundary token or EOF.
///
/// Boundary tokens: `case`, `default`, `}`. The boundary token itself is not
/// consumed.
///
/// Parsed statements are appended to `statements`. `tail` holds the most
/// recent expression that was not terminated by `;`: it is left in place only
/// when the body ends at `}` or at EOF; otherwise it is reported and demoted
/// to an expression statement.
fn parse_switch_arm_body(
&mut self,
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
tail: &mut Option<ExpressionRef<'src, 'arena>>,
) {
// No need to report end-of-file as it'll be done by
// `parse_switch_cont`.
while let Some((token, token_location)) = self.peek_token_and_location() {
match token {
// Complain about tail instruction if `switch` body
// doesn't end here
Token::Case | Token::Default => {
if let Some(tail_expression) = tail.take() {
self.report_error_here(ParseErrorKind::SwitchBareExpressionBeforeNextArm);
let span = *tail_expression.span();
let stmt = self
.arena
.alloc(crate::ast::Statement::Expression(tail_expression), span);
statements.push(stmt);
}
break;
}
// End of the whole switch body: a pending tail is kept as is.
Token::RightBrace => break,
_ => (),
}
// We know that at this point:
// 1. There is still a token and it is not EOF;
// 2. It isn't end of the block.
// So having a tail statement there is a problem!
if let Some(tail_expression) = tail.take() {
self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
let tail_span = *tail_expression.span();
let node = self.arena.alloc(
crate::ast::Statement::Expression(tail_expression),
tail_span,
);
statements.push(node);
}
// The value returned for the parsed item becomes the new candidate tail.
*tail = self.parse_block_item(statements);
// Ensure forward progress under errors to avoid infinite loops:
// if nothing was consumed during this iteration, skip one token.
if self.peek_location() <= token_location {
self.advance();
}
}
}
/// Handles items that appear in a `switch` body *before* the first arm
/// (`case`/`default`) declaration.
///
/// Such items are not allowed and are reported right away. They are
/// nevertheless parsed like a regular arm body, so that any errors inside
/// them are still surfaced in case the programmer merely forgot the arm
/// declaration.
///
/// Boundary tokens: `case`, `default`, `}`.
fn parse_switch_preamble_items(&mut self, tail: &mut Option<ExpressionRef<'src, 'arena>>) {
    // The offending token gets its own diagnostic.
    self.report_error_here(ParseErrorKind::SwitchTopLevelItemNotCase);
    // The statements themselves are thrown away: they are parsed into
    // a scratch vector that is never attached to the AST. This is a bit
    // "hacky", but avoids a separate error-only parsing path for
    // one problematic case.
    let mut discarded = self.arena.vec();
    self.parse_switch_arm_body(&mut discarded, tail);
}
/// Allocates a [`crate::ast::Expression::Switch`] node built from the given
/// parts and covering `span`.
#[must_use]
fn alloc_switch_node(
    &mut self,
    selector: ExpressionRef<'src, 'arena>,
    cases: ArenaVec<'arena, crate::ast::CaseRef<'src, 'arena>>,
    default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
    tail: Option<ExpressionRef<'src, 'arena>>,
    span: AstSpan,
) -> ExpressionRef<'src, 'arena> {
    let switch_expression = crate::ast::Expression::Switch {
        selector,
        cases,
        default_arm,
        tail,
    };
    self.arena.alloc(switch_expression, span)
}
}
/// Builds the [`AstSpan`] of a `case` group.
///
/// The span starts at `labels_start_location` and is extended to the end of
/// the last body statement or, when the body is empty, to the end of the last
/// label. With neither present it covers only the start location.
#[must_use]
fn compute_case_span(
    labels_start_location: TokenLocation,
    labels: &[ExpressionRef],
    body: &[StatementRef],
) -> AstSpan {
    let mut span = AstSpan {
        from: labels_start_location,
        to: labels_start_location,
    };
    // The body wins over the labels because it comes later in the source.
    let end = match (body.last(), labels.last()) {
        (Some(last_statement), _) => Some(last_statement.span().to),
        (None, Some(last_label)) => Some(last_label.span().to),
        (None, None) => None,
    };
    if let Some(end) = end {
        span.extend_to(end);
    }
    span
}

View File

@ -1,8 +1,8 @@
//! Parser for Fermented UnrealScript (FerUS).
//! Parser for Fermented `UnrealScript` (`FerUS`).
//!
//! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST
//! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser;
//! the rest rely on recursive descent in [`crate::parser::grammar`].
//! the rest rely on recursive descent in [`crate::parser::grammar`].\
//! Non-fatal errors accumulate in `Parser::diagnostics` as
//! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by
//! [`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while
@ -27,17 +27,14 @@
use super::lexer;
pub use lexer::{TokenPiece, Tokens};
pub use lexer::{TokenData, Tokens};
mod cursor;
mod errors;
mod grammar;
pub mod pretty;
mod recovery;
mod trivia;
pub use pretty::{ExprTree, StmtTree};
pub use errors::ParseError;
pub(crate) use errors::{ParseErrorKind, ParseResult};
pub(crate) use recovery::{ResultRecoveryExt, SyncLevel};
@ -46,21 +43,34 @@ pub(crate) use trivia::{TriviaKind, TriviaToken};
pub type ParseExpressionResult<'src, 'arena> =
ParseResult<'src, 'arena, crate::ast::ExpressionRef<'src, 'arena>>;
pub(crate) mod diagnostic_labels {
pub(crate) const EXPRESSION_REQUIRED_BY: &str = "expression_required_by";
pub(crate) const EXPRESSION_EXPECTED_AFTER: &str = "expression_expected_after";
}
// TODO: add some kind of bailing for infinite loops
// let remaining_steps = file.token_count().saturating_mul(256).saturating_add(1024);
/// A recursive-descent parser over tokens from [`crate::lexer::TokenizedFile`].
pub struct Parser<'src, 'arena> {
file: &'src lexer::TokenizedFile<'src>,
arena: &'arena crate::arena::Arena,
pub diagnostics: Vec<crate::diagnostics::Diagnostic>,
cursor: cursor::CursorComponent<'src>,
trivia: trivia::TriviaComponent<'src>,
cursor: cursor::Cursor<'src, 'src>,
trivia: trivia::TriviaIndexBuilder<'src>,
}
impl<'src, 'arena> Parser<'src, 'arena> {
pub fn new(file: &'src lexer::TokenizedFile<'src>, arena: &'arena crate::arena::Arena) -> Self {
Self {
file,
arena,
diagnostics: Vec::new(),
cursor: cursor::CursorComponent::new(file),
trivia: trivia::TriviaComponent::default(),
cursor: cursor::Cursor::new(file),
trivia: trivia::TriviaIndexBuilder::default(),
}
}
pub fn file(&self) -> &'src lexer::TokenizedFile<'src> {
self.file
}
}

View File

@ -1,353 +0,0 @@
use crate::ast::{Expression, Statement, SwitchCase, VariableDeclarator};
use core::fmt;
/// A single entry of the printed tree: a borrow of a statement, expression or
/// switch case node, plus helper variants used to enrich the printed tree.
///
/// `'b` is the lifetime of the borrow of the AST node; `'src` and `'a` are
/// passed through to the node types.
enum AnyNode<'src, 'a, 'b> {
/// A borrowed statement node.
Stmt(&'b Statement<'src, 'a>),
/// A borrowed expression node.
Expr(&'b Expression<'src, 'a>),
/// A borrowed `switch` case node (its labels and body).
Case(&'b SwitchCase<'src, 'a>),
/// A leaf line with a preformatted label (e.g., variable names).
Text(String),
/// Wraps a child with a tag like "cond", "body", "else", "init".
/// The tag is printed before the child's own label; the child's
/// children are shown as the children of this entry.
Tagged(&'static str, Box<AnyNode<'src, 'a, 'b>>),
}
/// Public wrapper that prints, via [`fmt::Display`], the tree rooted at
/// the borrowed statement.
pub struct StmtTree<'src, 'a, 'b>(pub &'b Statement<'src, 'a>);
/// Public wrapper that prints, via [`fmt::Display`], the tree rooted at
/// the borrowed expression.
pub struct ExprTree<'src, 'a, 'b>(pub &'b Expression<'src, 'a>);
impl fmt::Display for StmtTree<'_, '_, '_> {
    /// Prints the wrapped statement as the root of the tree: with an empty
    /// prefix and marked as the last sibling.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let root = AnyNode::Stmt(self.0);
        fmt_node(root, formatter, "", true)
    }
}
impl fmt::Display for ExprTree<'_, '_, '_> {
    /// Prints the wrapped expression as the root of the tree: with an empty
    /// prefix and marked as the last sibling.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let root = AnyNode::Expr(self.0);
        fmt_node(root, formatter, "", true)
    }
}
/// Writes `node` and, recursively, all of its children as an indented tree.
///
/// Every node takes one line: `prefix`, a connector (`└─ ` for the last
/// sibling, `├─ ` otherwise) and the node's label. Children are printed with
/// `prefix` extended by one three-column level, which carries a vertical
/// rule `│` while further siblings of `node` are still to come, so that
/// the connectors of those siblings line up.
///
/// `is_last` tells whether `node` is the last child of its parent.
/// Returns the first error produced by the formatter, if any.
fn fmt_node<'src, 'a, 'b>(
    node: AnyNode<'src, 'a, 'b>,
    f: &mut fmt::Formatter<'_>,
    prefix: &str,
    is_last: bool,
) -> fmt::Result {
    // The two branches used to be identical, which made the last sibling
    // indistinguishable from the others and dropped the tree connectors.
    let (connector, continuation) = if is_last {
        ("└", "   ")
    } else {
        ("├", "│  ")
    };
    writeln!(f, "{prefix}{connector}─ {}", label(&node))?;
    // Same width as the connector plus "─ ", to keep nested levels aligned.
    let child_prefix = format!("{prefix}{continuation}");
    let kids = children(node);
    let count = kids.len();
    for (index, child) in kids.into_iter().enumerate() {
        fmt_node(child, f, &child_prefix, index + 1 == count)?;
    }
    Ok(())
}
/// ----- Labeling -----
///
/// Returns the one-line label of `node`. A tagged node is labeled as
/// `<tag>: <label of the wrapped node>`.
fn label<'src, 'a, 'b>(node: &AnyNode<'src, 'a, 'b>) -> String {
    match node {
        AnyNode::Tagged(tag, inner) => format!("{tag}: {}", label(inner)),
        AnyNode::Text(text) => text.clone(),
        AnyNode::Case(case) => case_label(case),
        AnyNode::Stmt(statement) => stmt_label(statement),
        AnyNode::Expr(expression) => expr_label(expression),
    }
}
/// Wraps `s` in double quotes, escaping backslashes, double quotes, line
/// feeds, carriage returns and tabs with a backslash sequence.
///
/// All other characters are copied unchanged.
fn quote_str(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        // `Some` holds the replacement of a character that needs escaping.
        let escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match escape {
            Some(sequence) => quoted.push_str(sequence),
            None => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}
fn expr_label<'src, 'a>(e: &Expression<'src, 'a>) -> String {
match e {
Expression::Binary(_, op, _) => format!("Binary {op}"),
Expression::LeftUnary(op, _) => format!("UnaryL {op}"),
Expression::RightUnary(_, op) => format!("UnaryR {op}"),
Expression::Identifier(s) => format!("Ident {s}"),
Expression::String(s) => {
// Avoid assuming ArenaString exposes &str; go via Display -> String.
format!("String {}", quote_str(&s.to_string()))
}
Expression::Integer(i) => format!("Int {i}"),
Expression::Float(x) => format!("Float {x}"),
Expression::Bool(true) => "Bool true".into(),
Expression::Bool(false) => "Bool false".into(),
Expression::None => "None".into(),
Expression::Parentheses(_) => "Parentheses".into(),
Expression::Block { statements, tail } => {
let n = statements.len() + usize::from(tail.is_some());
let tail_s = if tail.is_some() { " tail" } else { "" };
format!("BlockExpr ({n} items{tail_s})")
}
Expression::If { .. } => "IfExpr".into(),
Expression::While { .. } => "WhileExpr".into(),
Expression::DoUntil { .. } => "DoUntilExpr".into(),
Expression::ForEach { .. } => "ForEachExpr".into(),
Expression::For { .. } => "ForExpr".into(),
Expression::Switch {
cases,
default_arm: default,
..
} => {
let d = if default.is_some() { " yes" } else { " no" };
format!("SwitchExpr cases={} default:{}", cases.len(), d)
}
Expression::Goto(label) => format!("Goto {}", label.to_string()),
Expression::Continue => "Continue".into(),
Expression::Break(Some(_)) => "Break value".into(),
Expression::Break(None) => "Break".into(),
Expression::Return(Some(_)) => "Return value".into(),
Expression::Return(None) => "Return".into(),
Expression::Error => "Error".into(),
}
}
/// ----- Children collection -----
///
/// Returns the entries to print below `node`. Text leaves have none;
/// a tagged node exposes the children of the node it wraps.
fn children<'src, 'a, 'b>(node: AnyNode<'src, 'a, 'b>) -> Vec<AnyNode<'src, 'a, 'b>> {
    match node {
        AnyNode::Text(_) => Vec::new(),
        AnyNode::Tagged(_, wrapped) => children(*wrapped),
        AnyNode::Case(case) => case_children(case),
        AnyNode::Stmt(statement) => stmt_children(statement),
        AnyNode::Expr(expression) => expr_children(expression),
    }
}
/// Returns the child entries of an expression node.
///
/// Children can be statements as well as expressions (inside blocks and
/// switches). Children of structured forms are tagged with their role
/// ("cond", "body", "tail", ...); operands of operators and the content of
/// parentheses are listed untagged.
fn expr_children<'src, 'a, 'b>(e: &'b Expression<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    /// Wraps `node` into an [`AnyNode::Tagged`] entry carrying `tag`.
    fn tagged<'s, 'n, 'r>(tag: &'static str, node: AnyNode<'s, 'n, 'r>) -> AnyNode<'s, 'n, 'r> {
        AnyNode::Tagged(tag, Box::new(node))
    }

    match e {
        // Leaves
        Expression::Identifier(_)
        | Expression::String(_)
        | Expression::Integer(_)
        | Expression::Float(_)
        | Expression::Bool(_)
        | Expression::None
        | Expression::Goto(_)
        | Expression::Continue
        | Expression::Break(None)
        | Expression::Return(None)
        | Expression::Error => Vec::new(),
        // Optional value of `break`/`return`
        Expression::Break(Some(value)) => vec![tagged("value", AnyNode::Expr(&*value))],
        Expression::Return(Some(value)) => vec![tagged("value", AnyNode::Expr(&*value))],
        // Purely expression subtrees
        Expression::Binary(left, _, right) => {
            vec![AnyNode::Expr(&*left), AnyNode::Expr(&*right)]
        }
        Expression::LeftUnary(_, operand) => vec![AnyNode::Expr(&*operand)],
        Expression::RightUnary(operand, _) => vec![AnyNode::Expr(&*operand)],
        Expression::Parentheses(inner) => vec![AnyNode::Expr(&*inner)],
        // Structured expression forms
        Expression::Block { statements, tail } => {
            let mut entries: Vec<AnyNode<'src, 'a, 'b>> = Vec::new();
            for statement in statements.iter() {
                entries.push(tagged("stmt", AnyNode::Stmt(&*statement)));
            }
            if let Some(tail_expression) = tail.as_ref() {
                entries.push(tagged("tail", AnyNode::Expr(&*tail_expression)));
            }
            entries
        }
        Expression::If {
            condition,
            body,
            else_body,
        } => {
            let mut entries = vec![
                tagged("cond", AnyNode::Expr(&*condition)),
                tagged("body", AnyNode::Expr(&*body)),
            ];
            if let Some(alternative) = else_body {
                entries.push(tagged("else", AnyNode::Expr(&*alternative)));
            }
            entries
        }
        Expression::While { condition, body } => vec![
            tagged("cond", AnyNode::Expr(&*condition)),
            tagged("body", AnyNode::Expr(&*body)),
        ],
        // The body comes first, mirroring the source order of `do ... until`.
        Expression::DoUntil { condition, body } => vec![
            tagged("body", AnyNode::Expr(&*body)),
            tagged("until", AnyNode::Expr(&*condition)),
        ],
        Expression::ForEach { iterator, body } => vec![
            tagged("iter", AnyNode::Expr(&*iterator)),
            tagged("body", AnyNode::Expr(&*body)),
        ],
        Expression::For {
            init,
            condition,
            step,
            body,
        } => {
            // Any of the three header parts may be absent.
            let mut entries = Vec::with_capacity(4);
            if let Some(init_expression) = init {
                entries.push(tagged("init", AnyNode::Expr(&*init_expression)));
            }
            if let Some(condition_expression) = condition {
                entries.push(tagged("cond", AnyNode::Expr(&*condition_expression)));
            }
            if let Some(step_expression) = step {
                entries.push(tagged("step", AnyNode::Expr(&*step_expression)));
            }
            entries.push(tagged("body", AnyNode::Expr(&*body)));
            entries
        }
        Expression::Switch {
            selector,
            cases,
            default_arm,
            tail,
        } => {
            let mut entries: Vec<AnyNode<'src, 'a, 'b>> =
                vec![tagged("selector", AnyNode::Expr(&*selector))];
            for case in cases.iter() {
                entries.push(tagged("case", AnyNode::Case(&*case)));
            }
            // Statements of the default arm are listed one by one.
            if let Some(default_statements) = default_arm.as_ref() {
                for statement in default_statements.iter() {
                    entries.push(tagged("default", AnyNode::Stmt(&*statement)));
                }
            }
            if let Some(tail_expression) = tail.as_ref() {
                entries.push(tagged("tail", AnyNode::Expr(&*tail_expression)));
            }
            entries
        }
    }
}
fn stmt_label<'src, 'a>(s: &Statement<'src, 'a>) -> String {
use Statement::*;
match s {
Empty => "Empty ;".into(),
Expression(_) => "Expression".into(),
LocalVariableDeclaration {
type_name,
identifiers: variable_names,
} => {
let count = variable_names.len();
let names = variable_names
.iter()
.map(|n| n.to_string())
.collect::<Vec<_>>()
.join(", ");
format!("LocalVarDecl type={type_name} count={count} names=[{names}]")
}
VariableDeclaration {
type_name,
declarations: variable_names,
} => {
let total = variable_names.len();
let inits = variable_names
.iter()
.filter(|v| v.initializer.is_some())
.count();
let names = variable_names
.iter()
.map(|VariableDeclarator { name, .. }| name.to_string())
.collect::<Vec<_>>()
.join(", ");
format!("VarDecl type={type_name} vars={total} inits={inits} names=[{names}]")
}
Label(name) => format!("Label {name}"),
Error => "Error".into(),
}
}
fn stmt_children<'src, 'a, 'b>(s: &'b Statement<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
use Statement::*;
match s {
Empty | Label(_) | Error => vec![],
Expression(expr) => vec![AnyNode::Expr(&*expr)],
LocalVariableDeclaration {
identifiers: variable_names,
..
} => variable_names
.iter()
.map(|n| AnyNode::Text(format!("name: {n}")))
.collect(),
VariableDeclaration {
declarations: variable_names,
..
} => {
let mut out = Vec::new();
for VariableDeclarator {
name,
initializer: initial_value,
} in variable_names.iter()
{
out.push(AnyNode::Text(format!("var: {name}")));
if let Some(init_expr) = initial_value {
out.push(AnyNode::Tagged(
"init",
Box::new(AnyNode::Expr(&*init_expr)),
));
}
}
out
}
}
}
/// Returns the child entries of a `switch` case: all of its labels (tagged
/// "label") followed by all statements of its body (tagged "stmt").
fn case_children<'src, 'a, 'b>(c: &'b SwitchCase<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    let labels = c
        .labels
        .iter()
        .map(|case_label| AnyNode::Tagged("label", Box::new(AnyNode::Expr(&*case_label))));
    let body = c
        .body
        .iter()
        .map(|statement| AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*statement))));
    labels.chain(body).collect()
}
/// Returns the one-line label of a `switch` case: the number of its labels
/// and the number of items in its body.
fn case_label<'src, 'a>(c: &SwitchCase<'src, 'a>) -> String {
    let (l, b) = (c.labels.len(), c.body.len());
    format!("Case labels={l} body_items={b}")
}

View File

@ -4,89 +4,181 @@
//! token is found. The sync target is chosen from [`SyncLevel`] based on
//! the error kind. Methods on [`ParseResult`] let callers widen the error span,
//! synchronize, report, and produce fallback values.
//!
//! General idea is that any method that returns something other than an error
//! can be assumed to have reported it.
use crate::lexer::{Token, TokenLocation};
#![allow(dead_code)]
// TODO: remove dead code
use crate::ast::{CallableKind, IdentifierToken, QualifiedIdentifier};
use crate::diagnostics::diagnostic_from_parse_error;
use crate::lexer::{Token, TokenPosition, TokenSpan};
use crate::parser::{ParseError, ParseResult, Parser};
/// Synchronization groups the parser can stop at during recovery.
///
/// Stronger levels subsume weaker ones. The enum's variant order defines this
/// ordering of strength via [`Ord`]; changing it changes recovery behavior.
/// The variant order defines recovery strength: later variants are treated as
/// "stronger" boundaries, so synchronizing to a weaker level will also stop
/// at any stronger one.
///
/// This enum is intentionally coarse-grained and semantic. It is not meant to
/// encode arbitrary token sets.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
pub(crate) enum SyncLevel {
/// Tokens that appear inside expressions.
pub enum SyncLevel {
/// Tokens that can reasonably continue or restart an expression.
///
/// Includes operators, member access `.`, ternary `? :`, an opening `(`,
/// and identifiers.
Expression,
/// List separator `,`.
/// This is the loosest recovery level.
ExpressionStart,
/// Separator between homogeneous list elements, e.g. `,`.
///
/// Synchronizing here also stops at closing delimiters and stronger
/// structural boundaries.
ListSeparator,
/// Close of a parenthesized subexpression `)`.
CloseParenthesis,
/// Close of an index or list `]`.
/// Closing `>` of an angle-bracket-delimited type/class argument list.
CloseAngleBracket,
/// Closing `]` of an index or bracket-delimited construct.
CloseBracket,
/// Statement boundary or starter.
Statement,
/// Block boundary braces (both `{` and `}`).
/// Closing `)` of a parenthesized/grouped construct.
CloseParenthesis,
ColonDelimiter,
/// A statement boundary or statement starter.
///
/// Includes `;` and keywords that begin standalone statements /
/// statement-like control-flow forms.
StatementStart,
/// Statement terminator `;`.
StatementTerminator,
/// Start of a `switch` arm.
///
/// This is useful because `case` / `default` are stronger boundaries than
/// ordinary statements inside switch parsing.
SwitchSectionBoundary,
/// Start of a declaration-like item.
///
/// Used for recovery in declaration-containing bodies where the next
/// sensible point is "the next member/declaration" rather than merely
/// "some statement".
DeclarationStart,
/// A hard block boundary.
///
/// This is the strongest normal recovery point.
BlockBoundary,
/// Start of a top-level or class-level declaration.
TopDeclaration,
}
impl SyncLevel {
/// Converts [`Token`] to its [`SyncLevel`], if it has one.
fn for_token(token: Token) -> Option<SyncLevel> {
use SyncLevel::*;
use Token::*;
/// Converts a token to its synchronization class, if any.
const fn for_token(token: Token) -> Option<Self> {
use crate::lexer::Keyword;
use SyncLevel::{
BlockBoundary, CloseAngleBracket, CloseBracket, CloseParenthesis, ColonDelimiter,
DeclarationStart, ExpressionStart, ListSeparator, StatementStart, StatementTerminator,
SwitchSectionBoundary,
};
match token {
Exponentiation | Increment | Decrement | Not | BitwiseNot | Dot | Cross | Multiply
| Divide | Modulo | Plus | Minus | ConcatSpace | Concat | LeftShift
| LogicalRightShift | RightShift | Less | LessEqual | Greater | GreaterEqual
| Equal | NotEqual | ApproximatelyEqual | ClockwiseFrom | BitwiseAnd | BitwiseOr
| BitwiseXor | And | Xor | Or | Assign | MultiplyAssign | DivideAssign
| ModuloAssign | PlusAssign | MinusAssign | ConcatAssign | ConcatSpaceAssign
| Period | Question | Colon | LeftParenthesis | Identifier => Some(Expression),
// List / delimiter boundaries
Token::Comma => Some(ListSeparator),
Token::Greater => Some(CloseAngleBracket),
Token::RightParenthesis => Some(CloseParenthesis),
Token::RightBracket => Some(CloseBracket),
Comma => Some(ListSeparator),
// Statement-level boundaries
Token::Keyword(
Keyword::If
| Keyword::Else
| Keyword::Switch
| Keyword::For
| Keyword::ForEach
| Keyword::While
| Keyword::Do
| Keyword::Until
| Keyword::Return
| Keyword::Break
| Keyword::Continue
| Keyword::Local,
) => Some(StatementStart),
RightParenthesis => Some(CloseParenthesis),
RightBracket => Some(CloseBracket),
Token::Colon => Some(ColonDelimiter),
Case | Default | If | Else | Switch | For | ForEach | While | Do | Return | Break
| Continue | Local | Semicolon => Some(Statement),
Token::Semicolon => Some(StatementTerminator),
Brace(_) | RightBrace => Some(BlockBoundary),
// Switch-specific stronger boundary
Token::Keyword(Keyword::Case | Keyword::Default) => Some(SwitchSectionBoundary),
Class | Struct | Enum | State | Function | Event | Delegate | Operator | Var
| Replication | NativeReplication | DefaultProperties | CppText | ExecDirective => {
Some(TopDeclaration)
// Declaration/member starts
Token::Keyword(
Keyword::Class
| Keyword::Struct
| Keyword::Enum
| Keyword::State
| Keyword::Function
| Keyword::Event
| Keyword::Delegate
| Keyword::Operator
| Keyword::Var
| Keyword::Replication
| Keyword::NativeReplication
| Keyword::DefaultProperties
| Keyword::CppText
| Keyword::CppStruct,
)
| Token::ExecDirective => Some(DeclarationStart),
// Hard structural stop
Token::LeftBrace | Token::CppBlock | Token::RightBrace => Some(BlockBoundary),
_ => {
if token.is_definitely_not_expression_start() {
None
} else {
Some(ExpressionStart)
}
}
_ => Option::None,
}
}
fn fallback_sync_level_for_delimiter_start(token: Option<Token>) -> SyncLevel {
match token {
Some(Token::LeftParenthesis) => SyncLevel::CloseParenthesis,
Some(Token::LeftBracket) => SyncLevel::CloseBracket,
Some(Token::LeftBrace) => SyncLevel::BlockBoundary,
_ => SyncLevel::CloseParenthesis,
}
}
/// Returns `true` when `token` is one of the opening delimiters
/// `(`, `[` or `{`.
fn is_opening_delimiter(token: Token) -> bool {
    match token {
        Token::LeftParenthesis | Token::LeftBracket | Token::LeftBrace => true,
        _ => false,
    }
}
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Converts a parse error into a diagnostic and queues it.
///
/// Placeholder implementation.
fn handle_error(&mut self, error: ParseError) {
let diagnostic = crate::diagnostics::DiagnosticBuilder::error(format!(
"error {:?} while parsing",
error.kind
))
.primary_label(error.source_span, "happened here")
.build();
self.diagnostics.push(diagnostic);
pub fn report_error(&mut self, error: ParseError) {
//self.diagnostics.push(Diagnostic::from(error));
self.diagnostics
.push(diagnostic_from_parse_error(error, self.file()));
}
/// Reports a parser error with [`crate::parser::ParseErrorKind`] at
/// the current location and queues an appropriate diagnostic.
pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) {
let new_error = self.make_error_here(error_kind);
self.handle_error(new_error);
let new_error = self.make_error_at_last_consumed(error_kind);
self.report_error(new_error);
}
/// Skips tokens until a token with `min_sync` level or stronger is found.
@ -99,27 +191,163 @@ impl<'src, 'arena> Parser<'src, 'arena> {
{
break;
}
// Always advances when `peek_token()` is `Some(...)`,
// so the loop cannot be infinite.
self.advance();
}
}
/// Runs [`Self::recover_until`] for `level`, then consumes the token the
/// parser stopped on only if that token's sync level is exactly `level`.
///
/// This mirrors the behavior used by [`ResultRecoveryExt::sync_error_at`]:
/// a stronger sync token may end the skipping, but it is left in the
/// stream for an outer recovery point to handle.
fn recover_at_sync_level(&mut self, level: SyncLevel) {
    self.recover_until(level);
    let stopped_at = self.peek_token().and_then(SyncLevel::for_token);
    if stopped_at == Some(level) {
        self.advance();
    }
}
/// Skips forward to the closing delimiter that the lexer paired with the
/// opening delimiter at `delimiter_start`, leaving that closing delimiter
/// unconsumed.
///
/// Falls back to [`Self::recover_until`] with a sync level inferred from
/// the token at `delimiter_start` when any of the following holds:
///
/// - there is no token at `delimiter_start`, or it is not an opening
///   delimiter;
/// - no matching delimiter is known for it;
/// - the parser is already positioned past the matching delimiter.
pub(crate) fn recover_until_matching_delimiter_or_sync(
    &mut self,
    delimiter_start: TokenPosition,
) {
    let opener = self.file().token_at(delimiter_start).map(|data| data.token);
    let fallback_level = SyncLevel::fallback_sync_level_for_delimiter_start(opener);
    // Only query the match table when the start really is an opener.
    let target = match opener {
        Some(token) if SyncLevel::is_opening_delimiter(token) => {
            self.file().matching_delimiter(delimiter_start)
        }
        _ => None,
    };
    if let Some(target) = target {
        if self.peek_position_or_eof() <= target {
            // Stop right before the matching delimiter (or at end of input).
            while self
                .peek_position()
                .is_some_and(|position| position < target)
            {
                self.advance();
            }
            return;
        }
    }
    self.recover_until(fallback_level);
}
/// Skips forward to the closing delimiter that the lexer paired with the
/// opening delimiter at `delimiter_start` and consumes it.
///
/// Falls back to [`Self::recover_at_sync_level`] with a sync level inferred
/// from the token at `delimiter_start` when any of the following holds:
///
/// - there is no token at `delimiter_start`, or it is not an opening
///   delimiter;
/// - no matching delimiter is known for it;
/// - the parser is already positioned past the matching delimiter;
/// - skipping ended somewhere other than exactly on the matching
///   delimiter.
pub(crate) fn recover_at_matching_delimiter_or_sync(&mut self, delimiter_start: TokenPosition) {
    let opener = self.file().token_at(delimiter_start).map(|data| data.token);
    let fallback_level = SyncLevel::fallback_sync_level_for_delimiter_start(opener);
    // Only query the match table when the start really is an opener.
    let target = match opener {
        Some(token) if SyncLevel::is_opening_delimiter(token) => {
            self.file().matching_delimiter(delimiter_start)
        }
        _ => None,
    };
    if let Some(target) = target {
        if self.peek_position_or_eof() <= target {
            while self
                .peek_position()
                .is_some_and(|position| position < target)
            {
                self.advance();
            }
            if self.peek_position_or_eof() == target {
                // Landed exactly on the closing delimiter: swallow it.
                self.advance();
                return;
            }
        }
    }
    self.recover_at_sync_level(fallback_level);
}
/// Reports `error` and returns the recovery fallback for `T`.
///
/// This is the primitive used when parsing must keep going with a
/// best-effort placeholder value of the expected type.
#[must_use]
pub(crate) fn report_error_with_fallback<T>(&mut self, error: ParseError) -> T
where
T: RecoveryFallback<'src, 'arena>,
{
let fallback = T::fallback_value(self, &error);
self.report_error(error);
fallback
}
}
/// Supplies a fallback value after a parse error so parsing can continue and
/// reveal further errors.
pub(crate) trait RecoveryFallback<'src, 'arena>: Sized {
pub trait RecoveryFallback<'src, 'arena>: Sized {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self;
}
/// Extends [`ParseResult`] with recovery-related methods for
/// fluent error handling.
pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized {
/// !!!! Can we store a parser reference instead of passing it into every method?
pub trait ResultRecoveryExt<'src, 'arena, T>: Sized {
/// Extends the left end of the error span to `from`.
///
/// Does nothing if `Self` is `Ok(...)`.
#[must_use]
fn widen_error_span_from(self, from: TokenLocation) -> Self;
fn widen_error_span_from(self, from: TokenPosition) -> Self;
fn blame(self, blame_span: TokenSpan) -> Self;
fn related(self, tag: impl Into<String>, related_span: TokenSpan) -> Self;
fn blame_token(self, blame_position: TokenPosition) -> Self {
self.blame(TokenSpan::new(blame_position))
}
fn extend_blame_to_next_token(self, parser: &mut Parser<'src, 'arena>) -> Self;
fn extend_blame_start_to_covered_start(self) -> Self;
fn extend_blame_end_to_covered_end(self) -> Self;
// TODO: say that we use textual tags because they are very local to each error and read better
// than some kind of constant.
fn related_token(self, tag: impl Into<String>, related_position: TokenPosition) -> Self {
self.related(tag, TokenSpan::new(related_position))
}
/// Extends the right end of the error span up to but not including
/// the next token of the given sync `level`.
@ -135,30 +363,87 @@ pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized {
#[must_use]
fn sync_error_at(self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self;
/// Extends the right end of the error span up to, but not including, the
/// known matching closing delimiter for `delimiter_start`, if that delimiter
/// is known and has not already been passed.
///
/// If no usable delimiter match exists, falls back to ordinary
/// [`SyncLevel`]-based recovery. The fallback level is inferred from the
/// token at `delimiter_start`.
#[must_use]
fn sync_error_until_matching_delimiter(
self,
parser: &mut Parser<'src, 'arena>,
delimiter_start: TokenPosition,
) -> Self;
/// Extends the right end of the error span to include the known matching
/// closing delimiter for `delimiter_start`, if that delimiter is known and
/// has not already been passed.
///
/// If no usable delimiter match exists, falls back to ordinary
/// [`SyncLevel`]-based recovery. The fallback level is inferred from the
/// token at `delimiter_start`:
///
/// - `(` -> [`SyncLevel::CloseParenthesis`]
/// - `[` -> [`SyncLevel::CloseBracket`]
/// - `{` -> [`SyncLevel::BlockBoundary`]
/// - anything else -> [`SyncLevel::CloseParenthesis`]
#[must_use]
fn sync_error_at_matching_delimiter(
self,
parser: &mut Parser<'src, 'arena>,
delimiter_start: TokenPosition,
) -> Self;
/// Either returns expected value or its best effort fallback.
#[must_use]
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T;
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
where
T: RecoveryFallback<'src, 'arena>;
/// Reports the contained error, if any, to `parser`.
fn report_error(self, parser: &mut Parser<'src, 'arena>);
fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool;
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<T>;
}
impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T>
where
T: RecoveryFallback<'src, 'arena>,
{
fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> {
fn widen_error_span_from(mut self, from: TokenPosition) -> Self {
if let Err(ref mut error) = self {
error.source_span.from = std::cmp::min(error.source_span.from, from);
error.covered_span.start = std::cmp::min(error.covered_span.start, from);
}
self
}
/// Applies `blame(blame_span)` to the contained error, if there is one.
///
/// `Ok(...)` values pass through unchanged.
fn blame(self, blame_span: TokenSpan) -> Self {
self.map_err(|error| error.blame(blame_span))
}
/// Applies `extend_blame_to_next_token(parser)` to the contained error,
/// if there is one.
///
/// `Ok(...)` values pass through unchanged.
fn extend_blame_to_next_token(self, parser: &mut Parser<'src, 'arena>) -> Self {
self.map_err(|error| error.extend_blame_to_next_token(parser))
}
/// Applies `extend_blame_start_to_covered_start()` to the contained error,
/// if there is one.
///
/// `Ok(...)` values pass through unchanged.
fn extend_blame_start_to_covered_start(self) -> Self {
self.map_err(|error| error.extend_blame_start_to_covered_start())
}
/// Applies `extend_blame_end_to_covered_end()` to the contained error,
/// if there is one.
///
/// `Ok(...)` values pass through unchanged.
fn extend_blame_end_to_covered_end(self) -> Self {
self.map_err(|error| error.extend_blame_end_to_covered_end())
}
/// Applies `related(tag, related_span)` to the contained error, if there
/// is one.
///
/// `Ok(...)` values pass through unchanged.
fn related(self, tag: impl Into<String>, related_span: TokenSpan) -> Self {
self.map_err(|error| error.related(tag, related_span))
}
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
if let Err(ref mut error) = self {
parser.recover_until(level);
error.source_span.to = parser.last_visited_location();
error.covered_span.end = std::cmp::max(
error.covered_span.end,
parser.last_consumed_position_or_start(),
);
}
self
}
@ -166,60 +451,289 @@ where
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
if let Err(ref mut error) = self {
parser.recover_until(level);
error.source_span.to = parser.peek_location();
// If we're at end-of-file, this'll simply do nothing.
parser.advance();
if parser
.peek_token()
.and_then(SyncLevel::for_token)
.is_some_and(|next_level| next_level == level)
{
parser.advance();
}
error.covered_span.end = std::cmp::max(
error.covered_span.end,
parser.last_consumed_position_or_start(),
);
}
self
}
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T {
self.unwrap_or_else(|error| {
let value = T::fallback_value(parser, &error);
parser.handle_error(error);
value
})
/// On `Err`, recovers up to (but not past) the delimiter matching
/// `delimiter_start` and grows the error's covered span to the last
/// consumed position. `Ok(...)` values pass through without touching the
/// parser.
fn sync_error_until_matching_delimiter(
    self,
    parser: &mut Parser<'src, 'arena>,
    delimiter_start: TokenPosition,
) -> Self {
    self.map_err(|mut error| {
        parser.recover_until_matching_delimiter_or_sync(delimiter_start);
        let consumed_end = parser.last_consumed_position_or_start();
        // The covered span may only grow, never shrink.
        if consumed_end > error.covered_span.end {
            error.covered_span.end = consumed_end;
        }
        error
    })
}
fn report_error(self, parser: &mut Parser<'src, 'arena>) {
/// On `Err`, recovers through the delimiter matching `delimiter_start`
/// and grows the error's covered span to the last consumed position.
/// `Ok(...)` values pass through without touching the parser.
fn sync_error_at_matching_delimiter(
    self,
    parser: &mut Parser<'src, 'arena>,
    delimiter_start: TokenPosition,
) -> Self {
    self.map_err(|mut error| {
        parser.recover_at_matching_delimiter_or_sync(delimiter_start);
        let consumed_end = parser.last_consumed_position_or_start();
        // The covered span may only grow, never shrink.
        if consumed_end > error.covered_span.end {
            error.covered_span.end = consumed_end;
        }
        error
    })
}
/// Yields the parsed value, or reports the error to `parser` and yields
/// the fallback value for `T` instead.
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
where
    T: RecoveryFallback<'src, 'arena>,
{
    match self {
        Ok(value) => value,
        Err(error) => parser.report_error_with_fallback(error),
    }
}
fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool {
if let Err(error) = self {
parser.handle_error(error);
parser.report_error(error);
true
} else {
false
}
}
/// Converts the result into an `Option`, reporting the error to `parser`
/// when there is one.
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<T> {
    self.map_err(|error| parser.report_error(error)).ok()
}
}
impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
self.source_span.from = std::cmp::min(self.source_span.from, from);
fn widen_error_span_from(mut self, from: TokenPosition) -> Self {
self.covered_span.start = std::cmp::min(self.covered_span.start, from);
self
}
/// Overwrites this error's `blame_span` with `blame_span` and returns
/// the error.
fn blame(mut self, blame_span: TokenSpan) -> Self {
self.blame_span = blame_span;
self
}
/// Moves the end of `blame_span` forward to the position returned by
/// `parser.peek_position_or_eof()`.
///
/// The end is never moved backward: the larger of the two positions wins.
fn extend_blame_to_next_token(mut self, parser: &mut Parser<'src, 'arena>) -> Self {
self.blame_span.end = std::cmp::max(self.blame_span.end, parser.peek_position_or_eof());
self
}
/// Moves the start of `blame_span` back to the start of `covered_span`.
///
/// The start is never moved forward: the smaller of the two positions wins.
fn extend_blame_start_to_covered_start(mut self) -> Self {
self.blame_span.start = std::cmp::min(self.blame_span.start, self.covered_span.start);
self
}
/// Moves the end of `blame_span` forward to the end of `covered_span`.
///
/// The end is never moved backward: the larger of the two positions wins.
fn extend_blame_end_to_covered_end(mut self) -> Self {
self.blame_span.end = std::cmp::max(self.blame_span.end, self.covered_span.end);
self
}
/// Inserts `related_span` into this error's `related_spans` under the
/// textual `tag` and returns the error.
fn related(mut self, tag: impl Into<String>, related_span: TokenSpan) -> Self {
self.related_spans.insert(tag.into(), related_span);
self
}
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
parser.recover_until(level);
self.source_span.to = parser.last_visited_location();
self.covered_span.end = std::cmp::max(
self.covered_span.end,
parser.last_consumed_position_or_start(),
);
self
}
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
parser.recover_until(level);
self.source_span.to = parser.peek_location();
// If we're at end-of-file, this'll simply do nothing.
parser.advance();
if parser
.peek_token()
.and_then(SyncLevel::for_token)
.is_some_and(|next_level| next_level == level)
{
parser.advance();
}
self.covered_span.end = std::cmp::max(
self.covered_span.end,
parser.last_consumed_position_or_start(),
);
self
}
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> () {
parser.handle_error(self);
/// Recovers up to (but not past) the delimiter matching `delimiter_start`
/// and grows this error's covered span to the last consumed position.
fn sync_error_until_matching_delimiter(
    mut self,
    parser: &mut Parser<'src, 'arena>,
    delimiter_start: TokenPosition,
) -> Self {
    parser.recover_until_matching_delimiter_or_sync(delimiter_start);
    let consumed_end = parser.last_consumed_position_or_start();
    // The covered span may only grow, never shrink.
    self.covered_span.end = self.covered_span.end.max(consumed_end);
    self
}
fn report_error(self, parser: &mut Parser<'src, 'arena>) {
parser.handle_error(self);
/// Recovers through the delimiter matching `delimiter_start` and grows
/// this error's covered span to the last consumed position.
fn sync_error_at_matching_delimiter(
    mut self,
    parser: &mut Parser<'src, 'arena>,
    delimiter_start: TokenPosition,
) -> Self {
    parser.recover_at_matching_delimiter_or_sync(delimiter_start);
    let consumed_end = parser.last_consumed_position_or_start();
    // The covered span may only grow, never shrink.
    self.covered_span.end = self.covered_span.end.max(consumed_end);
    self
}
/// Reports this error to `parser`.
///
/// A bare error carries no value, so the result is `()`.
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) {
parser.report_error(self);
}
/// Reports this error to `parser`.
///
/// Always returns `true`, because a bare error always has something to
/// report.
fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool {
parser.report_error(self);
true
}
/// Reports this error to `parser`.
///
/// Always returns `None`, because a bare error never holds a value.
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<()> {
parser.report_error(self);
None
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
    /// Integer placeholder after a parse error: always zero.
    fn fallback_value(_parser: &Parser<'src, 'arena>, _error: &ParseError) -> Self {
        0
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
    /// Floating-point placeholder after a parse error: always zero.
    fn fallback_value(_parser: &Parser<'src, 'arena>, _error: &ParseError) -> Self {
        0.0
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::IdentifierToken {
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
error.source_span.to
Self(error.covered_span.start)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena>
    for crate::ast::CallableDefinitionRef<'src, 'arena>
{
    /// Builds a placeholder callable: a body-less `Function` with no
    /// modifiers or parameters. Its name and named return type both use an
    /// identifier token placed at the start of the error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let arena = parser.arena;
        // Placeholder return type, named by the token at the span start.
        let return_identifier = crate::ast::IdentifierToken(span.start);
        let return_type_name = QualifiedIdentifier::from_ident(arena, return_identifier);
        let return_type = crate::arena::ArenaNode::new_in(
            crate::ast::TypeSpecifier::Named(return_type_name),
            span,
            arena,
        );
        let placeholder = crate::ast::CallableDefinition {
            name: crate::ast::CallableName::Identifier(IdentifierToken(span.start)),
            kind: CallableKind::Function,
            return_type_specifier: Some(return_type),
            modifiers: arena.vec(),
            parameters: arena.vec(),
            body: None,
        };
        crate::arena::ArenaNode::new_in(placeholder, span, arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StructDefRef<'src, 'arena> {
    /// Builds a placeholder struct definition: unnamed, without a base
    /// type, modifiers or fields, spanning the error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let arena = parser.arena;
        let placeholder = crate::ast::StructDefinition {
            name: None,
            base_type_name: None,
            modifiers: arena.vec(),
            fields: arena.vec(),
        };
        crate::arena::ArenaNode::new_in(placeholder, err.covered_span, arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassVarDeclRef<'src, 'arena> {
    /// Builds a placeholder class variable declaration with no modifiers
    /// or declarators. Its named type uses an identifier token placed at
    /// the start of the error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let arena = parser.arena;
        let type_identifier = crate::ast::IdentifierToken(span.start);
        let type_name = QualifiedIdentifier::from_ident(arena, type_identifier);
        let type_spec = crate::arena::ArenaNode::new_in(
            crate::ast::TypeSpecifier::Named(type_name),
            span,
            arena,
        );
        let placeholder = crate::ast::ClassVarDecl {
            paren_specs: None,
            modifiers: arena.vec(),
            type_spec,
            declarators: arena.vec(),
            span,
        };
        crate::arena::ArenaNode::new_in(placeholder, span, arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena>
    for crate::ast::ReplicationBlockRef<'src, 'arena>
{
    /// Builds a placeholder replication block with no rules, spanning the
    /// error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let arena = parser.arena;
        let placeholder = crate::ast::ReplicationBlock {
            rules: arena.vec(),
            span,
        };
        crate::arena::ArenaNode::new_in(placeholder, span, arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StateDeclRef<'src, 'arena> {
    /// Builds a placeholder state declaration with an empty body and no
    /// parent, modifiers or ignore list. Its name is an identifier token
    /// placed at the start of the error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let arena = parser.arena;
        let placeholder = crate::ast::StateDecl {
            name: crate::ast::IdentifierToken(span.start),
            parent: None,
            modifiers: arena.vec(),
            ignores: None,
            body: arena.vec(),
            span,
        };
        crate::arena::ArenaNode::new_in(placeholder, span, arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenPosition {
/// Placeholder position after a parse error: the end of the error's
/// covered span.
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
error.covered_span.end
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for (Token, TokenPosition) {
/// Placeholder token after a parse error: `Token::Error` paired with the
/// end of the error's covered span.
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
(Token::Error, error.covered_span.end)
}
}
@ -227,7 +741,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExpressionRef<
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
crate::arena::ArenaNode::new_in(
crate::ast::Expression::Error,
error.source_span,
error.covered_span,
parser.arena,
)
}
@ -237,17 +751,64 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StatementRef<'
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
crate::arena::ArenaNode::new_in(
crate::ast::Statement::Error,
error.source_span,
error.covered_span,
parser.arena,
)
}
}
impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T>
where
T: RecoveryFallback<'src, 'arena>,
{
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
Some(T::fallback_value(parser, error))
impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T> {
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
None
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassConstDeclRef<'src, 'arena> {
    /// Builds a placeholder constant declaration whose value is
    /// `DeclarationLiteral::None`. Both the name and the literal are
    /// positioned at the start of the error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let placeholder = crate::ast::ClassConstDecl {
            name: crate::ast::IdentifierToken(span.start),
            value: crate::ast::DeclarationLiteralRef {
                literal: crate::ast::DeclarationLiteral::None,
                position: span.start,
            },
            span,
        };
        crate::arena::ArenaNode::new_in(placeholder, span, parser.arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::TypeSpecifierRef<'src, 'arena> {
    /// Builds a placeholder named type specifier. Its name is a single
    /// identifier token placed at the start of the error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let arena = parser.arena;
        let placeholder_identifier = crate::ast::IdentifierToken(span.start);
        let type_name = QualifiedIdentifier::from_ident(arena, placeholder_identifier);
        crate::arena::ArenaNode::new_in(crate::ast::TypeSpecifier::Named(type_name), span, arena)
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExecDirectiveRef<'arena> {
    /// Builds a placeholder exec directive with empty text, spanning the
    /// error's covered span.
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
        let span = err.covered_span;
        let arena = parser.arena;
        let placeholder = crate::ast::ExecDirective {
            text: arena.string(""),
            span,
        };
        crate::arena::ArenaNode::new_in(placeholder, span, arena)
    }
}
impl ParseError {
/// Reports this error to `parser` and returns the recovery fallback
/// value for `T`.
///
/// Delegates to `Parser::report_error_with_fallback`.
pub fn fallback<'src, 'arena, T>(self, parser: &mut Parser<'src, 'arena>) -> T
where
T: RecoveryFallback<'src, 'arena>,
{
parser.report_error_with_fallback(self)
}
/// Reports this error to `parser` without producing any value.
pub fn report<'src, 'arena>(self, parser: &mut Parser<'src, 'arena>) {
parser.report_error(self);
}
}

View File

@ -1,6 +1,6 @@
//! This module provides trivia token collection mechanism that lets parser code
//! iterate over significant tokens while ignoring trivia and preserving
//! full information for linting, formatting, and documentation.
//! Records trivia separately from significant tokens so parser code can work
//! with significant tokens without losing comments, whitespace, or line
//! structure.
//!
//! Tokens considered *trivia* are:
//!
@ -10,13 +10,30 @@
//! 4. [`crate::lexer::Token::Whitespace`].
//!
//! Every other token is considered *significant*.
//!
//! ## Required usage
//!
//! This is an internal helper. Callers must follow the protocol below.
//!
//! [`TriviaIndexBuilder`] must be driven over a single token stream in
//! strictly increasing [`TokenPosition`] order.
//! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source
//! order, and call [`TriviaIndexBuilder::record_significant_token`] for each
//! significant token.
//!
//! After the last significant token has been processed, call
//! [`TriviaIndexBuilder::into_index`] to attach any trailing trivia.
//!
//! Violating this protocol is a logic error.
use crate::lexer::TokenLocation;
#![allow(dead_code)]
// TODO: remove dead code
/// Types of trivia tokens, corresponding directly to the matching variants of
/// [`crate::lexer::Token`].
use crate::lexer::TokenPosition;
/// Kinds of trivia tokens corresponding to variants of [`crate::lexer::Token`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum TriviaKind {
pub enum TriviaKind {
Whitespace,
Newline,
LineComment,
@ -29,269 +46,208 @@ impl std::convert::TryFrom<crate::lexer::Token> for TriviaKind {
fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
use crate::lexer::Token;
match token {
Token::Whitespace => Ok(TriviaKind::Whitespace),
Token::Newline => Ok(TriviaKind::Newline),
Token::LineComment => Ok(TriviaKind::LineComment),
Token::BlockComment => Ok(TriviaKind::BlockComment),
Token::Whitespace => Ok(Self::Whitespace),
Token::Newline => Ok(Self::Newline),
Token::LineComment => Ok(Self::LineComment),
Token::BlockComment => Ok(Self::BlockComment),
_ => Err(()),
}
}
}
/// Complete description of a trivia token.
/// A recorded trivia token.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) struct TriviaToken<'src> {
/// Specific type of the trivia.
pub struct TriviaToken<'src> {
/// Kind of trivia token.
pub kind: TriviaKind,
/// Actual content of the token.
/// Source text of the token.
pub text: &'src str,
/// Location of this trivia token in the token stream.
pub location: TokenLocation,
pub position: TokenPosition,
}
type TriviaRange = std::ops::Range<usize>;
type TriviaMap = std::collections::HashMap<TriviaLocation, TriviaRange>;
type TriviaRangeMap = std::collections::HashMap<BoundaryLocation, std::ops::Range<usize>>;
/// Immutable index over all recorded trivia.
/// Extends [`TokenPosition`] with start-of-file and end-of-file markers.
///
/// Enables O(1) access to trivia immediately before/after any significant
/// token, plus file-leading and file-trailing trivia. Returned slices alias
/// internal storage and live for `'src`.
#[derive(Clone, Debug, Default)]
#[allow(dead_code)]
pub(crate) struct TriviaIndex<'src> {
/// All trivia tokens, stored contiguously in file order.
tokens: Vec<TriviaToken<'src>>,
/// Maps token location to the trivia tokens stored right after it.
after_map: TriviaMap,
/// Maps token location to the trivia tokens stored right before it.
before_map: TriviaMap,
}
/// Extends [`TokenLocation`] with *start of file* value.
///
/// Regular [`TokenLocation`] does not need this value, but trivia requires
/// a way to express "trivia before any significant token".
/// Regular [`TokenPosition`] values are enough for significant tokens, but
/// trivia also needs to represent content before the first significant token
/// and after the last one.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum TriviaLocation {
/// Position before any tokens, trivia or otherwise.
enum BoundaryLocation {
StartOfFile,
/// This variant can also express "end of file" through
/// [`TokenLocation::EndOfFile`].
At(TokenLocation),
Token(TokenPosition),
EndOfFile,
}
/// Mutable builder for `TriviaIndex`.
/// Immutable index over recorded trivia.
///
/// Used inside the parser to record trivia between successive significant
/// tokens in file order, then frozen via `into_index`.
#[derive(Debug, Default)]
#[allow(dead_code)]
pub(crate) struct TriviaComponent<'src> {
/// Provides O(1) access to trivia immediately before or after any significant
/// token, as well as file-leading and file-trailing trivia. Returned slices
/// borrow the index, and the contained token texts live for `'src`.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct TriviaIndex<'src> {
/// All trivia tokens, stored contiguously in file order.
tokens: Vec<TriviaToken<'src>>,
/// Maps token location to the trivia tokens stored right after it.
after_map: TriviaMap,
/// Maps token location to the trivia tokens stored right before it.
before_map: TriviaMap,
/// Location of the last gap's right boundary,
/// for debug-time invariant checks.
#[cfg(debug_assertions)]
last_right_boundary: Option<TriviaLocation>,
/// Maps a trivia boundary location to the trivia tokens stored right
/// after it.
trivia_after_boundary: TriviaRangeMap,
/// Maps a trivia boundary location to the trivia tokens stored right
/// before it.
trivia_before_boundary: TriviaRangeMap,
}
impl<'src> TriviaComponent<'src> {
/// Records trivia tokens that lie strictly between
/// `previous_token_location` and `next_token_location`.
///
/// [`None`] for `previous_token_location` means beginning of file;
/// `next_token_location` may be [`TokenLocation::EndOfFile`].
///
/// Empties `gap_trivia` without changing its capacity.
///
/// Requirements (checked in debug builds):
/// - previous_token_location < next_token_location;
/// - calls are monotonic: each gap starts at or after the last end;
/// - `collected` is nonempty and strictly ordered by `location`;
/// - all `collected` lie strictly inside (prev, next).
pub(crate) fn record_between_locations(
&mut self,
previous_token_location: Option<TokenLocation>,
next_token_location: TokenLocation,
gap_trivia: &mut Vec<TriviaToken<'src>>,
) {
#[cfg(debug_assertions)]
self.debug_assert_valid_recording_batch(
previous_token_location,
next_token_location,
&gap_trivia,
);
/// Mutable builder for [`TriviaIndex`].
///
/// Records trivia between successive significant tokens while the caller walks
/// a token stream in file order. Once all tokens have been processed, call
/// [`TriviaIndexBuilder::into_index`] to finalize the index.
#[derive(Debug)]
pub struct TriviaIndexBuilder<'src> {
/// All trivia tokens, stored contiguously in file order.
tokens: Vec<TriviaToken<'src>>,
/// Maps boundary location to the trivia tokens stored right after it.
trivia_after_boundary: TriviaRangeMap,
/// Maps boundary location to the trivia tokens stored right before it.
trivia_before_boundary: TriviaRangeMap,
if gap_trivia.is_empty() {
/// Trivia collected since the last significant token (or file start),
/// not yet attached to a right boundary.
pending_trivia: Vec<TriviaToken<'src>>,
/// Left boundary of the currently open gap.
current_left_boundary: BoundaryLocation,
}
impl<'src> Default for TriviaIndexBuilder<'src> {
    /// Creates an empty builder whose open gap starts at the beginning of
    /// the file.
    fn default() -> Self {
        let current_left_boundary = BoundaryLocation::StartOfFile;
        Self {
            tokens: Vec::default(),
            trivia_after_boundary: TriviaRangeMap::new(),
            trivia_before_boundary: TriviaRangeMap::new(),
            pending_trivia: Vec::default(),
            current_left_boundary,
        }
    }
}
impl<'src> TriviaIndexBuilder<'src> {
/// Records `token` as trivia.
///
/// Tokens must be recorded in file order.
///
/// The token is only queued in `pending_trivia` here. It is attached to its
/// neighbouring boundaries when the pending trivia is flushed, which
/// happens in `record_significant_token` and `into_index`.
pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) {
// Ordering check is compiled only into debug builds.
#[cfg(debug_assertions)]
self.debug_assert_position_is_in_order(token.position);
self.pending_trivia.push(token);
}
/// Records a significant token at `position`.
///
/// Any trivia queued since the previous boundary is attached to the gap
/// that ends at this token, and this token then becomes the left boundary
/// of the next gap.
///
/// Positions must be recorded in file order.
pub(crate) fn record_significant_token(&mut self, position: TokenPosition) {
    // Ordering check is compiled only into debug builds.
    #[cfg(debug_assertions)]
    self.debug_assert_position_is_in_order(position);
    let boundary = BoundaryLocation::Token(position);
    self.flush_pending_trivia_to_boundary(boundary);
    self.current_left_boundary = boundary;
}
// Stores one trivia range under both neighboring boundaries so lookups
// from either side return the same slice.
fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) {
if self.pending_trivia.is_empty() {
return;
}
let previous_token_location = previous_token_location
.map(TriviaLocation::At)
.unwrap_or(TriviaLocation::StartOfFile);
let next_token_location = TriviaLocation::At(next_token_location);
let trivia_start = self.tokens.len();
self.tokens.append(gap_trivia);
self.tokens.append(&mut self.pending_trivia);
let trivia_end = self.tokens.len();
self.after_map
.insert(previous_token_location, trivia_start..trivia_end);
self.before_map
.insert(next_token_location, trivia_start..trivia_end);
self.trivia_after_boundary
.insert(self.current_left_boundary, trivia_start..trivia_end);
self.trivia_before_boundary
.insert(right_boundary, trivia_start..trivia_end);
}
/// Freezes into an immutable, shareable index.
/// Finalizes the builder and returns the completed trivia index.
///
/// Any pending trivia is recorded as trailing trivia.
#[must_use]
#[allow(dead_code)]
pub(crate) fn into_index(self) -> TriviaIndex<'src> {
pub(crate) fn into_index(mut self) -> TriviaIndex<'src> {
self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile);
TriviaIndex {
tokens: self.tokens,
after_map: self.after_map,
before_map: self.before_map,
trivia_after_boundary: self.trivia_after_boundary,
trivia_before_boundary: self.trivia_before_boundary,
}
}
/// Trivia immediately after the significant token at `location`.
///
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded after it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.after_map)
}
/// Trivia immediately before the significant token at `location`.
///
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded before it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.before_map)
}
/// Trivia before any significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
}
/// Trivia after the last significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
self.slice_for(
TriviaLocation::At(TokenLocation::EndOfFile),
&self.before_map,
)
}
// Helper: return the recorded slice or an empty slice if none.
#[track_caller]
#[allow(dead_code)]
fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
if let Some(range) = map.get(&key) {
// Ranges are guaranteed to be valid by construction
&self.tokens[range.start..range.end]
} else {
&[]
}
}
/// Debug-only validation for `record_between_locations`'s contract.
// Catches out-of-order recording during development; the builder relies
// on this ordering invariant.
#[cfg(debug_assertions)]
fn debug_assert_valid_recording_batch(
&mut self,
previous_token_location: Option<TokenLocation>,
next_token_location: TokenLocation,
collected: &[TriviaToken<'src>],
) {
// Prevent zero-width or reversed gaps
debug_assert!(previous_token_location < Some(next_token_location));
let previous_token_location = previous_token_location
.map(TriviaLocation::At)
.unwrap_or(TriviaLocation::StartOfFile);
let next_token_location = TriviaLocation::At(next_token_location);
// Enforce monotonic gaps: we record in file order
if let Some(last_right) = self.last_right_boundary {
debug_assert!(previous_token_location >= last_right);
fn debug_assert_position_is_in_order(&self, position: TokenPosition) {
let location = BoundaryLocation::Token(position);
debug_assert!(location > self.current_left_boundary);
if let Some(last) = self.pending_trivia.last() {
debug_assert!(last.position < position);
}
self.last_right_boundary = Some(next_token_location);
let first_trivia_location = collected
.first()
.map(|token| TriviaLocation::At(token.location))
.expect("Provided trivia tokens array should not be empty.");
let last_trivia_location = collected
.last()
.map(|token| TriviaLocation::At(token.location))
.expect("Provided trivia tokens array should not be empty.");
// Ensure trivia lies strictly inside the gap
debug_assert!(previous_token_location < first_trivia_location);
debug_assert!(next_token_location > last_trivia_location);
// Ensure trivia locations are strictly increasing
debug_assert!(
collected
.windows(2)
.all(|window| window[0].location < window[1].location)
);
}
}
impl<'src> TriviaIndex<'src> {
/// Trivia immediately after the significant token at `location`.
/// Returns the trivia immediately after the significant token at
/// `position`.
///
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded after it.
/// Returns an empty slice if `position` does not identify a recorded
/// significant token or if no trivia was recorded after it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.after_map)
}
/// Trivia immediately before the significant token at `location`.
///
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded before it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.before_map)
}
/// Trivia before any significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
}
/// Trivia after the last significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
self.slice_for(
TriviaLocation::At(TokenLocation::EndOfFile),
&self.before_map,
BoundaryLocation::Token(position),
&self.trivia_after_boundary,
)
}
// Helper: return the recorded slice or an empty slice if none.
#[track_caller]
#[allow(dead_code)]
fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
if let Some(range) = map.get(&key) {
// Ranges are guaranteed to be valid by construction
&self.tokens[range.start..range.end]
} else {
&[]
/// Looks up the trivia recorded directly in front of the significant token
/// at `position`.
///
/// The slice is empty when `position` has no entry, i.e. it does not
/// identify a recorded significant token or nothing was recorded before it.
#[must_use]
pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
    let boundary = BoundaryLocation::Token(position);
    self.slice_for(boundary, &self.trivia_before_boundary)
}
/// Looks up the trivia that precedes the first significant token.
///
/// When no significant tokens were recorded at all, this is every piece of
/// recorded trivia.
#[must_use]
pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] {
    let boundary = BoundaryLocation::StartOfFile;
    self.slice_for(boundary, &self.trivia_after_boundary)
}
/// Looks up the trivia that follows the last significant token.
///
/// When no significant tokens were recorded at all, this is every piece of
/// recorded trivia.
#[must_use]
pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] {
    let boundary = BoundaryLocation::EndOfFile;
    self.slice_for(boundary, &self.trivia_before_boundary)
}
// Resolves `key` through `map` into a slice of the stored trivia tokens.
// A key without an entry yields an empty slice.
fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] {
    let range = match map.get(&key) {
        Some(range) => range,
        None => return &[],
    };
    // Ranges are guaranteed to be valid by construction; the checked `get`
    // below still falls back to an empty slice if that ever does not hold.
    debug_assert!(range.start <= range.end);
    debug_assert!(range.end <= self.tokens.len());
    self.tokens.get(range.clone()).unwrap_or(&[])
}
}

View File

@ -0,0 +1 @@
mod parser_diagnostics;

1578
rottlib/tests/fixtures/CommandAPI.uc vendored Normal file

File diff suppressed because it is too large Load Diff

1199
rottlib/tests/fixtures/DBRecord.uc vendored Normal file

File diff suppressed because it is too large Load Diff

326
rottlib/tests/fixtures/KVehicle.uc vendored Normal file
View File

@ -0,0 +1,326 @@
// Generic 'Karma Vehicle' base class that can be controlled by a Pawn.
class KVehicle extends Vehicle
native
abstract;
cpptext
{
#ifdef WITH_KARMA
virtual void PostNetReceive();
virtual void PostEditChange();
virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);
virtual void TickSimulated( FLOAT DeltaSeconds );
virtual void TickAuthoritative( FLOAT DeltaSeconds );
#endif
}
// Effect spawned when vehicle is destroyed
var (KVehicle) class<Actor> DestroyEffectClass;
// Simple 'driving-in-rings' logic.
var (KVehicle) bool bAutoDrive;
// The factory that created this vehicle.
//var KVehicleFactory ParentFactory;
// Weapon system
var bool bVehicleIsFiring, bVehicleIsAltFiring;
const FilterFrames = 5;
var vector CameraHistory[FilterFrames];
var int NextHistorySlot;
var bool bHistoryWarmup;
// Useful function for plotting data to real-time graph on screen.
native final function GraphData(string DataName, float DataValue);
// if _RO_
function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation,
vector momentum, class<DamageType> damageType, optional int HitIndex)
// else UT
//function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation,
// vector momentum, class<DamageType> damageType)
{
Super.TakeDamage(Damage,instigatedBy,HitLocation,Momentum,DamageType);
}
// You got some new info from the server (ie. VehicleState has some new info).
event VehicleStateReceived();
// Called when a parameter of the overall articulated actor has changed (like PostEditChange)
// The script must then call KUpdateConstraintParams or Actor Karma mutators as appropriate.
simulated event KVehicleUpdateParams();
// The pawn Driver has tried to take control of this vehicle
function bool TryToDrive(Pawn P)
{
if ( P.bIsCrouched || (P.Controller == None) || (Driver != None) || !P.Controller.bIsPlayer )
return false;
if ( !P.IsHumanControlled() || !P.Controller.IsInState('PlayerDriving') )
{
KDriverEnter(P);
return true;
}
return false;
}
// Events called on driver entering/leaving vehicle
simulated function ClientKDriverEnter(PlayerController pc)
{
pc.myHUD.bCrosshairShow = false;
pc.myHUD.bShowWeaponInfo = false;
pc.myHUD.bShowPoints = false;
pc.bBehindView = true;
pc.bFreeCamera = true;
pc.SetRotation(rotator( vect(-1, 0, 0) >> Rotation ));
}
function KDriverEnter(Pawn P)
{
local PlayerController PC;
local Controller C;
// Set pawns current controller to control the vehicle pawn instead
Driver = P;
// Move the driver into position, and attach to car.
Driver.SetCollision(false, false);
Driver.bCollideWorld = false;
Driver.bPhysicsAnimUpdate = false;
Driver.Velocity = vect(0,0,0);
Driver.SetPhysics(PHYS_None);
Driver.SetBase(self);
// Disconnect PlayerController from Driver and connect to KVehicle.
C = P.Controller;
p.Controller.Unpossess();
Driver.SetOwner(C); // This keeps the driver relevant.
C.Possess(self);
PC = PlayerController(C);
if ( PC != None )
{
PC.ClientSetViewTarget(self); // Set playercontroller to view the vehicle
// Change controller state to driver
PC.GotoState('PlayerDriving');
ClientKDriverEnter(PC);
}
}
simulated function ClientKDriverLeave(PlayerController pc)
{
pc.bBehindView = false;
pc.bFreeCamera = false;
// This removes any 'roll' from the look direction.
//exitLookDir = Vector(pc.Rotation);
//pc.SetRotation(Rotator(exitLookDir));
pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;
pc.myHUD.bShowWeaponInfo = pc.myHUD.default.bShowWeaponInfo;
pc.myHUD.bShowPoints = pc.myHUD.default.bShowPoints;
// Reset the view-smoothing
NextHistorySlot = 0;
bHistoryWarmup = true;
}
// Called from the PlayerController when player wants to get out.
function bool KDriverLeave(bool bForceLeave)
{
local PlayerController pc;
local int i;
local bool havePlaced;
local vector HitLocation, HitNormal, tryPlace;
// Do nothing if we're not being driven
if(Driver == None)
return false;
// Before we can exit, we need to find a place to put the driver.
// Iterate over array of possible exit locations.
if (!bRemoteControlled)
{
Driver.bCollideWorld = true;
Driver.SetCollision(true, true);
havePlaced = false;
for(i=0; i < ExitPositions.Length && havePlaced == false; i++)
{
//Log("Trying Exit:"$i);
tryPlace = Location + (ExitPositions[i] >> Rotation);
// First, do a line check (stops us passing through things on exit).
if( Trace(HitLocation, HitNormal, tryPlace, Location, false) != None )
continue;
// Then see if we can place the player there.
if( !Driver.SetLocation(tryPlace) )
continue;
havePlaced = true;
}
// If we could not find a place to put the driver, leave driver inside as before.
if(!havePlaced && !bForceLeave)
{
Log("Could not place driver.");
Driver.bCollideWorld = false;
Driver.SetCollision(false, false);
return false;
}
}
pc = PlayerController(Controller);
ClientKDriverLeave(pc);
// Reconnect PlayerController to Driver.
pc.Unpossess();
pc.Possess(Driver);
pc.ClientSetViewTarget(Driver); // Set playercontroller to view the persone that got out
Controller = None;
Driver.PlayWaiting();
Driver.bPhysicsAnimUpdate = Driver.Default.bPhysicsAnimUpdate;
// Do stuff on client
//pc.ClientSetBehindView(false);
//pc.ClientSetFixedCamera(true);
if (!bRemoteControlled)
{
Driver.Acceleration = vect(0, 0, 24000);
Driver.SetPhysics(PHYS_Falling);
Driver.SetBase(None);
}
// Car now has no driver
Driver = None;
// Put brakes on before you get out :)
Throttle=0;
Steering=0;
// Stop firing when you get out!
bVehicleIsFiring = false;
bVehicleIsAltFiring = false;
return true;
}
// Special calc-view for vehicles
simulated function bool SpecialCalcView(out actor ViewActor, out vector CameraLocation, out rotator CameraRotation )
{
local vector CamLookAt, HitLocation, HitNormal;
local PlayerController pc;
local int i, averageOver;
pc = PlayerController(Controller);
// Only do this mode we have a playercontroller viewing this vehicle
if(pc == None || pc.ViewTarget != self)
return false;
ViewActor = self;
CamLookAt = Location + (vect(-100, 0, 100) >> Rotation);
//////////////////////////////////////////////////////
// Smooth lookat position over a few frames.
CameraHistory[NextHistorySlot] = CamLookAt;
NextHistorySlot++;
if(bHistoryWarmup)
averageOver = NextHistorySlot;
else
averageOver = FilterFrames;
CamLookAt = vect(0, 0, 0);
for(i=0; i<averageOver; i++)
CamLookAt += CameraHistory[i];
CamLookAt /= float(averageOver);
if(NextHistorySlot == FilterFrames)
{
NextHistorySlot = 0;
bHistoryWarmup=false;
}
//////////////////////////////////////////////////////
CameraLocation = CamLookAt + (vect(-600, 0, 0) >> CameraRotation);
if( Trace( HitLocation, HitNormal, CameraLocation, CamLookAt, false, vect(10, 10, 10) ) != None )
{
CameraLocation = HitLocation;
}
return true;
}
simulated function Destroyed()
{
// If there was a driver in the vehicle, destroy him too
if ( Driver != None )
Driver.Destroy();
// Trigger any effects for destruction
if(DestroyEffectClass != None)
spawn(DestroyEffectClass, , , Location, Rotation);
Super.Destroyed();
}
simulated event Tick(float deltaSeconds)
{
}
// Includes properties from KActor
defaultproperties
{
Steering=0
Throttle=0
ExitPositions(0)=(X=0,Y=0,Z=0)
DrivePos=(X=0,Y=0,Z=0)
DriveRot=()
bHistoryWarmup = true;
Physics=PHYS_Karma
bEdShouldSnap=True
bStatic=False
bShadowCast=False
bCollideActors=True
bCollideWorld=False
bProjTarget=True
bBlockActors=True
bBlockNonZeroExtentTraces=True
bBlockZeroExtentTraces=True
bWorldGeometry=False
bBlockKarma=True
bAcceptsProjectors=True
bCanBeBaseForPawns=True
bAlwaysRelevant=True
RemoteRole=ROLE_SimulatedProxy
bNetInitialRotation=True
bSpecialCalcView=True
//bSpecialHUD=true
}

View File

@ -0,0 +1,350 @@
use std::collections::HashMap;
use rottlib::diagnostics::{Diagnostic, Severity};
use rottlib::lexer::{TokenPosition, TokenSpan, TokenizedFile};
/// Expected span and message of a single diagnostic label.
#[derive(Debug)]
struct ExpectedLabel {
// Token span the label is expected to cover.
span: TokenSpan,
// Exact label text.
message: &'static str,
}
/// Expected contents of one diagnostic, compared field by field by
/// `assert_diagnostic`.
#[derive(Debug)]
struct ExpectedDiagnostic<'a> {
headline: &'static str,
severity: Severity,
code: Option<&'static str>,
// `None` means the diagnostic must not have a primary label.
primary_label: Option<ExpectedLabel>,
// Compared pairwise, in order, against the actual secondary labels.
secondary_labels: &'a [ExpectedLabel],
help: Option<&'static str>,
// Compared pairwise, in order, against the actual notes.
notes: &'a [&'static str],
}
/// Asserts that `actual` matches every field described by `expected`.
///
/// Headline, severity, code and help are compared first, then the primary
/// label, then secondary labels and notes pairwise in order. Every failing
/// assertion names the field (and, for lists, the index) that differed, so a
/// failure can be understood without re-running under a debugger.
///
/// Panics on the first mismatch; thanks to `#[track_caller]` the panic is
/// reported at the calling test.
#[track_caller]
fn assert_diagnostic(actual: &Diagnostic, expected: &ExpectedDiagnostic<'_>) {
    assert_eq!(actual.headline(), expected.headline, "headline mismatch");
    assert_eq!(actual.severity(), expected.severity, "severity mismatch");
    assert_eq!(actual.code(), expected.code, "code mismatch");
    assert_eq!(actual.help(), expected.help, "help mismatch");

    match (actual.primary_label(), expected.primary_label.as_ref()) {
        (None, None) => {}
        (Some(actual), Some(expected)) => {
            assert_eq!(actual.span, expected.span, "primary label span mismatch");
            assert_eq!(
                actual.message, expected.message,
                "primary label message mismatch"
            );
        }
        // Spell out which side is missing instead of a bare "mismatch".
        (Some(actual), None) => panic!(
            "primary label mismatch: expected no primary label, found {:?} at {:?}",
            actual.message, actual.span
        ),
        (None, Some(expected)) => {
            panic!("primary label mismatch: expected {expected:?}, found no primary label")
        }
    }

    let actual_secondary = actual.secondary_labels();
    assert_eq!(
        actual_secondary.len(),
        expected.secondary_labels.len(),
        "secondary label count mismatch"
    );
    for (index, (actual, expected)) in actual_secondary
        .iter()
        .zip(expected.secondary_labels.iter())
        .enumerate()
    {
        assert_eq!(
            actual.span, expected.span,
            "secondary label #{index} span mismatch"
        );
        assert_eq!(
            actual.message, expected.message,
            "secondary label #{index} message mismatch"
        );
    }

    let actual_notes = actual.notes();
    assert_eq!(
        actual_notes.len(),
        expected.notes.len(),
        "note count mismatch"
    );
    for (index, (actual, expected)) in actual_notes.iter().zip(expected.notes.iter()).enumerate() {
        assert_eq!(actual, expected, "note #{index} mismatch");
    }
}
/// A named piece of source text that is tokenized by the tests.
#[derive(Debug, Clone, Copy)]
struct Fixture {
// Key under which the fixture's diagnostics are stored and looked up.
label: &'static str,
// Source text handed to the tokenizer.
source: &'static str,
}
type FixtureRun = Vec<Diagnostic>;
/// Diagnostics produced for each fixture, keyed by the fixture's label.
struct FixtureRuns {
runs: HashMap<&'static str, FixtureRun>,
}
impl FixtureRuns {
    /// Returns a copy of all diagnostics recorded for `label`, or `None` if
    /// no fixture with that label was run.
    #[track_caller]
    fn get(&self, label: &str) -> Option<Vec<Diagnostic>> {
        self.runs.get(label).cloned()
    }

    /// Returns a copy of the first diagnostic recorded for `label`.
    ///
    /// Panics if the fixture was not run or produced no diagnostics. The
    /// panics are raised directly in this function body (not inside a
    /// closure) so that `#[track_caller]` attributes them to the calling test.
    #[track_caller]
    fn get_any(&self, label: &str) -> Diagnostic {
        let run = match self.runs.get(label) {
            Some(run) => run,
            None => panic!("no fixture run for `{label}`"),
        };
        match run.first() {
            Some(diagnostic) => diagnostic.clone(),
            None => panic!("fixture `{label}` produced no diagnostics"),
        }
    }

    /// Returns a copy of the first diagnostic for `label` whose code is
    /// `code`.
    ///
    /// Panics if the fixture was not run or has no diagnostic with that code.
    #[track_caller]
    fn get_by_code(&self, label: &str, code: &str) -> Diagnostic {
        let run = match self.runs.get(label) {
            Some(run) => run,
            None => panic!("no fixture run for `{label}`"),
        };
        match run
            .iter()
            .find(|diagnostic| diagnostic.code() == Some(code))
        {
            Some(diagnostic) => diagnostic.clone(),
            None => panic!("no `{code}` diagnostic in fixture `{label}`"),
        }
    }
}
/// Builds a span that starts and ends at the token with index `position`.
const fn span(position: usize) -> TokenSpan {
    let point = TokenPosition(position);
    TokenSpan {
        start: point,
        end: point,
    }
}
/// Inputs for the lexer diagnostic tests in this file.
///
/// Labels look like file names; the tests use them only as lookup keys for
/// the diagnostics each source produces.
const LEXER_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/L0001_01.uc",
source: "`",
},
Fixture {
label: "files/L0002_01.uc",
source: "]",
},
Fixture {
label: "files/L0003_01.uc",
source: "{\n foo(\n}\n",
},
Fixture {
label: "files/L0004_01.uc",
source: "(]",
},
Fixture {
label: "files/L0005_01.uc",
source: "foo(",
},
Fixture {
label: "files/L_mixed_01.uc",
source: "([)]",
},
];
/// Tokenizes the fixture's source and returns a copy of the diagnostics the
/// tokenized file reports.
fn run_fixture(fixture: &'static Fixture) -> FixtureRun {
    let tokenized = TokenizedFile::tokenize(fixture.source);
    let diagnostics = tokenized.diagnostics();
    diagnostics.to_vec()
}
fn run_fixtures(fixtures: &'static [Fixture]) -> FixtureRuns {
let mut runs = HashMap::new();
for fixture in fixtures {
runs.insert(fixture.label, run_fixture(fixture));
}
FixtureRuns { runs }
}
/// Every lexer fixture must report exactly the expected number of
/// diagnostics.
#[test]
fn check_lexer_diagnostic_counts() {
    let runs = run_fixtures(LEXER_FIXTURES);
    let count = |label: &str| runs.get(label).map(|diagnostics| diagnostics.len());

    assert_eq!(count("files/L0001_01.uc"), Some(1));
    assert_eq!(count("files/L0002_01.uc"), Some(1));
    assert_eq!(count("files/L0003_01.uc"), Some(1));
    assert_eq!(count("files/L0004_01.uc"), Some(1));
    assert_eq!(count("files/L0005_01.uc"), Some(1));
    assert_eq!(count("files/L_mixed_01.uc"), Some(2));
}
/// A lone backtick is reported as an invalid token (L0001).
#[test]
fn check_l0001_invalid_token() {
    let expected = ExpectedDiagnostic {
        headline: "invalid token: backtick",
        severity: Severity::Error,
        code: Some("L0001"),
        primary_label: Some(ExpectedLabel {
            span: span(0),
            message: "invalid token: backtick",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    let runs = run_fixtures(LEXER_FIXTURES);
    let actual = runs.get_any("files/L0001_01.uc");
    assert_diagnostic(&actual, &expected);
}
/// A closing delimiter with nothing open is reported as L0002.
#[test]
fn check_l0002_unexpected_closing_delimiter() {
    let expected = ExpectedDiagnostic {
        headline: "unexpected closing delimiter: `]`",
        severity: Severity::Error,
        code: Some("L0002"),
        primary_label: Some(ExpectedLabel {
            span: span(0),
            message: "unexpected closing delimiter",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    let runs = run_fixtures(LEXER_FIXTURES);
    let actual = runs.get_any("files/L0002_01.uc");
    assert_diagnostic(&actual, &expected);
}
/// An inner `(` left open when an outer `}` arrives is reported as L0003,
/// with secondary labels on the `}` and on the `{` it is matched with.
#[test]
fn check_l0003_unclosed_delimiter_before_later_close() {
    let secondary = [
        ExpectedLabel {
            span: span(6),
            message: "this `}` is matched with the earlier `{`",
        },
        ExpectedLabel {
            span: span(0),
            message: "this `{` is likely the intended match",
        },
    ];
    let expected = ExpectedDiagnostic {
        headline: "unclosed delimiter before `}`",
        severity: Severity::Error,
        code: Some("L0003"),
        primary_label: Some(ExpectedLabel {
            span: span(4),
            message: "this `(` is not closed before `}`",
        }),
        secondary_labels: &secondary,
        help: None,
        notes: &[],
    };

    let runs = run_fixtures(LEXER_FIXTURES);
    let actual = runs.get_any("files/L0003_01.uc");
    assert_diagnostic(&actual, &expected);
}
/// A closing delimiter of the wrong kind is reported as L0004, pointing back
/// at the opener.
#[test]
fn check_l0004_mismatched_closing_delimiter() {
    let secondary = [ExpectedLabel {
        span: span(0),
        message: "`(` opened here",
    }];
    let expected = ExpectedDiagnostic {
        headline: "mismatched closing delimiter: `]`",
        severity: Severity::Error,
        code: Some("L0004"),
        primary_label: Some(ExpectedLabel {
            span: span(1),
            message: "closing delimiter does not match `(`",
        }),
        secondary_labels: &secondary,
        help: None,
        notes: &[],
    };

    let runs = run_fixtures(LEXER_FIXTURES);
    let actual = runs.get_any("files/L0004_01.uc");
    assert_diagnostic(&actual, &expected);
}
/// A delimiter still open at end of input is reported as L0005.
#[test]
fn check_l0005_unclosed_delimiter_at_eof() {
    let expected = ExpectedDiagnostic {
        headline: "unclosed delimiter: `(`",
        severity: Severity::Error,
        code: Some("L0005"),
        primary_label: Some(ExpectedLabel {
            span: span(1),
            message: "this `(` was never closed",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    let runs = run_fixtures(LEXER_FIXTURES);
    let actual = runs.get_any("files/L0005_01.uc");
    assert_diagnostic(&actual, &expected);
}
/// `([)]` yields both an L0003 (for the `[` skipped over by `)`) and an L0002
/// (for the leftover `]`).
#[test]
fn check_mixed_recovery_diagnostics() {
    let unclosed_secondary = [
        ExpectedLabel {
            span: span(2),
            message: "this `)` is matched with the earlier `(`",
        },
        ExpectedLabel {
            span: span(0),
            message: "this `(` is likely the intended match",
        },
    ];
    let expected_unclosed = ExpectedDiagnostic {
        headline: "unclosed delimiter before `)`",
        severity: Severity::Error,
        code: Some("L0003"),
        primary_label: Some(ExpectedLabel {
            span: span(1),
            message: "this `[` is not closed before `)`",
        }),
        secondary_labels: &unclosed_secondary,
        help: None,
        notes: &[],
    };
    let expected_unexpected = ExpectedDiagnostic {
        headline: "unexpected closing delimiter: `]`",
        severity: Severity::Error,
        code: Some("L0002"),
        primary_label: Some(ExpectedLabel {
            span: span(3),
            message: "unexpected closing delimiter",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    let runs = run_fixtures(LEXER_FIXTURES);

    let unclosed = runs.get_by_code("files/L_mixed_01.uc", "L0003");
    assert_diagnostic(&unclosed, &expected_unclosed);

    let unexpected = runs.get_by_code("files/L_mixed_01.uc", "L0002");
    assert_diagnostic(&unexpected, &expected_unexpected);
}
/// After recovery the outer braces are recorded as each other's match, while
/// the unclosed `(` has no match.
#[test]
fn check_recovered_delimiter_matches_are_stored() {
    let file = TokenizedFile::tokenize("{\n foo(\n}\n");
    let partner_of = |index: usize| file.matching_delimiter(TokenPosition(index));

    assert_eq!(partner_of(0), Some(TokenPosition(6)));
    assert_eq!(partner_of(6), Some(TokenPosition(0)));
    assert_eq!(partner_of(4), None);
}
/// In `([)]` only the parentheses are recorded as a matched pair; neither
/// bracket has a match.
#[test]
fn check_mixed_recovery_delimiter_matches_are_stored() {
    let file = TokenizedFile::tokenize("([)]");
    let partner_of = |index: usize| file.matching_delimiter(TokenPosition(index));

    assert_eq!(partner_of(0), Some(TokenPosition(2)));
    assert_eq!(partner_of(2), Some(TokenPosition(0)));
    assert_eq!(partner_of(1), None);
    assert_eq!(partner_of(3), None);
}

View File

@ -0,0 +1,455 @@
use super::*;
/// Blocks in which one expression statement lacks its terminating `;`.
///
/// `check_p0025_fixtures` expects each to produce a single P0025 diagnostic.
pub(super) const P0025_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0025_01.uc",
source: "{\n local int Count;\n Count = Count + 1 UpdateHud();\n DrawHud(CanvasRef);\n}\n",
},
Fixture {
label: "files/P0025_02.uc",
source: "{\n local float XL;\n C.TextSize(LevelTitle, XL, YL)\n C.SetPos(0, 0);\n C.DrawText(LevelTitle);\n}\n",
},
Fixture {
label: "files/P0025_03.uc",
source: "{\n local bool bReady;\n bReady = CheckReady()\n if (bReady) { StartMatch(); }\n NotifyReady();\n}\n",
},
Fixture {
label: "files/P0025_04.uc",
source: "{\n local int I;\n Scores[I] = Scores[I] + 1\n I++;\n RefreshScores();\n}\n",
},
];
/// Blocks whose closing `}` is missing.
///
/// `check_p0026_fixtures` expects each to produce a single P0026 diagnostic.
pub(super) const P0026_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0026_01.uc",
source: "{\n local int Count;\n Count = 0;\n Count++;\n UpdateHud();\n",
},
Fixture {
label: "files/P0026_02.uc",
source: "{\n local bool bReady;\n bReady = CheckReady();\n if (bReady) { StartMatch(); }\n NotifyReady();\n",
},
Fixture {
label: "files/P0026_03.uc",
source: "{ local float XL; do { C.TextSize(LevelTitle, XL, YL); } until (XL < C.ClipX) C.SetPos(0, 0); C.DrawText(LevelTitle);",
},
Fixture {
label: "files/P0026_04.uc",
source: "{\n local int Count;\n Count = Count + 1;\n UpdateHud();\n Count\n",
},
];
/// Blocks that combine a missing `;` with a missing closing `}`.
///
/// The mixed tests expect two diagnostics from each: one P0025 and one P0026.
pub(super) const P0025_P0026_MIXED_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P_mixed_01.uc",
source: "{ local int Count; Count = Count + 1 UpdateHud(); DrawHud(CanvasRef);",
},
Fixture {
label: "files/P_mixed_02.uc",
source: "{\n local bool bReady;\n bReady = CheckReady()\n if (bReady) { StartMatch(); }\n NotifyReady();\n",
},
];
/// Each P0025 fixture reports exactly one "missing `;`" diagnostic with the
/// expected primary and secondary labels.
#[test]
fn check_p0025_fixtures() {
    // All P0025 expectations share headline, severity and code and carry one
    // secondary label; only spans and messages vary per fixture.
    #[track_caller]
    fn assert_missing_semicolon(
        actual: &Diagnostic,
        primary_position: usize,
        primary_message: &'static str,
        secondary_start: usize,
        secondary_end: usize,
        secondary_message: &'static str,
    ) {
        assert_diagnostic(
            actual,
            &ExpectedDiagnostic {
                headline: "missing `;` after expression statement",
                severity: Severity::Error,
                code: Some("P0025"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(primary_position),
                        end: TokenPosition(primary_position),
                    },
                    message: primary_message,
                }),
                secondary_labels: &[ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(secondary_start),
                        end: TokenPosition(secondary_end),
                    },
                    message: secondary_message,
                }],
                help: None,
                notes: &[],
            },
        );
    }

    let runs = run_fixtures(P0025_FIXTURES);

    assert_eq!(runs.get("files/P0025_01.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0025_02.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0025_03.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0025_04.uc").unwrap().len(), 1);

    assert_missing_semicolon(
        &runs.get_any("files/P0025_01.uc"),
        21,
        "expected `;` before `UpdateHud`",
        11,
        19,
        "expression statement",
    );
    assert_missing_semicolon(
        &runs.get_any("files/P0025_02.uc"),
        25,
        "expected `;` before `C`",
        22,
        22,
        "expression statement ends here",
    );
    assert_missing_semicolon(
        &runs.get_any("files/P0025_03.uc"),
        20,
        "expected `;` before `if`",
        17,
        17,
        "expression statement ends here",
    );
    assert_missing_semicolon(
        &runs.get_any("files/P0025_04.uc"),
        28,
        "expected `;` before `I`",
        25,
        25,
        "expression statement ends here",
    );
}
/// Each P0026 fixture reports exactly one "missing `}`" diagnostic whose
/// primary label covers the expected span.
#[test]
fn check_p0026_fixtures() {
    // P0026 expectations differ only in the primary label's span.
    #[track_caller]
    fn assert_missing_brace(actual: &Diagnostic, span_start: usize, span_end: usize) {
        assert_diagnostic(
            actual,
            &ExpectedDiagnostic {
                headline: "missing `}` to close block",
                severity: Severity::Error,
                code: Some("P0026"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(span_start),
                        end: TokenPosition(span_end),
                    },
                    message: "expected `}` before end of file",
                }),
                secondary_labels: &[],
                help: None,
                notes: &[],
            },
        );
    }

    let runs = run_fixtures(P0026_FIXTURES);

    assert_eq!(runs.get("files/P0026_01.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0026_02.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0026_03.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0026_04.uc").unwrap().len(), 1);

    assert_missing_brace(&runs.get_any("files/P0026_01.uc"), 0, 29);
    assert_missing_brace(&runs.get_any("files/P0026_02.uc"), 0, 42);
    assert_missing_brace(&runs.get_any("files/P0026_03.uc"), 59, 59);
    assert_missing_brace(&runs.get_any("files/P0026_04.uc"), 0, 31);
}
/// In the mixed fixtures the P0025 diagnostic is still reported with the
/// expected labels alongside the P0026 one.
#[test]
fn check_p0025_mixed_fixtures() {
    // Shared shape of a P0025 expectation; only spans and messages vary.
    #[track_caller]
    fn assert_missing_semicolon(
        actual: &Diagnostic,
        primary_position: usize,
        primary_message: &'static str,
        secondary_start: usize,
        secondary_end: usize,
        secondary_message: &'static str,
    ) {
        assert_diagnostic(
            actual,
            &ExpectedDiagnostic {
                headline: "missing `;` after expression statement",
                severity: Severity::Error,
                code: Some("P0025"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(primary_position),
                        end: TokenPosition(primary_position),
                    },
                    message: primary_message,
                }),
                secondary_labels: &[ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(secondary_start),
                        end: TokenPosition(secondary_end),
                    },
                    message: secondary_message,
                }],
                help: None,
                notes: &[],
            },
        );
    }

    let runs = run_fixtures(P0025_P0026_MIXED_FIXTURES);

    assert_eq!(runs.get("files/P_mixed_01.uc").unwrap().len(), 2);
    assert_eq!(runs.get("files/P_mixed_02.uc").unwrap().len(), 2);

    assert_missing_semicolon(
        &runs.get_by_code("files/P_mixed_01.uc", "P0025"),
        19,
        "expected `;` before `UpdateHud`",
        9,
        17,
        "expression statement",
    );
    assert_missing_semicolon(
        &runs.get_by_code("files/P_mixed_02.uc", "P0025"),
        20,
        "expected `;` before `if`",
        17,
        17,
        "expression statement ends here",
    );
}
/// In the mixed fixtures the P0026 diagnostic is still reported with the
/// expected span alongside the P0025 one.
#[test]
fn check_p0026_mixed_fixtures() {
    // P0026 expectations differ only in the primary label's span.
    #[track_caller]
    fn assert_missing_brace(actual: &Diagnostic, span_start: usize, span_end: usize) {
        assert_diagnostic(
            actual,
            &ExpectedDiagnostic {
                headline: "missing `}` to close block",
                severity: Severity::Error,
                code: Some("P0026"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(span_start),
                        end: TokenPosition(span_end),
                    },
                    message: "expected `}` before end of file",
                }),
                secondary_labels: &[],
                help: None,
                notes: &[],
            },
        );
    }

    let runs = run_fixtures(P0025_P0026_MIXED_FIXTURES);

    assert_eq!(runs.get("files/P_mixed_01.uc").unwrap().len(), 2);
    assert_eq!(runs.get("files/P_mixed_02.uc").unwrap().len(), 2);

    assert_missing_brace(&runs.get_by_code("files/P_mixed_01.uc", "P0026"), 29, 29);
    assert_missing_brace(&runs.get_by_code("files/P_mixed_02.uc", "P0026"), 0, 41);
}
/// Blocks containing a token that cannot start a statement or expression
/// (`else`, `case`, `until`, an `#exec` directive).
///
/// `check_p0027_fixtures` expects each to produce a single P0027 diagnostic.
pub(super) const P0027_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0027_01.uc",
source: "{\n local bool bReady;\n bReady = CheckReady();\n else { StartMatch(); }\n NotifyReady();\n}\n",
},
Fixture {
label: "files/P0027_02.uc",
source: "{ local int Count; Count = 3; case 3: Count++; UpdateHud();}",
},
Fixture {
label: "files/P0027_03.uc",
source: "{\n local bool bDone;\n bDone = false;\n until (bDone)\n TickWork();\n}\n",
},
Fixture {
label: "files/P0027_04.uc",
source: "{\n local int Count;\n Count = 0;\n #exec TEXTURE IMPORT NAME=Bad FILE=Bad.bmp\n Count++;\n}\n",
},
];
/// Each P0027 fixture reports exactly one "expected statement or expression"
/// diagnostic pointing at the offending token.
#[test]
fn check_p0027_fixtures() {
    // P0027 expectations vary in headline, position and label message; the
    // primary label always covers a single token.
    #[track_caller]
    fn assert_unexpected_item(
        actual: &Diagnostic,
        headline: &'static str,
        position: usize,
        message: &'static str,
    ) {
        assert_diagnostic(
            actual,
            &ExpectedDiagnostic {
                headline,
                severity: Severity::Error,
                code: Some("P0027"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(position),
                        end: TokenPosition(position),
                    },
                    message,
                }),
                secondary_labels: &[],
                help: None,
                notes: &[],
            },
        );
    }

    let runs = run_fixtures(P0027_FIXTURES);

    assert_eq!(runs.get("files/P0027_01.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0027_02.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0027_03.uc").unwrap().len(), 1);
    assert_eq!(runs.get("files/P0027_04.uc").unwrap().len(), 1);

    assert_unexpected_item(
        &runs.get_any("files/P0027_01.uc"),
        "expected statement or expression, found `else`",
        21,
        "unexpected `else`",
    );
    assert_unexpected_item(
        &runs.get_any("files/P0027_02.uc"),
        "expected statement or expression, found `case`",
        16,
        "unexpected `case`",
    );
    assert_unexpected_item(
        &runs.get_any("files/P0027_03.uc"),
        "expected statement or expression, found `until`",
        19,
        "unexpected `until`",
    );
    assert_unexpected_item(
        &runs.get_any("files/P0027_04.uc"),
        "expected statement or expression, found `#exec` directive",
        19,
        "`#exec` directives are not allowed in a statement block",
    );
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,128 @@
use std::collections::HashMap;
use rottlib::arena::Arena;
use rottlib::diagnostics::{Diagnostic, Severity};
use rottlib::lexer::{TokenPosition, TokenSpan, TokenizedFile};
use rottlib::parser::Parser;
mod block_items;
mod control_flow_expressions;
mod primary_expressions;
mod selector_expressions;
mod switch_expressions;
/// Expected contents of a single diagnostic label, as checked by
/// `assert_diagnostic`.
#[derive(Debug)]
pub(super) struct ExpectedLabel {
/// Token span the label is expected to carry.
pub span: TokenSpan,
/// Exact message text the label is expected to carry.
pub message: &'static str,
}
/// Full description of the diagnostic a test expects, compared field by
/// field against an actual `Diagnostic` in `assert_diagnostic`.
#[derive(Debug)]
pub(super) struct ExpectedDiagnostic<'a> {
/// Expected value of `Diagnostic::headline()`.
pub headline: &'static str,
/// Expected value of `Diagnostic::severity()`.
pub severity: Severity,
/// Expected value of `Diagnostic::code()`; `None` when no code is expected.
pub code: Option<&'static str>,
/// Expected primary label; `None` when the diagnostic must not have one.
pub primary_label: Option<ExpectedLabel>,
/// Expected secondary labels, compared pairwise in order.
pub secondary_labels: &'a [ExpectedLabel],
/// Expected value of `Diagnostic::help()`; `None` when no help is expected.
pub help: Option<&'static str>,
/// Expected notes, compared pairwise in order.
pub notes: &'a [&'static str],
}
/// Asserts that `actual` matches `expected` in every checked field.
///
/// The comparison covers, in order: headline, severity, code, help text,
/// the primary label (presence, span and message), the secondary labels
/// (count, then span and message pairwise) and the notes (count, then text
/// pairwise). Each assertion names the field it checks, and label/note
/// assertions include the index, so a failure points at the exact mismatch.
///
/// # Panics
///
/// Panics on the first field that differs.
#[track_caller]
pub(super) fn assert_diagnostic(actual: &Diagnostic, expected: &ExpectedDiagnostic<'_>) {
    assert_eq!(actual.headline(), expected.headline, "headline mismatch");
    assert_eq!(actual.severity(), expected.severity, "severity mismatch");
    assert_eq!(actual.code(), expected.code, "code mismatch");
    assert_eq!(actual.help(), expected.help, "help mismatch");

    match (actual.primary_label(), expected.primary_label.as_ref()) {
        (None, None) => {}
        (Some(actual), Some(expected)) => {
            assert_eq!(actual.span, expected.span, "primary label span mismatch");
            assert_eq!(
                actual.message, expected.message,
                "primary label message mismatch"
            );
        }
        (Some(_), None) => {
            panic!("primary label mismatch: diagnostic has a primary label, none was expected")
        }
        (None, Some(expected)) => {
            panic!("primary label mismatch: diagnostic has no primary label, expected {expected:?}")
        }
    }

    let actual_secondary = actual.secondary_labels();
    assert_eq!(
        actual_secondary.len(),
        expected.secondary_labels.len(),
        "secondary label count mismatch"
    );
    for (index, (actual, expected)) in actual_secondary
        .iter()
        .zip(expected.secondary_labels.iter())
        .enumerate()
    {
        assert_eq!(
            actual.span, expected.span,
            "secondary label #{index} span mismatch"
        );
        assert_eq!(
            actual.message, expected.message,
            "secondary label #{index} message mismatch"
        );
    }

    let actual_notes = actual.notes();
    assert_eq!(
        actual_notes.len(),
        expected.notes.len(),
        "note count mismatch"
    );
    for (index, (actual, expected)) in actual_notes.iter().zip(expected.notes.iter()).enumerate() {
        assert_eq!(actual, expected, "note #{index} mismatch");
    }
}
/// One inline test input: a source snippet plus the label used to refer to it.
#[derive(Debug, Clone, Copy)]
pub(super) struct Fixture {
/// Name of the fixture; used as the lookup key in `FixtureRuns` and passed
/// to `Diagnostic::render` when diagnostics are rendered.
pub label: &'static str,
/// Source text that gets tokenized and parsed.
pub source: &'static str,
}
/// Diagnostics collected from parsing a single fixture.
pub(super) type FixtureRun = Vec<Diagnostic>;
/// Results of running a set of fixtures, keyed by fixture label.
pub(super) struct FixtureRuns {
// Maps each `Fixture::label` to the diagnostics its source produced.
runs: HashMap<&'static str, FixtureRun>,
}
impl FixtureRuns {
    /// Returns a copy of all diagnostics recorded for the fixture `label`,
    /// or `None` when no fixture with that label was run.
    #[track_caller]
    pub fn get(&self, label: &str) -> Option<Vec<Diagnostic>> {
        self.runs.get(label).cloned()
    }

    /// Returns a copy of the first diagnostic recorded for the fixture `label`.
    ///
    /// # Panics
    ///
    /// Panics when no fixture with that label was run, or when the fixture
    /// produced no diagnostics. The panics are raised directly in this
    /// function body (not inside a closure) so that `#[track_caller]`
    /// reports the calling test's location.
    #[track_caller]
    pub fn get_any(&self, label: &str) -> Diagnostic {
        match self.runs.get(label) {
            None => panic!("no fixture run for `{label}`"),
            Some(fixture_run) => match fixture_run.first() {
                None => panic!("fixture `{label}` produced no diagnostics"),
                Some(diagnostic) => diagnostic.clone(),
            },
        }
    }

    /// Returns a copy of the first diagnostic with code `code` recorded for
    /// the fixture `label`.
    ///
    /// # Panics
    ///
    /// Panics when no fixture with that label was run, or when none of its
    /// diagnostics carries `code`. As in `get_any`, the panics are raised in
    /// the function body so `#[track_caller]` applies to them.
    #[track_caller]
    pub fn get_by_code(&self, label: &str, code: &str) -> Diagnostic {
        let fixture_run = match self.runs.get(label) {
            Some(fixture_run) => fixture_run,
            None => panic!("no fixture run for `{label}`"),
        };
        let found = fixture_run
            .iter()
            .find(|diagnostic| diagnostic.code().as_deref() == Some(code));
        match found {
            Some(diagnostic) => diagnostic.clone(),
            None => panic!("no `{code}` diagnostic in fixture `{label}`"),
        }
    }
}
/// Tokenizes and parses a single fixture and returns the diagnostics the
/// parser recorded.
///
/// Every diagnostic is also passed to `Diagnostic::render` together with the
/// tokenized file and the fixture label, followed by an empty line on
/// stdout.
fn run_fixture(fixture: &'static Fixture) -> FixtureRun {
    let arena = Arena::new();
    let tokenized = TokenizedFile::tokenize(fixture.source);
    let mut parser = Parser::new(&tokenized, &arena);
    // Only the diagnostics are of interest; the parse result is discarded.
    let _ = parser.parse_expression();
    let reported = parser.diagnostics.clone();
    reported.iter().for_each(|diagnostic| {
        diagnostic.render(&tokenized, fixture.label);
        println!();
    });
    reported
}
/// Runs every fixture in `fixtures` in order and gathers the resulting
/// diagnostics, keyed by fixture label.
///
/// If two fixtures share a label, the later one replaces the earlier one.
pub(super) fn run_fixtures(fixtures: &'static [Fixture]) -> FixtureRuns {
    let runs = fixtures
        .iter()
        .map(|fixture| (fixture.label, run_fixture(fixture)))
        .collect::<HashMap<_, _>>();
    FixtureRuns { runs }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,778 @@
use super::*;
/// Inputs for the `P0028` checks: blocks in which a `.` selector is followed
/// by `}`, `;` or `)` instead of a member name.
pub(super) const P0028_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0028_01.uc",
source: "{\n local Actor A;\n A.\n}\n",
},
Fixture {
label: "files/P0028_02.uc",
source: "{\n local Actor A;\n A.;\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0028_03.uc",
source: "{\n local Actor A;\n A.\n ;\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0028_04.uc",
source: "{\n local Actor A;\n Log(A.\n );\n Log(\"after\");\n}\n",
},
];
/// Inputs for the `P0029` checks: index selectors whose `[` is followed by
/// `]` or `,` instead of an index expression.
pub(super) const P0029_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0029_01.uc",
source: "{\n local array<int> Values;\n Values[];\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0029_02.uc",
source: "{\n local array<int> Values;\n Values[\n ];\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0029_03.uc",
source: "{\n local array<int> Values;\n Values[, 1];\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0029_04.uc",
source: "{\n local array<int> Values;\n Log(Values[\n ]);\n Log(\"after\");\n}\n",
},
];
/// Inputs for the `P0030` checks: index selectors that have an index
/// expression but no closing `]`.
pub(super) const P0030_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0030_01.uc",
source: "{\n local array<int> Values;\n Values[0;\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0030_02.uc",
source: "{\n local array<int> Values;\n Values[\n 0\n ;\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0030_03.uc",
source: "{\n local array<int> Values;\n Log(Values[0));\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0030_04.uc",
source: "{\n local array<int> Values;\n Values[GetIndex()\n Values[1] = 7;\n}\n",
},
];
/// Checks the diagnostics reported for the `P0028` fixtures.
///
/// Every fixture must yield exactly one diagnostic: a `P0028` error whose
/// primary label marks the unexpected token. Fixtures 01, 03 and 04
/// additionally expect a secondary label on the `.`.
#[test]
fn check_p0028_fixtures() {
    let runs = run_fixtures(P0028_FIXTURES);
    for fixture_label in [
        "files/P0028_01.uc",
        "files/P0028_02.uc",
        "files/P0028_03.uc",
        "files/P0028_04.uc",
    ] {
        assert_eq!(runs.get(fixture_label).unwrap().len(), 1);
    }

    // Builds an expected label from its start/end token positions and text.
    let label = |start, end, message: &'static str| ExpectedLabel {
        span: TokenSpan {
            start: TokenPosition(start),
            end: TokenPosition(end),
        },
        message,
    };

    assert_diagnostic(
        &runs.get_any("files/P0028_01.uc"),
        &ExpectedDiagnostic {
            headline: "expected member name after `.`, found `}`",
            severity: Severity::Error,
            code: Some("P0028"),
            primary_label: Some(label(14, 14, "unexpected `}`")),
            secondary_labels: &[label(
                12,
                12,
                "after this `.`, a member name was expected",
            )],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0028_02.uc"),
        &ExpectedDiagnostic {
            headline: "expected member name after `.`, found `;`",
            severity: Severity::Error,
            code: Some("P0028"),
            primary_label: Some(label(13, 13, "unexpected `;`")),
            secondary_labels: &[],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0028_03.uc"),
        &ExpectedDiagnostic {
            headline: "expected member name after `.`, found `;`",
            severity: Severity::Error,
            code: Some("P0028"),
            primary_label: Some(label(15, 15, "unexpected `;`")),
            secondary_labels: &[label(
                12,
                12,
                "after this `.`, a member name was expected",
            )],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0028_04.uc"),
        &ExpectedDiagnostic {
            headline: "expected member name after `.`, found `)`",
            severity: Severity::Error,
            code: Some("P0028"),
            primary_label: Some(label(17, 17, "unexpected `)`")),
            secondary_labels: &[label(
                14,
                14,
                "after this `.`, a member name was expected",
            )],
            help: None,
            notes: &[],
        },
    );
}
/// Checks the diagnostics reported for the `P0029` fixtures.
///
/// Every fixture must yield exactly one diagnostic: a `P0029` error about a
/// missing index expression. Fixtures 02 and 04 expect a multi-token primary
/// span plus a secondary label on the `[`.
#[test]
fn check_p0029_fixtures() {
    let runs = run_fixtures(P0029_FIXTURES);
    for fixture_label in [
        "files/P0029_01.uc",
        "files/P0029_02.uc",
        "files/P0029_03.uc",
        "files/P0029_04.uc",
    ] {
        assert_eq!(runs.get(fixture_label).unwrap().len(), 1);
    }

    // Builds an expected label from its start/end token positions and text.
    let label = |start, end, message: &'static str| ExpectedLabel {
        span: TokenSpan {
            start: TokenPosition(start),
            end: TokenPosition(end),
        },
        message,
    };

    assert_diagnostic(
        &runs.get_any("files/P0029_01.uc"),
        &ExpectedDiagnostic {
            headline: "expected index expression after `[`, found `]`",
            severity: Severity::Error,
            code: Some("P0029"),
            primary_label: Some(label(16, 16, "expected expression before `]`")),
            secondary_labels: &[],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0029_02.uc"),
        &ExpectedDiagnostic {
            headline: "expected index expression after `[`, found `]`",
            severity: Severity::Error,
            code: Some("P0029"),
            primary_label: Some(label(15, 18, "expected expression before `]`")),
            secondary_labels: &[label(
                15,
                15,
                "after this `[`, an index expression was expected",
            )],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0029_03.uc"),
        &ExpectedDiagnostic {
            headline: "expected index expression after `[`, found `,`",
            severity: Severity::Error,
            code: Some("P0029"),
            primary_label: Some(label(16, 16, "unexpected `,`")),
            secondary_labels: &[],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0029_04.uc"),
        &ExpectedDiagnostic {
            headline: "expected index expression after `[`, found `]`",
            severity: Severity::Error,
            code: Some("P0029"),
            primary_label: Some(label(17, 20, "expected expression before `]`")),
            secondary_labels: &[label(
                17,
                17,
                "after this `[`, an index expression was expected",
            )],
            help: None,
            notes: &[],
        },
    );
}
/// Checks the diagnostics reported for the `P0030` fixtures.
///
/// Fixtures 01, 02 and 04 must yield exactly one diagnostic each. Fixture 03
/// has no count check; its `P0030` diagnostic is looked up by code instead
/// of by position.
#[test]
fn check_p0030_fixtures() {
    let runs = run_fixtures(P0030_FIXTURES);
    for fixture_label in [
        "files/P0030_01.uc",
        "files/P0030_02.uc",
        "files/P0030_04.uc",
    ] {
        assert_eq!(runs.get(fixture_label).unwrap().len(), 1);
    }

    // Builds an expected label from its start/end token positions and text.
    let label = |start, end, message: &'static str| ExpectedLabel {
        span: TokenSpan {
            start: TokenPosition(start),
            end: TokenPosition(end),
        },
        message,
    };

    assert_diagnostic(
        &runs.get_any("files/P0030_01.uc"),
        &ExpectedDiagnostic {
            headline: "missing `]` to close index selector",
            severity: Severity::Error,
            code: Some("P0030"),
            primary_label: Some(label(17, 17, "expected `]` before `;`")),
            secondary_labels: &[],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0030_02.uc"),
        &ExpectedDiagnostic {
            headline: "missing `]` to close index selector",
            severity: Severity::Error,
            code: Some("P0030"),
            primary_label: Some(label(15, 21, "expected `]` before `;`")),
            secondary_labels: &[label(15, 15, "index selector starts here")],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_by_code("files/P0030_03.uc", "P0030"),
        &ExpectedDiagnostic {
            headline: "missing `]` to close index selector",
            severity: Severity::Error,
            code: Some("P0030"),
            primary_label: Some(label(19, 19, "expected `]` before `)`")),
            secondary_labels: &[],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0030_04.uc"),
        &ExpectedDiagnostic {
            headline: "missing `]` to close index selector",
            severity: Severity::Error,
            code: Some("P0030"),
            primary_label: Some(label(15, 21, "expected `]` before `Values`")),
            secondary_labels: &[label(15, 15, "index selector starts here")],
            help: None,
            notes: &[],
        },
    );
}
/// Inputs for the `P0031` checks: function calls in which two arguments are
/// written next to each other without a `,` between them.
pub(super) const P0031_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0031_01.uc",
source: "{\n Func(A B);\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0031_02.uc",
source: "{\n Func\n (A 123);\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0031_03.uc",
source: "{\n Func(\n A\n new SomeClass\n );\n Log(\"after\");\n}\n",
},
];
/// Inputs for the `P0032` checks: function call argument lists that are
/// still open when the source text ends.
pub(super) const P0032_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0032_01.uc",
source: "Func(",
},
Fixture {
label: "files/P0032_02.uc",
source: "Func\n(\n A,",
},
Fixture {
label: "files/P0032_03.uc",
source: "Func(A,\n B,",
},
Fixture {
label: "files/P0032_04.uc",
source: "{\n Func\n (\n A,\n B,\n",
},
];
/// Inputs for the `P0033` checks: function calls in which an argument is
/// followed by a stray `#` or `?` instead of `,` or `)`.
pub(super) const P0033_FIXTURES: &[Fixture] = &[
Fixture {
label: "files/P0033_01.uc",
source: "{\n Func(A #, B);\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0033_02.uc",
source: "{\n Func\n (A\n #,\n B);\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0033_03.uc",
source: "{\n Func(\n A ?\n , B\n );\n Log(\"after\");\n}\n",
},
Fixture {
label: "files/P0033_04.uc",
source: "{\n Func\n (\n A\n #,\n B\n );\n Log(\"after\");\n}\n",
},
];
/// Checks the diagnostics reported for the `P0031` fixtures.
///
/// Every fixture must yield exactly one diagnostic: a `P0031` error whose
/// primary label sits on the token that should have been preceded by `,`,
/// with secondary labels pointing at the surrounding call context.
#[test]
fn check_p0031_fixtures() {
    let runs = run_fixtures(P0031_FIXTURES);
    for fixture_label in [
        "files/P0031_01.uc",
        "files/P0031_02.uc",
        "files/P0031_03.uc",
    ] {
        assert_eq!(runs.get(fixture_label).unwrap().len(), 1);
    }

    // Builds an expected label from its start/end token positions and text.
    let label = |start, end, message: &'static str| ExpectedLabel {
        span: TokenSpan {
            start: TokenPosition(start),
            end: TokenPosition(end),
        },
        message,
    };

    assert_diagnostic(
        &runs.get_any("files/P0031_01.uc"),
        &ExpectedDiagnostic {
            headline: "missing `,` between function call arguments",
            severity: Severity::Error,
            code: Some("P0031"),
            primary_label: Some(label(7, 7, "expected `,` before `B`")),
            secondary_labels: &[label(5, 5, "previous argument ends here")],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0031_02.uc"),
        &ExpectedDiagnostic {
            headline: "missing `,` between function call arguments",
            severity: Severity::Error,
            code: Some("P0031"),
            primary_label: Some(label(9, 9, "expected `,` before `123`")),
            secondary_labels: &[
                label(3, 3, "function called here"),
                label(7, 7, "previous argument ends here"),
            ],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0031_03.uc"),
        &ExpectedDiagnostic {
            headline: "missing `,` between function call arguments",
            severity: Severity::Error,
            code: Some("P0031"),
            primary_label: Some(label(10, 10, "expected `,` before `new`")),
            secondary_labels: &[
                label(4, 4, "function call argument list starts here"),
                label(7, 7, "previous argument ends here"),
            ],
            help: None,
            notes: &[],
        },
    );
}
/// Checks the diagnostics reported for the `P0032` fixtures.
///
/// Fixtures 01-03 must yield exactly one diagnostic each and fixture 04
/// exactly two; for fixture 04 the `P0032` diagnostic is looked up by code.
#[test]
fn check_p0032_fixtures() {
    let runs = run_fixtures(P0032_FIXTURES);
    for (fixture_label, expected_count) in [
        ("files/P0032_01.uc", 1),
        ("files/P0032_02.uc", 1),
        ("files/P0032_03.uc", 1),
        ("files/P0032_04.uc", 2),
    ] {
        assert_eq!(runs.get(fixture_label).unwrap().len(), expected_count);
    }

    // Builds an expected label from its start/end token positions and text.
    let label = |start, end, message: &'static str| ExpectedLabel {
        span: TokenSpan {
            start: TokenPosition(start),
            end: TokenPosition(end),
        },
        message,
    };

    assert_diagnostic(
        &runs.get_any("files/P0032_01.uc"),
        &ExpectedDiagnostic {
            headline: "missing `)` to close function call argument list",
            severity: Severity::Error,
            code: Some("P0032"),
            primary_label: Some(label(2, 2, "expected `)` before end of file")),
            secondary_labels: &[],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0032_02.uc"),
        &ExpectedDiagnostic {
            headline: "missing `)` to close function call argument list",
            severity: Severity::Error,
            code: Some("P0032"),
            primary_label: Some(label(2, 7, "expected `)` before end of file")),
            secondary_labels: &[
                label(0, 0, "function called here"),
                label(2, 2, "function call argument list starts here"),
            ],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0032_03.uc"),
        &ExpectedDiagnostic {
            headline: "missing `)` to close function call argument list",
            severity: Severity::Error,
            code: Some("P0032"),
            primary_label: Some(label(1, 8, "expected `)` before end of file")),
            secondary_labels: &[label(1, 1, "function call argument list starts here")],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_by_code("files/P0032_04.uc", "P0032"),
        &ExpectedDiagnostic {
            headline: "missing `)` to close function call argument list",
            severity: Severity::Error,
            code: Some("P0032"),
            primary_label: Some(label(6, 16, "expected `)` before end of file")),
            secondary_labels: &[
                label(3, 3, "function called here"),
                label(6, 6, "function call argument list starts here"),
            ],
            help: None,
            notes: &[],
        },
    );
}
/// Checks the diagnostics reported for the `P0033` fixtures.
///
/// Every fixture must yield exactly one diagnostic: a `P0033` error whose
/// primary label sits on the stray token, with secondary labels pointing at
/// the call, the argument list start and/or the end of the argument.
#[test]
fn check_p0033_fixtures() {
    let runs = run_fixtures(P0033_FIXTURES);
    for fixture_label in [
        "files/P0033_01.uc",
        "files/P0033_02.uc",
        "files/P0033_03.uc",
        "files/P0033_04.uc",
    ] {
        assert_eq!(runs.get(fixture_label).unwrap().len(), 1);
    }

    // Builds an expected label from its start/end token positions and text.
    let label = |start, end, message: &'static str| ExpectedLabel {
        span: TokenSpan {
            start: TokenPosition(start),
            end: TokenPosition(end),
        },
        message,
    };

    assert_diagnostic(
        &runs.get_any("files/P0033_01.uc"),
        &ExpectedDiagnostic {
            headline: "expected `,` or `)` after argument, found `#`",
            severity: Severity::Error,
            code: Some("P0033"),
            primary_label: Some(label(7, 7, "unexpected `#`")),
            secondary_labels: &[label(5, 5, "argument ends here")],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0033_02.uc"),
        &ExpectedDiagnostic {
            headline: "expected `,` or `)` after argument, found `#`",
            severity: Severity::Error,
            code: Some("P0033"),
            primary_label: Some(label(10, 10, "unexpected `#`")),
            secondary_labels: &[
                label(3, 3, "function called here"),
                label(7, 7, "argument ends here"),
            ],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0033_03.uc"),
        &ExpectedDiagnostic {
            headline: "expected `,` or `)` after argument, found `?`",
            severity: Severity::Error,
            code: Some("P0033"),
            primary_label: Some(label(9, 9, "unexpected `?`")),
            secondary_labels: &[
                label(4, 4, "function call argument list starts here"),
                label(7, 7, "argument ends here"),
            ],
            help: None,
            notes: &[],
        },
    );
    assert_diagnostic(
        &runs.get_any("files/P0033_04.uc"),
        &ExpectedDiagnostic {
            headline: "expected `,` or `)` after argument, found `#`",
            severity: Severity::Error,
            code: Some("P0033"),
            primary_label: Some(label(12, 12, "unexpected `#`")),
            secondary_labels: &[
                label(3, 3, "function called here"),
                label(6, 6, "function call argument list starts here"),
                label(9, 9, "argument ends here"),
            ],
            help: None,
            notes: &[],
        },
    );
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,135 @@
use std::{fs, path::PathBuf};
use rottlib::lexer::{Keyword, Token, TokenizedFile};
/// Builds the path of the fixture file `name` inside `tests/fixtures/`,
/// rooted at the crate's manifest directory.
fn fixture_file_path(name: &str) -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    path.extend(["tests", "fixtures", name]);
    path
}
/// Reads the fixture file `name` and returns its contents as UTF-8 text.
///
/// # Panics
///
/// Panics with the fixture name and the underlying error when the file
/// cannot be read.
fn read_fixture_source(name: &str) -> String {
    let path = fixture_file_path(name);
    match fs::read_to_string(path) {
        Ok(source) => source,
        Err(e) => panic!("failed to read fixture {name}: {e}"),
    }
}
/// Returns the token at the given token index on a physical line.
///
/// Here `line` is 1-based, to match human line numbers in fixture files.
/// `token_index` is 0-based within `TokenizedFile::line_tokens`.
///
/// Returns `None` when `line` is `0` (there is no such 1-based line) or when
/// the line has no token at `token_index`.
fn token_on_line(file: &TokenizedFile<'_>, line: usize, token_index: usize) -> Option<Token> {
    // `checked_sub` avoids the integer underflow `line - 1` would hit for 0.
    let line_index = line.checked_sub(1)?;
    file.line_tokens(line_index)
        .nth(token_index)
        .map(|(_, token_data)| token_data.token)
}
/// Returns reconstructed visible text for a physical line.
///
/// Here `line` is 1-based, to match human line numbers in fixture files.
///
/// Returns `None` when `line` is `0` (there is no such 1-based line) or when
/// `TokenizedFile::line_text` has no text for that line.
fn line_text(file: &TokenizedFile<'_>, line: usize) -> Option<String> {
    // `checked_sub` avoids the integer underflow `line - 1` would hit for 0.
    file.line_text(line.checked_sub(1)?)
}
/// Tokenizes `CommandAPI.uc` and checks its line count, the text of a few
/// lines, and individual tokens at fixed positions (1-based lines).
#[test]
fn command_api_fixture_queries() {
    let source = read_fixture_source("CommandAPI.uc");
    let file = TokenizedFile::tokenize(&source);
    assert_eq!(file.line_count(), 1578);

    // (line, expected reconstructed text)
    for (line, expected_text) in [
        (
            704,
            "public final function CommandConfigInfo ResolveCommandForUserID(BaseText itemName, UserID id) {",
        ),
        (806, " _.memory.Free(wrapper);"),
        (
            1274,
            "/// Method must be called after [`Voting`] with a given name is added.",
        ),
        (
            14,
            " * Acedia is distributed in the hope that it will be useful,",
        ),
    ] {
        assert_eq!(line_text(&file, line).as_deref(), Some(expected_text));
    }

    // (line, token index on that line, expected token)
    for (line, token_index, expected_token) in [
        (22, 0, Token::Keyword(Keyword::Class)),
        (1577, 0, Token::Keyword(Keyword::DefaultProperties)),
        (649, 4, Token::Whitespace),
    ] {
        let token = token_on_line(&file, line, token_index).unwrap();
        assert_eq!(token, expected_token);
    }
}
/// Tokenizes `DBRecord.uc` and checks its line count, the text of a few
/// lines, and individual tokens at fixed positions (1-based lines).
#[test]
fn dbrecord_fixture_queries() {
    let source = read_fixture_source("DBRecord.uc");
    let file = TokenizedFile::tokenize(&source);
    assert_eq!(file.line_count(), 1199);

    // (line, expected reconstructed text)
    for (line, expected_text) in [
        (
            149,
            " * However, JSON pointers are not convenient or efficient enough for that,",
        ),
        (
            787,
            " * 3. 'number' -> either `IntBox` or `FloatBox`, depending on",
        ),
        (1023, " bool makeMutable)"),
        (29, " config(AcediaDB);"),
    ] {
        assert_eq!(line_text(&file, line).as_deref(), Some(expected_text));
    }

    // (line, token index on that line, expected token)
    for (line, token_index, expected_token) in [
        (565, 0, Token::BlockComment),
        (467, 10, Token::Identifier),
        (467, 9, Token::LeftParenthesis),
    ] {
        let token = token_on_line(&file, line, token_index).unwrap();
        assert_eq!(token, expected_token);
    }
}
/// Tokenizes `KVehicle.uc` and checks its line count, the text of a few
/// lines, and individual tokens at fixed positions (1-based lines).
#[test]
fn kvehicle_fixture_queries() {
    let source = read_fixture_source("KVehicle.uc");
    let file = TokenizedFile::tokenize(&source);
    assert_eq!(file.line_count(), 326);

    // (line, expected reconstructed text)
    for (line, expected_text) in [
        (
            12,
            " virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);",
        ),
        (
            127,
            " pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;",
        ),
        (
            264,
            " //////////////////////////////////////////////////////",
        ),
        (299, " ExitPositions(0)=(X=0,Y=0,Z=0)"),
    ] {
        assert_eq!(line_text(&file, line).as_deref(), Some(expected_text));
    }

    // (line, token index on that line, expected token)
    for (line, token_index, expected_token) in [
        (17, 0, Token::Newline),
        (20, 7, Token::Less),
        (246, 2, Token::Increment),
    ] {
        let token = token_on_line(&file, line, token_index).unwrap();
        assert_eq!(token, expected_token);
    }
}

View File

@ -37,7 +37,7 @@ impl tower_lsp::LanguageServer for RottLanguageServer {
// Measure lexing performance to track parser responsiveness.
let start_time = std::time::Instant::now();
let has_errors =
rottlib::lexer::TokenizedFile::from_str(&params.text_document.text).has_errors();
rottlib::lexer::TokenizedFile::tokenize(&params.text_document.text).has_errors();
let elapsed_time = start_time.elapsed();
self.client

97
test.uc Normal file
View File

@ -0,0 +1,97 @@
/// BOF line comment
/* BOF block comment */
class TestClass extends Actor
abstract
native;
//nativereplication;
/* One blank line follows to test has_blank_line_after() */
function int fuck_you(int a, float b, string c)
{
// ---- locals with an error to trigger recovery to comma/semicolon
local int i, /* oops */ , k;
local int a, b, c;
// ---- builtins: valid + error + various initializers
int a = 1, b, , c = 3;
float f = (1.0 + 2.0) * 0.5;
bool flag = true;
string s = "hi\n\"ok\"";
name tag;
array nums;
// ---- label + goto
start:
goto start2;
// ---- if / else with tail-as-value and missing semicolons inside
if (a + c > 0) {
while (a < 5) {
if (flag) {
break;
}
a + 1; // ok
continue
} // missing ';' before '}' should be fine (SelfTerminating)
} else {
{
a + 2;
b // tail expression (no ';') becomes block tail
}
}
// ---- for with header pieces using statement-as-value
for (i; i < 10; i += 1) {
j + i;
i + j // no semicolon, next is '}' so this is a tail
}
// ---- assert with a block-as-value (statement-as-value)
assert {
i = i + 1;
i // tail is the value of the block
};
// ---- foreach (paren and no-paren forms)
foreach (nums) {
i++
}
foreach nums {
--i; // prefix and postfix in play
j--
}
// ---- do ... until (paren and no-paren) + semicolon handling
do {
i = i + 1
} until (i > 3);
do i = i + 1; until i > 5;
// ---- switch with multi-label case, recovery, and default
switch (a + c) {
case 0:
case 1:
a = a + 10
// missing ';' here forces recovery to next boundary (case/default/})
case 2:
assert (a > 0); // regular statement
break;
case 3, 4:
break;
default:
// some stray token sequence to poke "unexpected token in switch body"
/* block comment with
newlines */
a + ; // malformed expr; recover to boundary
continue; // legal statement after recovery
}
// ---- second label target for goto
start2:
return a; // final return
}
// EOF trailing line comment
/* EOF trailing block comment */

10
test_full.uc Normal file
View File

@ -0,0 +1,10 @@
// #[config(JustConfig)]
abstract class NewWay extends AcediaObject {
// #[config(MaxWavesAmount)]
var int _value;
}
class TestClass extends Actor
abstract
native
nativereplication;