Compare commits

...

2 Commits

Author SHA1 Message Date
588790b9b4 Refactor everything
Huge dump of refactored code. Still in the middle of the changes that
are to be squashed later into one huge monster commit, because there is
no value in anything atomic here.
2026-04-05 20:32:11 +07:00
5bd9aadc55 Add DeclarationLiteral parsing
Added method for parsing simple literals for use in top-level class
declarations.

Along with this change, we have also moved the methods specific to
parsing low-level literals into the same file as the new method, since
that structure made more sense.
2025-09-23 20:27:12 +07:00
71 changed files with 13693 additions and 3912 deletions

287
Cargo.lock generated
View File

@ -78,6 +78,12 @@ version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.10.1" version = "1.10.1"
@ -96,6 +102,73 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31" checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31"
[[package]]
name = "convert_case"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "crossterm"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
dependencies = [
"bitflags 2.9.1",
"crossterm_winapi",
"derive_more",
"document-features",
"mio",
"parking_lot",
"rustix",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "dashmap" name = "dashmap"
version = "5.5.3" version = "5.5.3"
@ -109,13 +182,38 @@ dependencies = [
"parking_lot_core", "parking_lot_core",
] ]
[[package]]
name = "derive_more"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
dependencies = [
"derive_more-impl",
]
[[package]]
name = "derive_more-impl"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]] [[package]]
name = "dev_tests" name = "dev_tests"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"chardet", "chardet",
"encoding_rs", "encoding_rs",
"is-terminal",
"libc",
"rottlib", "rottlib",
"sysinfo",
"walkdir", "walkdir",
] ]
@ -130,6 +228,21 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "document-features"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
dependencies = [
"litrs",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]] [[package]]
name = "encoding_rs" name = "encoding_rs"
version = "0.8.35" version = "0.8.35"
@ -139,6 +252,16 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "errno"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [
"libc",
"windows-sys",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@ -243,6 +366,12 @@ version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "hermit-abi"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]] [[package]]
name = "httparse" name = "httparse"
version = "1.10.1" version = "1.10.1"
@ -367,6 +496,17 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "is-terminal"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
"windows-sys",
]
[[package]] [[package]]
name = "itoa" name = "itoa"
version = "1.0.15" version = "1.0.15"
@ -385,12 +525,24 @@ version = "0.2.174"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
[[package]] [[package]]
name = "litemap" name = "litemap"
version = "0.8.0" version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "litrs"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.13" version = "0.4.13"
@ -401,6 +553,12 @@ dependencies = [
"scopeguard", "scopeguard",
] ]
[[package]]
name = "log"
version = "0.4.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
[[package]] [[package]]
name = "logos" name = "logos"
version = "0.15.0" version = "0.15.0"
@ -470,10 +628,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
dependencies = [ dependencies = [
"libc", "libc",
"log",
"wasi", "wasi",
"windows-sys", "windows-sys",
] ]
[[package]]
name = "ntapi"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "object" name = "object"
version = "0.36.7" version = "0.36.7"
@ -577,6 +745,26 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.17" version = "0.5.17"
@ -596,6 +784,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
name = "rottlib" name = "rottlib"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"backtrace",
"bumpalo",
"crossterm",
"logos", "logos",
] ]
@ -623,6 +814,19 @@ dependencies = [
"semver", "semver",
] ]
[[package]]
name = "rustix"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
"bitflags 2.9.1",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.20" version = "1.0.20"
@ -693,6 +897,27 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "signal-hook"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]] [[package]]
name = "signal-hook-registry" name = "signal-hook-registry"
version = "1.4.5" version = "1.4.5"
@ -752,6 +977,21 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "sysinfo"
version = "0.30.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
dependencies = [
"cfg-if",
"core-foundation-sys",
"libc",
"ntapi",
"once_cell",
"rayon",
"windows",
]
[[package]] [[package]]
name = "tinystr" name = "tinystr"
version = "0.8.1" version = "0.8.1"
@ -903,6 +1143,12 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-segmentation"
version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
[[package]] [[package]]
name = "url" name = "url"
version = "2.5.4" version = "2.5.4"
@ -937,6 +1183,22 @@ version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]] [[package]]
name = "winapi-util" name = "winapi-util"
version = "0.1.9" version = "0.1.9"
@ -946,6 +1208,31 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
"windows-core",
"windows-targets",
]
[[package]]
name = "windows-core"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
dependencies = [
"windows-targets",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.59.0" version = "0.59.0"

View File

@ -3,22 +3,21 @@ name = "dev_tests"
version = "0.1.0" version = "0.1.0"
edition = "2024" edition = "2024"
[[bin]]
name = "dump_tokens"
path = "src/dump_tokens.rs"
[[bin]] [[bin]]
name = "uc_lexer_verify" name = "uc_lexer_verify"
path = "src/uc_lexer_verify.rs" path = "src/uc_lexer_verify.rs"
[[bin]] [[bin]]
name = "temp" name = "verify_expr"
path = "src/temp.rs" path = "src/verify_expr.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
rottlib = { version = "0", path = "../rottlib", features = ["debug"] } rottlib = { version = "0", path = "../rottlib", features = ["debug"] }
is-terminal = "0.4"
libc = "0.2"
sysinfo = "0.30"
walkdir="2.5" walkdir="2.5"
encoding_rs="0.8" encoding_rs="0.8"
chardet="0.2" chardet="0.2"

View File

@ -1,76 +0,0 @@
use std::{
fs,
path::{Path, PathBuf},
};
use encoding_rs::{Encoding, UTF_8};
use rottlib::lexer::{DebugTools, TokenizedFile};
/// Recursively walk `root` and return the first file whose *basename*
/// equals `needle` (case-sensitive comparison).
///
/// The returned path is absolute (canonicalized). Unreadable directory
/// entries are silently skipped; a match whose canonicalization fails
/// yields `None`, exactly like finding nothing.
fn find_file(root: &Path, needle: &str) -> Option<PathBuf> {
    walkdir::WalkDir::new(root)
        .into_iter()
        .filter_map(Result::ok)
        .map(|entry| entry.into_path())
        .find(|candidate| {
            candidate.is_file()
                && candidate.file_name().and_then(|name| name.to_str()) == Some(needle)
        })
        .and_then(|candidate| fs::canonicalize(candidate).ok())
}
/// CLI: `dump_tokens <root_dir> <file_name>` - searches for `<file_name>`
/// recursively inside `<root_dir>`, then tokenizes it and dumps the token
/// layout for debugging.
///
/// This utility takes a *root directory* and a *file name* instead of the
/// full path to help us avoid searching for files and typing paths out:
///
/// - We know where all the sources are;
/// - We usually just know the name of the file that is being problematic.
fn main() {
    // Bug fix: the usage line used to say `inspect_uc`, which is not the
    // name this binary is built under (`dump_tokens` in Cargo.toml).
    const USAGE: &str = "Usage: dump_tokens <root_dir> <file_name>";
    let mut args = std::env::args().skip(1);
    let root_dir = args.next().unwrap_or_else(|| {
        eprintln!("{USAGE}");
        std::process::exit(1);
    });
    let file_name = args.next().unwrap_or_else(|| {
        eprintln!("{USAGE}");
        std::process::exit(1);
    });
    let root = PathBuf::from(&root_dir);
    if !root.exists() {
        eprintln!("Root directory '{root_dir}' does not exist.");
        std::process::exit(1);
    }
    // `unwrap_or_else` replaces the previous `map_or_else(exit, |path| path)`
    // dance - same behavior, clearer intent.
    let found_path = find_file(&root, &file_name).unwrap_or_else(|| {
        eprintln!("File '{file_name}' not found under '{root_dir}'.");
        std::process::exit(1);
    });
    // Read raw bytes; old UnrealScript sources come in assorted encodings.
    let raw_bytes = match fs::read(&found_path) {
        Ok(sources) => sources,
        Err(error) => {
            eprintln!("Could not read {}: {error}", found_path.display());
            std::process::exit(1);
        }
    };
    // Auto-detect the encoding and decode, falling back to UTF-8.
    let (encoding_label, _, _) = chardet::detect(&raw_bytes);
    let encoding = Encoding::for_label(encoding_label.as_bytes()).unwrap_or(UTF_8);
    let (decoded_str, _, _) = encoding.decode(&raw_bytes);
    let source_text = decoded_str.to_string();
    // Tokenize and print the debug layout of the resulting token stream.
    let tokenized_file = TokenizedFile::from_str(&source_text);
    tokenized_file.dump_debug_layout();
}

14
dev_tests/src/pretty.rs Normal file
View File

@ -0,0 +1,14 @@
// diagnostics_render.rs
use rottlib::diagnostics::{Diagnostic};
use rottlib::lexer::TokenizedFile;
/// Render a single diagnostic into a displayable string.
///
/// NOTE(review): placeholder implementation. The result (or side effect) of
/// `Diagnostic::render` is discarded — presumably it prints directly or
/// returns the rendered text; confirm its contract. The `colors` flag is
/// currently ignored, and callers receive a dummy string (which they then
/// print), so replace the placeholder return before shipping.
pub fn render_diagnostic(
    diag: &Diagnostic,
    _file: &TokenizedFile,
    file_name: Option<&str>,
    colors: bool,
) -> String {
    // Delegate to the diagnostic's own renderer with a fallback file label;
    // its output is not captured here — TODO confirm whether it prints.
    diag.render(_file, file_name.unwrap_or("<default>"));
    // Placeholder return value; callers embed this string in their output.
    "fuck it".to_string()
}

View File

@ -1,129 +0,0 @@
//! src/main.rs
//! --------------------------------------------
//! Build & run:
//! cargo run
//! --------------------------------------------
use std::env;
use std::fs;
use std::io::{self, Read, Write};
use std::path::Path;
use rottlib::arena::Arena;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::{ParseError, Parser, pretty::ExprTree};
/*
- Convenient array definitions: [1, 3, 5, 2, 4]
- Boolean dynamic arrays
- Structures in default properties
- Auto conversion of arrays into strings
- Making 'var' and 'local' unnecessary
- Allowing variable creation in 'for' loops
- Allowing variable creation at any place inside a function
- Default parameters for functions
- Function overloading?
- repeat/until
- The syntax of the default properties block is pretty strict for an arcane reason. Particularly adding spaces before or after the "=" will lead to errors in pre-UT2003 versions.
- Scopes
- different names for variables and in config file
- anonymous pairs (objects?) and value destruction
>>> AST > HIR > MIR > byte code
*/
/// Closest plan:
/// - Add top-level declaration parsing
/// - Handle pretty.rs shit somehow
/// - COMMITS
/// ---------------------------------------
/// - Add fancy error reporting
/// - Make a fancy REPL
/// - Add evaluation
///
/// WARNINGS:
/// - Empty code/switch blocks
/// Tokenize `src`, parse a single expression out of it, and pretty-print the
/// resulting expression tree to stdout.
fn parse_and_print(src: &str) -> Result<(), ParseError> {
    let tokenized_file = TokenizedFile::from_str(src);
    let node_arena = Arena::new();
    let mut expression_parser = Parser::new(&tokenized_file, &node_arena);
    // `parse_expression` hands back an arena-allocated expression node.
    let parsed_expression = expression_parser.parse_expression();
    println!("{}", ExprTree(&*parsed_expression));
    Ok(())
}
/// Run one REPL iteration: prompt, read a line, parse and print it.
///
/// Read failures and empty input are treated as a successful no-op so the
/// caller can simply loop.
fn repl_once() -> Result<(), ParseError> {
    // Fixed typo in the prompt: "an statement" -> "a statement".
    print!("Enter a statement > ");
    io::stdout().flush().unwrap();
    let mut input = String::new();
    if io::stdin().read_line(&mut input).is_err() {
        eprintln!("failed to read input");
        return Ok(());
    }
    if input.trim().is_empty() {
        return Ok(());
    }
    parse_and_print(&input)
}
/// Slurp the whole of standard input into a single `String`.
fn read_stdin_all() -> io::Result<String> {
    let mut contents = String::new();
    io::stdin().read_to_string(&mut contents)?;
    Ok(contents)
}
/// Read an entire file into a `String`, propagating any I/O error.
fn read_file_to_string(path: &Path) -> io::Result<String> {
    let contents = fs::read_to_string(path)?;
    Ok(contents)
}
/// Entry point: dispatch on an optional single positional argument.
///
/// - no argument     -> run one REPL iteration;
/// - `-h` / `--help` -> print usage and exit;
/// - `-`             -> parse a source read from stdin;
/// - anything else   -> treat it as a file path and parse that file.
fn main() -> Result<(), ParseError> {
    // Accept a single positional arg as the input path.
    // "-" means read all of stdin.
    let mut args = env::args().skip(1);
    if let Some(arg1) = args.next() {
        if arg1 == "-h" || arg1 == "--help" {
            // Hoisted: the program name was previously recomputed for every
            // usage line via a fresh `env::args().next()` call.
            let prog = env::args().next().unwrap_or_else(|| "prog".into());
            println!("Usage:");
            println!(" {} # REPL", prog);
            println!(" {} <file> # parse file", prog);
            println!(" {} - # read source from stdin", prog);
            return Ok(());
        }
        if arg1 == "-" {
            match read_stdin_all() {
                Ok(src) => return parse_and_print(&src),
                Err(e) => {
                    eprintln!("stdin read error: {}", e);
                    return Ok(());
                }
            }
        } else {
            let path = Path::new(&arg1);
            match read_file_to_string(path) {
                Ok(src) => return parse_and_print(&src),
                Err(e) => {
                    eprintln!("file read error ({}): {}", path.display(), e);
                    return Ok(());
                }
            }
        }
    }
    // No filename provided -> keep REPL behavior
    repl_once()
}

View File

@ -1,122 +1,341 @@
use std::{collections::HashSet, fs, path::PathBuf}; #![allow(
clippy::all,
clippy::pedantic,
clippy::nursery,
clippy::cargo,
clippy::restriction
)]
use rottlib::lexer::{DebugTools, TokenizedFile}; use std::{
collections::HashSet,
fs,
io::{self, Write},
path::PathBuf,
time::Instant,
};
/// Read `ignore.txt` (one path per line, `#` for comments) from root directory use encoding_rs::Encoding;
/// and turn it into a canonicalized [`HashSet<PathBuf>`]. use rottlib::diagnostics::Diagnostic as Diag;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::Parser;
mod pretty;
// ---------- CONFIG ----------
const FILE_LIMIT: usize = 10000; // cap on files scanned
const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics
const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics
/// If true, print the old debug struct dump after each pretty diagnostic.
const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;
// Cargo.toml additions:
// is-terminal = "0.4"
// sysinfo = { version = "0.30", features = ["multithread"] }
// walkdir = "2"
// chardet = "0.2"
// encoding_rs = "0.8"
// Linux-only accurate RSS in MB. Fallback uses sysinfo.
fn rss_mb() -> u64 {
#[cfg(target_os = "linux")]
{
use std::io::Read;
let mut s = String::new();
if let Ok(mut f) = std::fs::File::open("/proc/self/statm")
&& f.read_to_string(&mut s).is_ok()
&& let Some(rss_pages) = s
.split_whitespace()
.nth(1)
.and_then(|x| x.parse::<u64>().ok())
{
let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 };
return (rss_pages * page) / (1024 * 1024);
}
}
use sysinfo::{System, get_current_pid};
let mut sys = System::new();
sys.refresh_processes();
let Ok(pid) = get_current_pid() else { return 0 };
sys.process(pid).map_or(0, |p| p.memory() / 1024)
}
fn mark(label: &str, t0: Instant) {
println!(
"[{:>14}] t={:>8.2?} rss={} MB",
label,
t0.elapsed(),
rss_mb()
);
}
/// Read `ignore.txt` next to `root` and build a canonicalized set.
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> { fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
let ignore_file = root.join("ignore.txt"); let ignore_file = root.join("ignore.txt");
if !ignore_file.exists() { if !ignore_file.exists() {
return HashSet::new(); return HashSet::new();
} }
let content = match fs::read_to_string(&ignore_file) { let content = match fs::read_to_string(&ignore_file) {
Ok(content) => content, Ok(s) => s,
Err(error) => { Err(e) => {
eprintln!("Could not read {}: {error}", ignore_file.display()); eprintln!("Could not read {}: {e}", ignore_file.display());
return HashSet::new(); return HashSet::new();
} }
}; };
content content
.lines() .lines()
.map(str::trim) .map(str::trim)
.filter(|line| !line.is_empty() && !line.starts_with('#')) .filter(|l| !l.is_empty() && !l.starts_with('#'))
.filter_map(|line| { .filter_map(|line| {
let next_path = PathBuf::from(line); let p = PathBuf::from(line);
let absolute_path = if next_path.is_absolute() { let abs = if p.is_absolute() { p } else { root.join(p) };
next_path fs::canonicalize(abs).ok()
} else {
root.join(next_path)
};
fs::canonicalize(absolute_path).ok()
}) })
.collect() .collect()
} }
/// CLI: `verify_uc <root_dir>` - find all `.uc` files in the provided directory /// Wait for Enter if running in a TTY, shown before printing errors.
/// (except those listed in `ignore.txt` in the root) and test them all. fn wait_before_errors(msg: &str) {
/// let _ = io::stdout().flush();
/// Reported execution time is the tokenization time, without considering time if is_terminal::is_terminal(io::stdin()) {
/// it takes to read files from disk. eprint!("{msg}");
/// let _ = io::stderr().flush();
/// `ignore.txt` is for listing specific files, not directories. let mut s = String::new();
fn main() { let _ = io::stdin().read_line(&mut s);
let root_dir = std::env::args().nth(1).unwrap(); // it is fine to crash debug utility }
let root = PathBuf::from(&root_dir); }
/// CLI: `verify_uc <root_dir> [file_name]`
///
fn main() {
let mut args = std::env::args().skip(1);
let root_dir = args.next().unwrap_or_else(|| {
eprintln!("Usage: verify_uc <root_dir> [file_name]");
std::process::exit(1);
});
let target_raw = args.next(); // optional file name hint
let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase());
let single_mode = target_ci.is_some();
let root = PathBuf::from(&root_dir);
if !root.exists() { if !root.exists() {
eprintln!("Root directory '{root_dir}' does not exist."); eprintln!("Root directory '{root_dir}' does not exist.");
std::process::exit(1); std::process::exit(1);
} }
// Load files let t0 = Instant::now();
let ignored_paths = load_ignore_set(&root); mark("baseline", t0);
// Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode
let ignored = load_ignore_set(&root);
let mut uc_files: Vec<(PathBuf, String)> = Vec::new(); let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
let mut seen = 0usize;
let mut picked_any = false;
for entry in walkdir::WalkDir::new(&root) for entry in walkdir::WalkDir::new(&root)
.into_iter() .into_iter()
.filter_map(Result::ok) // for debug tool this is ok .filter_map(Result::ok)
.filter(|entry| { .filter(|e| {
let path = entry.path(); let path = e.path();
// Skip anything explicitly ignored if let Ok(abs) = fs::canonicalize(path)
if let Ok(absolute_path) = fs::canonicalize(path) { && ignored.contains(&abs)
if ignored_paths.contains(&absolute_path) { {
return false; return false;
}
} }
// Must be *.uc
path.is_file() path.is_file()
&& path && path
.extension() .extension()
.and_then(|extension| extension.to_str()) .and_then(|e| e.to_str())
.is_some_and(|extension| extension.eq_ignore_ascii_case("uc")) .is_some_and(|e| e.eq_ignore_ascii_case("uc"))
}) })
{ {
if !single_mode && seen >= FILE_LIMIT {
break;
}
// If in single-file mode, keep only the first whose file name matches.
if let Some(needle) = target_ci.as_deref() {
let fname = entry
.path()
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("");
let fname_lc = fname.to_ascii_lowercase();
if !(fname_lc == needle || fname_lc.contains(needle)) {
continue;
}
}
seen += 1;
let path = entry.path(); let path = entry.path();
match fs::read(path) { match fs::read(path) {
Ok(raw_bytes) => { Ok(raw) => {
// Auto-detect encoding for old Unreal script sources let (label, _, _) = chardet::detect(&raw);
let (encoding_label, _, _) = chardet::detect(&raw_bytes); let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8);
let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes()) let (txt, _, _) = enc.decode(&raw);
.unwrap_or(encoding_rs::UTF_8); uc_files.push((path.to_path_buf(), txt.into_owned()));
let (decoded_text, _, _) = encoding.decode(&raw_bytes); picked_any = true;
uc_files.push((path.to_path_buf(), decoded_text.into_owned())); if single_mode {
// Only the first match.
break;
}
} }
Err(error) => { Err(e) => {
eprintln!("Failed to read `{}`: {error}", path.display()); wait_before_errors("Read error detected. Press Enter to print details...");
eprintln!("Failed to read `{}`: {e}", path.display());
std::process::exit(1); std::process::exit(1);
} }
} }
} }
println!("Loaded {} .uc files into memory.", uc_files.len());
// Tokenize and measure performance if single_mode && !picked_any {
let start_time = std::time::Instant::now(); let needle = target_raw.as_deref().unwrap();
let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files eprintln!(
.iter() "No .uc file matching '{needle}' found under '{}'.",
.map(|(path, source_code)| { root.display()
let tokenized_file = TokenizedFile::from_str(source_code); );
if tokenized_file.has_errors() { std::process::exit(1);
println!("TK: {}", path.display()); }
}
(path.clone(), tokenized_file) println!(
}) "Loaded {} .uc files into memory (cap={}, reached={}).",
.collect(); uc_files.len(),
let elapsed_time = start_time.elapsed(); FILE_LIMIT,
if !single_mode && uc_files.len() >= FILE_LIMIT {
"yes"
} else {
"no"
}
);
mark("after_read", t0);
// Stage 1: tokenize all
let t_tok = Instant::now();
let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len());
let mut tk_error_idx: Option<usize> = None;
for (i, (path, source)) in uc_files.iter().enumerate() {
let tf = TokenizedFile::tokenize(source);
if tk_error_idx.is_none() && tf.has_errors() {
tk_error_idx = Some(i);
}
tokenized.push((path.clone(), tf));
}
println!( println!(
"Tokenized {} files in {:.2?}", "Tokenized {} files in {:.2?}",
tokenized_files.len(), tokenized.len(),
elapsed_time t_tok.elapsed()
); );
mark("after_tokenize", t0);
// Round-trip check // If tokenization error: wait, dump tokens for the first failing file, then exit.
for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) { if let Some(idx) = tk_error_idx {
let reconstructed = tokenized_file.reconstruct_source(); let (bad_path, _) = &tokenized[idx];
if original != &reconstructed { wait_before_errors("Tokenization error found. Press Enter to dump tokens...");
eprintln!("Reconstruction mismatch in `{}`!", path.display()); eprintln!("--- Tokenization error in: {}", bad_path.display());
std::process::exit(1); //bad_tf.dump_debug_layout(); // from DebugTools
std::process::exit(1);
}
// Stage 2: parse all with ONE arena kept alive
let arena = rottlib::arena::Arena::new();
let t_parse = Instant::now();
// First failing parse: (tokenized_index, diagnostics, fatal)
let mut first_fail: Option<(usize, Vec<Diag>, Option<String>)> = None;
for (i, (path, tk)) in tokenized.iter().enumerate() {
// --- progress line BEFORE parsing this file ---
{
use std::io::Write;
eprint!(
"Parsing [{}/{}] {} | rss={} MB\r\n",
i + 1,
tokenized.len(),
path.display(),
rss_mb()
);
let _ = io::stderr().flush();
}
let mut parser = Parser::new(tk, &arena);
match parser.parse_source_file() {
Ok(_) => {
if !parser.diagnostics.is_empty() && first_fail.is_none() {
first_fail = Some((i, parser.diagnostics.clone(), None));
}
}
Err(e) => {
if first_fail.is_none() {
first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}"))));
}
}
} }
} }
println!("All .uc files matched successfully."); println!(
"Parsed {} files in {:.2?}",
tokenized.len(),
t_parse.elapsed()
);
mark("after_parse", t0);
// Summary
println!("--- Summary ---");
println!("Files processed: {}", tokenized.len());
println!("File cap: {FILE_LIMIT}");
if let Some((idx, diags, fatal)) = first_fail {
wait_before_errors("Parse issues detected. Press Enter to print diagnostics...");
let (path, tf) = &tokenized[idx];
eprintln!("--- Parse issues in first failing file ---");
eprintln!("File: {}", path.display());
if let Some(f) = &fatal {
eprintln!("Fatal parse error: {f}");
}
if diags.is_empty() && fatal.is_none() {
eprintln!("(no diagnostics captured)");
} else {
let use_colors = is_terminal::is_terminal(io::stderr());
let fname = path.display().to_string();
let total = diags.len();
let first_n = DIAG_SHOW_FIRST.min(total);
let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));
if total > first_n + last_n {
// first window
for (k, d) in diags.iter().take(first_n).enumerate() {
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
eprintln!("{s}");
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
eprintln!("#{}: {:#?}", k + 1, d);
}
}
eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
// last window
let start = total - last_n;
for (offset, d) in diags.iter().skip(start).enumerate() {
let idx_global = start + offset + 1;
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
eprintln!("{s}");
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
eprintln!("#{idx_global}: {d:#?}");
}
}
} else {
for (k, d) in diags.iter().enumerate() {
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
eprintln!("{s}");
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
eprintln!("#{}: {:#?}", k + 1, d);
}
}
}
}
std::process::exit(1);
}
println!("All files parsed without diagnostics.");
} }

View File

@ -0,0 +1,85 @@
#![allow(
clippy::all,
clippy::pedantic,
clippy::nursery,
clippy::cargo,
clippy::restriction
)]
use rottlib::arena::Arena;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::Parser;
mod pretty;
/// Expressions to test.
///
/// Add, remove, or edit entries here.
/// Using `(&str, &str)` gives each case a human-readable label.
const TEST_CASES: &[(&str, &str)] = &[
    ("simple_add", "1 + 2 * 3"),
    ("member_call", "Foo.Bar(1, 2)"),
    ("index_member", "arr[5].X"),
    ("tagged_name", "Class'MyPackage.MyThing'"),
    // Deliberately malformed input to exercise error recovery.
    ("broken_expr", "a + (]\n//AAA\n//BBB\n//CCC\n//DDD\n//EEE\n//FFF"),
];

/// If true, print the parsed expression using Debug formatting.
const PRINT_PARSED_EXPR: bool = false;

/// If true, print diagnostics even when parsing returned a value.
const ALWAYS_PRINT_DIAGNOSTICS: bool = true;
/// Runs every entry in `TEST_CASES` through the expression parser and prints
/// any diagnostics each case produced.
///
/// Exits with status `1` when at least one case reported diagnostics.
fn main() {
    let arena = Arena::new();
    println!("Running {} expression test case(s)...", TEST_CASES.len());
    println!();

    let mut any_failed = false;
    for (case_number, (label, source)) in (1..).zip(TEST_CASES.iter()) {
        println!("============================================================");
        println!("Case #{:02}: {}", case_number, label);
        println!("Source: {}", source);
        println!("------------------------------------------------------------");

        let tokenized = TokenizedFile::tokenize(source);
        let mut parser = Parser::new(&tokenized, &arena);
        let expression = parser.parse_expression();
        println!("parse_expression() returned.");

        if PRINT_PARSED_EXPR {
            println!("Parsed expression:");
            println!("{expression:#?}");
        }

        if parser.diagnostics.is_empty() {
            println!("Diagnostics: none");
        } else {
            any_failed = true;
            println!("Diagnostics: {}", parser.diagnostics.len());
            if ALWAYS_PRINT_DIAGNOSTICS {
                // Colors stay disabled so output is stable when piped.
                let use_colors = false;
                for (index, diagnostic) in parser.diagnostics.iter().enumerate() {
                    let rendered =
                        pretty::render_diagnostic(diagnostic, &tokenized, Some(label), use_colors);
                    println!("Diagnostic #{}:", index + 1);
                    println!("{rendered}");
                }
            }
        }
        println!();
    }

    println!("============================================================");
    if any_failed {
        println!("Done. At least one case had tokenization or parse diagnostics.");
        std::process::exit(1);
    } else {
        println!("Done. All cases completed without diagnostics.");
    }
}

BIN
perf.data.old Normal file

Binary file not shown.

View File

@ -7,6 +7,11 @@ edition = "2024"
default = [] default = []
debug = [] debug = []
[lints]
workspace = true
[dependencies] [dependencies]
logos = "0.15" logos = "0.15"
bumpalo = { version = "3", features = ["boxed", "collections"] } bumpalo = { version = "3", features = ["boxed", "collections"] }
backtrace = "0.3"
crossterm = "0.*"

View File

@ -1,21 +1,26 @@
//! Arena submodule defining types that exist in their own memory space and //! Arena submodule defining types allocated from a dedicated bump arena,
//! allow multiple cheap allocations (both performance- and fragmentation-wise). //! allowing many cheap allocations with fast bulk reclamation.
//! //!
//! ## Memory safety //! ## Destruction and resource management
//! //!
//! Dropping the [`Arena`] frees all its memory at once and does not run //! Dropping the [`Arena`] reclaims the arena's memory in bulk. Destructors are
//! [`Drop`] for values allocated within it. Avoid storing types that implement //! not run for arena allocations that are still live at that point. Therefore,
//! [`Drop`] or own external resources inside [`ArenaNode`], [`ArenaVec`], or //! avoid storing types whose cleanup must reliably happen at arena release,
//! [`ArenaString`]. If you must, arrange an explicit "drain/drop" pass before //! especially types that own memory allocations or external resources outside
//! the arena is dropped. //! the arena.
//!
//! [`ArenaNode`], [`ArenaVec`], and [`ArenaString`] are provided so commonly
//! used owned data can keep their storage inside the arena rather than in
//! separate global-heap allocations.
use core::borrow::Borrow;
use core::fmt::{Debug, Display, Formatter, Result}; use core::fmt::{Debug, Display, Formatter, Result};
use core::ops::{Deref, DerefMut}; use core::ops::{Deref, DerefMut};
use bumpalo::{Bump, boxed, collections}; use bumpalo::{Bump, boxed, collections};
use crate::ast::AstSpan; use crate::ast::AstSpan;
use crate::lexer::TokenLocation; use crate::lexer::TokenPosition;
/// Object that manages a separate memory space, which can be deallocated all /// Object that manages a separate memory space, which can be deallocated all
/// at once after use. /// at once after use.
@ -23,11 +28,8 @@ use crate::lexer::TokenLocation;
/// All allocations borrow the arena immutably. /// All allocations borrow the arena immutably.
/// ///
/// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it /// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it
/// (including values contained in [`ArenaNode`], [`ArenaVec`] /// (including values contained in [`ArenaNode`], [`ArenaVec`] and
/// and [`ArenaString`]). /// [`ArenaString`]).
///
/// This arena is not thread-safe (`!Send`, `!Sync`). Values borrow the arena
/// and therefore cannot be sent across threads independently.
#[derive(Debug)] #[derive(Debug)]
pub struct Arena { pub struct Arena {
bump: Bump, bump: Bump,
@ -48,38 +50,47 @@ impl Arena {
ArenaVec(collections::Vec::new_in(&self.bump)) ArenaVec(collections::Vec::new_in(&self.bump))
} }
///Allocates a copy of `string` in this arena and returns /// Allocates a copy of `string` in this arena and returns
/// an [`ArenaString`]. /// an [`ArenaString`].
#[must_use] #[must_use]
pub fn string(&self, string: &str) -> ArenaString<'_> { pub fn string(&self, string: &str) -> ArenaString<'_> {
ArenaString(collections::String::from_str_in(string, &self.bump)) ArenaString(collections::String::from_str_in(string, &self.bump))
} }
/// Allocates `value` in this arena with the given `span`, /// Allocates `value` in this arena and attaches `span`.
/// returning an [`ArenaNode`].
/// ///
/// The node's storage borrows this arena and cannot outlive it. /// The returned node borrows the arena and cannot outlive it.
/// /// If it is still live when the arena is dropped, its destructor is not run.
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
#[must_use] #[must_use]
pub fn alloc<T>(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> { pub fn alloc_node<T>(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> {
ArenaNode { ArenaNode {
inner: boxed::Box::new_in(value, &self.bump), value: boxed::Box::new_in(value, &self.bump),
span, span,
} }
} }
pub fn alloc_between<T>( /// Allocates `value` in this arena and attaches the span from `start` to
/// `end`.
///
/// The returned node borrows the arena and cannot outlive it.
/// If it is still live when the arena is dropped, its destructor is not run.
#[must_use]
pub fn alloc_node_between<T>(
&self, &self,
value: T, value: T,
from: TokenLocation, start: TokenPosition,
to: TokenLocation, end: TokenPosition,
) -> ArenaNode<'_, T> { ) -> ArenaNode<'_, T> {
self.alloc(value, AstSpan { from, to }) self.alloc_node(value, AstSpan::range(start, end))
} }
pub fn alloc_at<T>(&self, value: T, at: TokenLocation) -> ArenaNode<'_, T> { /// Allocates `value` in this arena and attaches a span covering `at`.
self.alloc(value, AstSpan { from: at, to: at }) ///
/// The returned node borrows the arena and cannot outlive it.
/// If it is still live when the arena is dropped, its destructor is not run.
#[must_use]
pub fn alloc_node_at<T>(&self, value: T, at: TokenPosition) -> ArenaNode<'_, T> {
self.alloc_node(value, AstSpan::new(at))
} }
} }
@ -91,15 +102,11 @@ impl Default for Arena {
/// An arena-allocated box with an attached source span. /// An arena-allocated box with an attached source span.
/// ///
/// Equality and hashing take into account both the contained `T` and the `span` /// Dropping the node normally runs `Drop` for the inner value.
/// (when `T: Eq + Hash`). /// Dropping the arena does not itself perform a separate destructor pass.
///
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
#[derive(Hash, PartialEq, Eq)] #[derive(Hash, PartialEq, Eq)]
pub struct ArenaNode<'arena, T> { pub struct ArenaNode<'arena, T> {
/// Value allocated in the arena; this node owns it. value: boxed::Box<'arena, T>,
inner: boxed::Box<'arena, T>,
/// Token range covered by the value.
span: AstSpan, span: AstSpan,
} }
@ -108,74 +115,53 @@ impl<'arena, T> ArenaNode<'arena, T> {
#[must_use] #[must_use]
pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self { pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self {
Self { Self {
inner: boxed::Box::new_in(value, &arena.bump), value: boxed::Box::new_in(value, &arena.bump),
span, span,
} }
} }
/// Creates a new [`ArenaNode`] for an AST node that spans a single token. /// Returns a mutable reference to the token span covered by this node.
pub fn from_token_location( #[must_use]
value: T, pub const fn span_mut(&mut self) -> &mut AstSpan {
token_location: crate::lexer::TokenLocation,
arena: &'arena Arena,
) -> Self {
Self {
inner: boxed::Box::new_in(value, &arena.bump),
span: AstSpan {
from: token_location,
to: token_location,
},
}
}
pub fn span_mut(&mut self) -> &mut AstSpan {
&mut self.span &mut self.span
} }
pub fn extend_to(&mut self, to: TokenLocation) {
self.span.to = to;
}
pub fn extend_from(&mut self, from: TokenLocation) {
self.span.from = from;
}
/// Returns the token span covered by this node. /// Returns the token span covered by this node.
pub fn span(&self) -> &AstSpan { #[must_use]
pub const fn span(&self) -> &AstSpan {
&self.span &self.span
} }
} }
impl<'arena, T> Deref for ArenaNode<'arena, T> { impl<T> Deref for ArenaNode<'_, T> {
type Target = T; type Target = T;
fn deref(&self) -> &T { fn deref(&self) -> &T {
&self.inner &self.value
} }
} }
impl<'arena, T> DerefMut for ArenaNode<'arena, T> { impl<T> DerefMut for ArenaNode<'_, T> {
fn deref_mut(&mut self) -> &mut T { fn deref_mut(&mut self) -> &mut T {
&mut self.inner &mut self.value
} }
} }
impl<'arena, T: Debug> Debug for ArenaNode<'arena, T> { impl<T: Debug> Debug for ArenaNode<'_, T> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result { fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.debug_struct("ArenaNode") f.debug_struct("ArenaNode")
.field("inner", &**self) .field("inner", &**self)
.field("span", &self.span()) .field("span", self.span())
.finish() .finish()
} }
} }
/// Version of [`Vec`] that can be safely used inside a memory arena. /// Version of [`Vec`] whose backing storage lives in the arena.
/// ///
/// Elements do not have their destructors run when the arena is dropped. /// Elements are dropped when the `ArenaVec` itself is dropped normally.
/// /// Capacity growth may leave old buffers in the arena until the whole arena
/// This type dereferences to `[T]` and supports iteration by reference /// is reclaimed.
/// (`&ArenaVec` and `&mut ArenaVec` implement [`IntoIterator`]). #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>); pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>);
impl<'arena, T> ArenaVec<'arena, T> { impl<'arena, T> ArenaVec<'arena, T> {
@ -190,18 +176,28 @@ impl<'arena, T> ArenaVec<'arena, T> {
/// Growth is backed by the arena; increasing capacity allocates new space /// Growth is backed by the arena; increasing capacity allocates new space
/// in the arena and never frees previous blocks. /// in the arena and never frees previous blocks.
pub fn push(&mut self, value: T) { pub fn push(&mut self, value: T) {
self.0.push(value) self.0.push(value);
} }
/// Reserves capacity for at least `additional` more elements.
///
/// The collection may reserve more space to avoid frequent reallocations.
/// If growth requires a new allocation, the previous buffer remains in the
/// arena until the arena is reclaimed.
pub fn reserve(&mut self, additional: usize) { pub fn reserve(&mut self, additional: usize) {
self.0.reserve(additional) self.0.reserve(additional);
} }
pub fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
self.0.extend(it) /// Extends the vector with the contents of `items`.
///
/// Growth may allocate a new buffer in the arena and leave the previous
/// buffer in place until the arena is reclaimed.
pub fn extend<I: IntoIterator<Item = T>>(&mut self, items: I) {
self.0.extend(items);
} }
} }
impl<'arena, T> Deref for ArenaVec<'arena, T> { impl<T> Deref for ArenaVec<'_, T> {
type Target = [T]; type Target = [T];
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
@ -209,48 +205,41 @@ impl<'arena, T> Deref for ArenaVec<'arena, T> {
} }
} }
impl<'arena, T> DerefMut for ArenaVec<'arena, T> { impl<T> DerefMut for ArenaVec<'_, T> {
fn deref_mut(&mut self) -> &mut Self::Target { fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0 &mut self.0
} }
} }
impl<'arena, 's, T> IntoIterator for &'s ArenaVec<'arena, T> { impl<'iter, T> IntoIterator for &'iter ArenaVec<'_, T> {
type Item = &'s T; type Item = &'iter T;
type IntoIter = core::slice::Iter<'s, T>; type IntoIter = core::slice::Iter<'iter, T>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.0.iter() self.0.iter()
} }
} }
impl<'arena, 's, T> IntoIterator for &'s mut ArenaVec<'arena, T> { impl<'iter, T> IntoIterator for &'iter mut ArenaVec<'_, T> {
type Item = &'s mut T; type Item = &'iter mut T;
type IntoIter = core::slice::IterMut<'s, T>; type IntoIter = core::slice::IterMut<'iter, T>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.0.iter_mut() self.0.iter_mut()
} }
} }
/// Version of [`String`] that can be safely used inside a memory arena. /// Version of [`String`] whose backing storage lives in the arena.
/// #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
/// This type dereferences to [`str`] and implements [`AsRef<str>`] and
/// [`core::borrow::Borrow<str>`] for ergonomic use with APIs expecting string
/// slices.
///
/// The string borrows the arena and cannot outlive it. Dropping the arena
/// frees its memory without running `Drop` for the string contents.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct ArenaString<'arena>(collections::String<'arena>); pub struct ArenaString<'arena>(collections::String<'arena>);
impl<'arena> ArenaString<'arena> { impl<'arena> ArenaString<'arena> {
/// Allocates a copy of `string` in `arena` and returns an [`ArenaString`]. /// Allocates a copy of `string` in `arena` and returns an [`ArenaString`].
#[must_use] #[must_use]
pub fn from_str_in(string: &str, arena: &'arena Arena) -> Self { pub fn from_str_in(text: &str, arena: &'arena Arena) -> Self {
Self(collections::String::from_str_in(string, &arena.bump)) Self(collections::String::from_str_in(text, &arena.bump))
} }
} }
impl<'arena> Deref for ArenaString<'arena> { impl Deref for ArenaString<'_> {
type Target = str; type Target = str;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
@ -258,19 +247,19 @@ impl<'arena> Deref for ArenaString<'arena> {
} }
} }
impl<'arena> AsRef<str> for ArenaString<'arena> { impl AsRef<str> for ArenaString<'_> {
fn as_ref(&self) -> &str { fn as_ref(&self) -> &str {
&self.0 &self.0
} }
} }
impl<'arena> core::borrow::Borrow<str> for ArenaString<'arena> { impl Borrow<str> for ArenaString<'_> {
fn borrow(&self) -> &str { fn borrow(&self) -> &str {
&self.0 &self.0
} }
} }
impl<'arena> Display for ArenaString<'arena> { impl Display for ArenaString<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result { fn fmt(&self, f: &mut Formatter<'_>) -> Result {
Display::fmt(&self.0, f) Display::fmt(&self.0, f)
} }

View File

@ -1,376 +0,0 @@
use crate::arena::ArenaVec;
use super::lexer::TokenLocation;
use core::fmt;
use crate::arena::{Arena, ArenaNode, ArenaString};
/// Inclusive token range covered by an AST node.
///
/// Both endpoints belong to the span, i.e. the range is `[from, to]`,
/// not `[from, to)`.
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AstSpan {
    /// Location of the first token covered by this span.
    pub from: TokenLocation,
    /// Location of the last token covered by this span (inclusive).
    pub to: TokenLocation,
}
impl AstSpan {
    /// Combines two spans into one stretching from the start of `left_span`
    /// to the end of `right_span`.
    pub fn merge(left_span: &AstSpan, right_span: &AstSpan) -> AstSpan {
        Self::range(left_span.from, right_span.to)
    }

    /// Creates a span covering exactly one token.
    pub fn new(single_location: TokenLocation) -> AstSpan {
        Self::range(single_location, single_location)
    }

    /// Creates a span covering the inclusive range `from..=to`.
    pub fn range(from: TokenLocation, to: TokenLocation) -> AstSpan {
        AstSpan { from, to }
    }

    /// Grows the span rightwards so it also covers `right_most_location`;
    /// locations already inside the span leave it unchanged.
    pub fn extend_to(&mut self, right_most_location: TokenLocation) {
        self.to = self.to.max(right_most_location);
    }
}
/// Unary operators written before their operand.
#[derive(Clone, Copy, Debug)]
pub enum PrefixOperator {
    Not,
    Minus,
    BitwiseNot,
    Increment,
    Decrement,
}

/// Unary operators written after their operand.
#[derive(Clone, Copy, Debug)]
pub enum PostfixOperator {
    Increment,
    Decrement,
}
/// Binary operators, grouped by category (see the section comments).
///
/// The source spelling of each operator is given by its [`core::fmt::Display`]
/// implementation.
#[derive(Clone, Copy, Debug)]
pub enum InfixOperator {
    // Assignments
    Assign,
    MultiplyAssign,
    DivideAssign,
    ModuloAssign,
    PlusAssign,
    MinusAssign,
    ConcatAssign,
    ConcatSpaceAssign,
    // String operations
    ConcatSpace,
    Concat,
    // Logical
    And,
    Xor,
    Or,
    // Bit-wise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    // Not-equal
    NotEqual,
    // Comparison
    Equal,
    ApproximatelyEqual,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,
    ClockwiseFrom,
    // Shifts
    LeftShift,
    LogicalRightShift,
    RightShift,
    // Terms
    Plus,
    Minus,
    // Modulo
    Modulo,
    // Factor
    Multiply,
    Divide,
    Dot,
    Cross,
    // Exponentiation
    Exponentiation,
}
/// Expression node of the AST.
///
/// Besides operators and literals this enum also includes blocks and
/// control-flow constructs, which occupy expression positions in this
/// grammar.
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
pub enum Expression<'src, 'arena> {
    /// Binary application: `lhs op rhs`.
    Binary(
        ExpressionRef<'src, 'arena>,
        InfixOperator,
        ExpressionRef<'src, 'arena>,
    ),
    /// Prefix unary application: `op rhs`.
    LeftUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
    /// Postfix unary application: `lhs op`.
    RightUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
    /// Plain identifier, borrowed from the source text.
    Identifier(&'src str),
    /// String literal, stored in arena memory.
    String(ArenaString<'arena>),
    /// Integer literal.
    Integer(i128),
    /// Floating-point literal.
    Float(f64),
    /// Boolean literal.
    Bool(bool),
    /// `None` literal.
    None,
    /// Parenthesized subexpression: `(expr)`.
    Parentheses(ExpressionRef<'src, 'arena>),
    /// Braced block of statements with an optional trailing expression.
    Block {
        // All these end with `;`
        statements: ArenaVec<'arena, StatementRef<'src, 'arena>>,
        // Last statement, but only if it doesn't end with `;`
        tail: Option<ExpressionRef<'src, 'arena>>,
    },
    /// `if` with an optional `else` branch.
    If {
        condition: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
        else_body: Option<ExpressionRef<'src, 'arena>>,
    },
    /// `while` loop.
    While {
        condition: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
    },
    /// `do ... until` loop.
    DoUntil {
        condition: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
    },
    /// `foreach` loop over an iterator expression.
    ForEach {
        iterator: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
    },
    /// C-style `for` loop; all three header slots may be omitted.
    For {
        init: Option<ExpressionRef<'src, 'arena>>,
        condition: Option<ExpressionRef<'src, 'arena>>,
        step: Option<ExpressionRef<'src, 'arena>>,
        body: ExpressionRef<'src, 'arena>,
    },
    /// `switch` over `selector` with case arms and an optional default.
    Switch {
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, CaseRef<'src, 'arena>>,
        // default case
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
        // last statement of the case block
        tail: Option<ExpressionRef<'src, 'arena>>,
    },
    /// `goto` carrying the target label name.
    Goto(ArenaString<'arena>),
    /// `continue`.
    Continue,
    /// `break` with an optional value.
    Break(Option<ExpressionRef<'src, 'arena>>),
    /// `return` with an optional value.
    Return(Option<ExpressionRef<'src, 'arena>>),
    // For injecting in place of parts that couldn't be parsed
    // (along with text that wasn't able to be parsed)
    Error,
}

/// Arena reference to an expression node.
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
/// One `name [= initializer]` entry of a variable declaration.
#[derive(Debug)]
pub struct VariableDeclarator<'src, 'arena> {
    /// Declared variable name.
    pub name: ArenaString<'arena>,
    /// Optional initializer expression after `=`.
    pub initializer: Option<ExpressionRef<'src, 'arena>>,
}

/// One `case` arm of a `switch` expression.
#[derive(Debug)]
pub struct SwitchCase<'src, 'arena> {
    pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>, // UScript allows expressions; multiple labels ok
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, // allow fallthrough unless a Break/Goto ends it
}

/// Arena reference to a switch-case node.
pub type CaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
/// Statement node used inside blocks and switch arms.
#[derive(Debug)]
pub enum Statement<'src, 'arena> {
    // For the cases where user just used too many semi-colons `;;;;`
    Empty,
    /// Expression used in statement position.
    Expression(ExpressionRef<'src, 'arena>),
    // Just declarations without assignment:
    // `local int i, j, k`
    LocalVariableDeclaration {
        type_name: ArenaString<'arena>,
        identifiers: ArenaVec<'arena, ArenaString<'arena>>,
    },
    // Just `int i, j = 3, k = 0`
    VariableDeclaration {
        type_name: ArenaString<'arena>,
        declarations: ArenaVec<'arena, VariableDeclarator<'src, 'arena>>,
    },
    /// Named label; `Expression::Goto` carries a matching name.
    Label(ArenaString<'arena>),
    // For injecting in place of parts that couldn't be parsed
    // (along with text that wasn't able to be parsed)
    Error,
}

/// Arena reference to a statement node.
pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;
impl<'src, 'arena> Expression<'src, 'arena> {
    /// Allocates a prefix-operator application `op rhs` in `arena`.
    ///
    /// The resulting span stretches from the operator token through the end
    /// of `rhs`.
    pub fn new_prefix(
        arena: &'arena Arena,
        op_position: TokenLocation,
        op: PrefixOperator,
        rhs: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        // Span must be computed before `rhs` is moved into the node.
        let span = AstSpan::range(op_position, rhs.span().to);
        ArenaNode::new_in(Self::LeftUnary(op, rhs), span, arena)
    }

    /// Allocates a postfix-operator application `lhs op` in `arena`.
    ///
    /// The resulting span stretches from the start of `lhs` through the
    /// operator token.
    pub fn new_postfix(
        arena: &'arena Arena,
        lhs: ArenaNode<'arena, Self>,
        op: PostfixOperator,
        op_position: TokenLocation,
    ) -> ArenaNode<'arena, Self> {
        let span = AstSpan::range(lhs.span().from, op_position);
        ArenaNode::new_in(Self::RightUnary(lhs, op), span, arena)
    }

    /// Allocates a binary application `lhs op rhs` in `arena`, with a span
    /// covering both operands.
    pub fn new_binary(
        arena: &'arena Arena,
        lhs: ArenaNode<'arena, Self>,
        op: InfixOperator,
        rhs: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        // `span()` already returns `&AstSpan`; the previous `&lhs.span()`
        // produced `&&AstSpan` and only compiled via deref coercion
        // (clippy::needless_borrow).
        let span = AstSpan::merge(lhs.span(), rhs.span());
        ArenaNode::new_in(Self::Binary(lhs, op, rhs), span, arena)
    }
}
/// Returns `true` for expressions that require `;` when used as a statement
/// (i.e., everything except blocky control-flow forms).
pub trait NeedsSemi {
fn needs_semicolon(&self) -> bool;
}
impl<'src, 'arena> NeedsSemi for Expression<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
match self {
Expression::Block { .. }
| Expression::If { .. }
| Expression::While { .. }
| Expression::DoUntil { .. }
| Expression::ForEach { .. }
| Expression::For { .. }
| Expression::Error => false,
// All other expressions require `;` when used as a statement.
_ => true,
}
}
}
// If `ArenaNode<T>` derefs to `T`, this works as-is.
// Otherwise, replace `(**self)` with your accessor, e.g. `self.value()` or `self.get()`.
impl<'src, 'arena> NeedsSemi for ExpressionRef<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
(**self).needs_semicolon()
}
}
impl<'src, 'arena> NeedsSemi for Statement<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
match self {
Statement::Empty | Statement::Label { .. } | Statement::Error { .. } => false,
// All other expressions require `;` when used as a statement.
_ => true,
}
}
}
// If `ArenaNode<T>` derefs to `T`, this works as-is.
// Otherwise, replace `(**self)` with your accessor, e.g. `self.value()` or `self.get()`.
impl<'src, 'arena> NeedsSemi for StatementRef<'src, 'arena> {
#[inline]
fn needs_semicolon(&self) -> bool {
(**self).needs_semicolon()
}
}
impl fmt::Display for PrefixOperator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
PrefixOperator::Not => "!",
PrefixOperator::Minus => "-",
PrefixOperator::BitwiseNot => "~",
PrefixOperator::Increment => "++.",
PrefixOperator::Decrement => "--.",
};
write!(f, "{s}")
}
}
impl fmt::Display for PostfixOperator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
PostfixOperator::Increment => ".++",
PostfixOperator::Decrement => ".--",
};
write!(f, "{s}")
}
}
impl fmt::Display for InfixOperator {
    /// Writes the operator's source spelling.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use InfixOperator::*;
        let s = match self {
            // Assignments
            Assign => "=",
            MultiplyAssign => "*=",
            DivideAssign => "/=",
            ModuloAssign => "%=",
            PlusAssign => "+=",
            MinusAssign => "-=",
            ConcatAssign => "$=",
            ConcatSpaceAssign => "@=",
            // String operations
            ConcatSpace => "@",
            Concat => "$",
            // Logical
            And => "&&",
            Xor => "^^",
            Or => "||",
            // Bitwise
            BitwiseAnd => "&",
            BitwiseOr => "|",
            BitwiseXor => "^",
            // Not equal
            NotEqual => "!=",
            // Comparison
            Equal => "==",
            // Fixed: UnrealScript spells approximate equality `~=`,
            // not `~+`.
            ApproximatelyEqual => "~=",
            Less => "<",
            LessEqual => "<=",
            Greater => ">",
            GreaterEqual => ">=",
            ClockwiseFrom => "ClockwiseFrom",
            // Shift
            LeftShift => "<<",
            LogicalRightShift => ">>>",
            RightShift => ">>",
            // Term
            Plus => "+",
            Minus => "-",
            // Modulo
            Modulo => "%",
            // Factor
            Multiply => "*",
            Divide => "/",
            Dot => "Dot",
            Cross => "Cross",
            // Exp
            Exponentiation => "**",
        };
        write!(f, "{s}")
    }
}

View File

@ -0,0 +1,235 @@
//! Callable-declaration AST nodes.
//!
//! This module defines function-like declarations together with their
//! parameter lists and callable modifiers.
//!
//! The language groups several callable forms under a largely shared header
//! structure, including ordinary functions, events, delegates, and operator
//! declarations. This module preserves those forms as AST nodes together with
//! source-relevant modifier and parameter information.
use super::{
AstSpan, BlockBody, ExpressionRef, IdentifierToken, InfixOperatorName, PostfixOperatorName,
PrefixOperatorName, TypeSpecifierRef,
};
use crate::arena::ArenaVec;
use crate::lexer::{Keyword, TokenPosition};
use crate::arena::ArenaNode;
use core::convert::TryFrom;
/// Parameter modifier kind.
///
/// These modifiers apply to a single callable parameter and are preserved in
/// source order on the parameter node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParameterModifierKind {
    /// The `optional` modifier.
    Optional,
    /// The `out` modifier.
    Out,
    /// The `skip` modifier.
    Skip,
    /// The `coerce` modifier.
    Coerce,
}

/// Parameter modifier together with the source position of its token.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ParameterModifier {
    /// Which modifier this is.
    pub kind: ParameterModifierKind,
    /// Position of the modifier's token in the source.
    pub position: TokenPosition,
}

/// One callable parameter declaration.
#[derive(Debug, PartialEq)]
pub struct Parameter<'src, 'arena> {
    /// Parameter modifiers in source order.
    pub modifiers: ArenaVec<'arena, ParameterModifier>,
    /// Declared parameter type.
    pub type_specifier: TypeSpecifierRef<'src, 'arena>,
    /// Declared parameter name.
    pub name: IdentifierToken,
    /// Optional array-size expression from `[expr]`.
    pub array_size: Option<ExpressionRef<'src, 'arena>>,
    /// Optional default-value expression after `=`.
    pub default_value: Option<ExpressionRef<'src, 'arena>>,
}

/// Stable arena reference to a parameter node.
pub type ParameterRef<'src, 'arena> = ArenaNode<'arena, Parameter<'src, 'arena>>;
/// Syntactic callable declaration kind.
///
/// This enum distinguishes ordinary callable declarations from operator
/// declarations and preserves operator fixity / precedence where applicable.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CallableKind {
/// Ordinary function declaration: `function`.
Function,
/// Event declaration: `event`.
Event,
/// Delegate declaration: `delegate`.
Delegate,
/// Prefix operator declaration: `preoperator`.
PrefixOperator,
/// Infix operator declaration: `operator(<precedence>)`.
///
/// Precedence can be skipped as all supported operators already have
/// built-in precedence value that can't actually be changed in
/// `UnrealScript`. So omitting precedence when redefining operators is
/// a better approach.
InfixOperator(Option<u128>),
/// Postfix operator declaration: `postoperator`.
PostfixOperator,
}
impl TryFrom<Keyword> for CallableKind {
    type Error = ();

    /// Converts a keyword into a [`CallableKind`] when the callable form
    /// is fully determined by the keyword alone.
    ///
    /// Returns `Err(())` for keywords that either do not represent callable
    /// declarations or require additional syntax to determine the final kind
    /// (for example `operator(<precedence>)`).
    fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
        match keyword {
            Keyword::Function => Ok(Self::Function),
            Keyword::Event => Ok(Self::Event),
            Keyword::Delegate => Ok(Self::Delegate),
            Keyword::PreOperator => Ok(Self::PrefixOperator),
            Keyword::PostOperator => Ok(Self::PostfixOperator),
            _ => Err(()),
        }
    }
}
/// Declared name of a callable.
///
/// Ordinary callables are named by an identifier; operator declarations are
/// named by the operator they overload.
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum CallableName {
    /// Plain identifier name.
    Identifier(IdentifierToken),
    /// Name of a prefix operator declaration.
    PrefixOperator(PrefixOperatorName),
    /// Name of an infix operator declaration.
    InfixOperator(InfixOperatorName),
    /// Name of a postfix operator declaration.
    PostfixOperator(PostfixOperatorName),
}
/// Callable definition.
///
/// This node represents the common syntactic shape shared by function-like
/// declarations, including ordinary functions, events, delegates, and
/// operator forms.
#[derive(Debug, PartialEq)]
pub struct CallableDefinition<'src, 'arena> {
/// Declared callable name.
pub name: CallableName,
/// Callable declaration form.
pub kind: CallableKind,
/// Optional return type.
///
/// Some callable forms may omit a return type entirely.
pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
/// Declaration modifiers attached to the callable header.
pub modifiers: ArenaVec<'arena, CallableModifier>,
/// Formal parameters in source order.
pub parameters: ArenaVec<'arena, ParameterRef<'src, 'arena>>,
/// Optional callable body.
///
/// `None` represents a header-only declaration terminated by `;`.
/// `Some(...)` stores the parsed block statements belonging to the body.
pub body: Option<BlockBody<'src, 'arena>>,
}
/// Stable arena reference to a callable definition node.
pub type CallableDefinitionRef<'src, 'arena> = ArenaNode<'arena, CallableDefinition<'src, 'arena>>;
/// Callable declaration modifier kind.
///
/// These modifiers apply to the callable declaration itself rather than to an
/// individual parameter. Each unit variant corresponds to the keyword of the
/// same (lower-cased) spelling.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CallableModifierKind {
    Final,
    /// `native` or `native(<index>)`
    Native(Option<u128>),
    Abstract,
    Transient,
    Public,
    Protected,
    Private,
    Static,
    /// `config(<name>)`
    Config(IdentifierToken),
    Const,
    Deprecated,
    NoExport,
    Export,
    Simulated,
    Latent,
    Iterator,
    Singular,
    Exec,
    Reliable,
    Unreliable,
    NativeReplication,
}
impl TryFrom<Keyword> for CallableModifierKind {
type Error = ();
/// Converts a keyword into a [`CallableModifierKind`] when the modifier
/// is fully determined by the keyword alone.
///
/// Returns `Err(())` for keywords that either do not represent callable
/// modifiers or require additional syntax
/// (e.g. `native(...)`, `config(...)`).
#[allow(clippy::enum_glob_use)]
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
use CallableModifierKind::*;
let kind = match keyword {
Keyword::Final => Final,
Keyword::Abstract => Abstract,
Keyword::Transient => Transient,
Keyword::Public => Public,
Keyword::Protected => Protected,
Keyword::Private => Private,
Keyword::Static => Static,
Keyword::Const => Const,
Keyword::Deprecated => Deprecated,
Keyword::NoExport => NoExport,
Keyword::Export => Export,
Keyword::Simulated => Simulated,
Keyword::Latent => Latent,
Keyword::Iterator => Iterator,
Keyword::Singular => Singular,
Keyword::Exec => Exec,
Keyword::Reliable => Reliable,
Keyword::Unreliable => Unreliable,
Keyword::NativeReplication => NativeReplication,
_ => return Err(()),
};
Ok(kind)
}
}
/// Callable modifier together with its full source span.
///
/// A modifier may occupy more than one token in source, for example when it
/// carries an argument like `native(12)` or `config(System)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct CallableModifier {
/// Modifier kind.
pub kind: CallableModifierKind,
/// Span covering the full modifier syntax.
pub span: AstSpan,
}
impl Keyword {
    /// Reports whether this keyword can start a callable modifier.
    ///
    /// Covers the argument-taking modifiers (`native`, `config`) as well as
    /// every keyword that maps directly onto a [`CallableModifierKind`].
    #[must_use]
    pub fn is_callable_modifier(self) -> bool {
        if matches!(self, Self::Native | Self::Config) {
            return true;
        }
        CallableModifierKind::try_from(self).is_ok()
    }

    /// Reports whether this keyword introduces a callable declaration.
    ///
    /// `operator` is handled separately because it needs extra syntax
    /// (its precedence argument) before a [`CallableKind`] can be built.
    #[must_use]
    pub fn is_callable_kind_keyword(self) -> bool {
        if matches!(self, Self::Operator) {
            return true;
        }
        CallableKind::try_from(self).is_ok()
    }
}

View File

@ -0,0 +1,290 @@
//! Expression AST nodes.
//!
//! This module defines ordinary expressions together with expression-shaped
//! control-flow and block forms parsed by the language.
use super::{
AstSpan, IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator,
QualifiedIdentifierRef, StatementRef,
};
use crate::arena::ArenaVec;
use super::super::lexer::TokenPosition;
use crate::arena::{Arena, ArenaNode, ArenaString};
/// Expression node used for both ordinary expressions and expression-shaped
/// statement/control-flow forms.
///
/// This AST is intentionally broad: besides operators and literals, it also
/// includes blocks and control-flow constructs that syntactically occupy
/// expression parsing positions in the language.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, PartialEq)]
pub enum Expression<'src, 'arena> {
    /// Plain identifier expression.
    Identifier(IdentifierToken),
    /// String literal.
    ///
    /// The contents stored in arena memory are transformed (unescaped) version
    /// of raw strings from the source.
    String(ArenaString<'arena>),
    /// Integer literal.
    ///
    /// NOTE(review): stored as `u128`, so a leading `-` presumably arrives as
    /// `PrefixUnary(Minus, ...)` — confirm in the expression parser.
    Integer(u128),
    /// Floating-point literal.
    Float(f64),
    /// Boolean literal.
    Bool(bool),
    /// `None` literal / null-like language value.
    None,
    /// Explicit parenthesized subexpression: `(expr)`.
    ///
    /// Parentheses are preserved as a node instead of being discarded so later
    /// stages can retain grouping information for diagnostics, formatting, or
    /// source-faithful reconstruction.
    Parentheses(ExpressionRef<'src, 'arena>),
    /// Class-type reference parsed as a qualified identifier path.
    ///
    /// This is used for class-like type mentions that are not represented as a
    /// tagged name literal.
    ClassType(QualifiedIdentifierRef<'arena>),
    /// Tagged or untagged quoted name literal.
    ///
    /// Examples:
    /// - `class'Foo'`
    /// - `Texture'Pkg.Group.Name'`
    /// - `'Pkg.Group.Name'` if the grammar permits an untagged form
    ///
    /// `tag` stores the leading identifier token when present. `name` is the
    /// raw content between quotes and is preserved exactly as written.
    NameLiteral {
        /// Leading identifier token, when present.
        tag: Option<IdentifierToken>,
        /// Raw content between the quotes, borrowed from the source text.
        name: &'src str,
    },
    /// Indexing operation: `target[index]`.
    ///
    /// This is produced after postfix parsing and binds tighter than any infix
    /// operator.
    Index {
        target: ExpressionRef<'src, 'arena>,
        index: ExpressionRef<'src, 'arena>,
    },
    /// Member access: `target.name`.
    ///
    /// The member name is stored as a token reference rather than an owned
    /// string so later stages can resolve exact spelling and source location
    /// from the lexer/token stream.
    Member {
        target: ExpressionRef<'src, 'arena>,
        name: IdentifierToken,
    },
    /// Call expression: `callee(arg1, arg2, ...)`.
    ///
    /// Arguments are stored as `Option<ExpressionRef>` to preserve omitted
    /// arguments in syntaxes that allow empty slots.
    Call {
        callee: ExpressionRef<'src, 'arena>,
        arguments: ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>>,
    },
    /// Prefix unary operator application: `op rhs`.
    PrefixUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
    /// Postfix unary operator application: `lhs op`.
    PostfixUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
    /// Binary operator application: `lhs op rhs`.
    Binary(
        ExpressionRef<'src, 'arena>,
        InfixOperator,
        ExpressionRef<'src, 'arena>,
    ),
    /// Block expression / statement block: `{ ... }`.
    ///
    /// The contained statements are preserved in source order.
    Block(StatementList<'src, 'arena>),
    /// Conditional expression / statement.
    ///
    /// Both arms use `BranchBody` so the parser can preserve legacy one-line
    /// bodies, optional trailing semicolons, and recovery anchors.
    If {
        condition: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
        else_body: Option<BranchBody<'src, 'arena>>,
    },
    /// `while (condition) body`
    While {
        condition: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
    },
    /// `do body until (condition)`
    DoUntil {
        condition: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
    },
    /// `foreach iterator body`
    ///
    /// The iteration source / iterator expression is stored as a normal
    /// expression node because the language permits nontrivial syntax there.
    ForEach {
        iterated_expression: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
    },
    /// Traditional three-part `for` loop.
    ///
    /// Each header component is optional to support forms such as:
    /// - `for (;;)`
    /// - `for (init;;)`
    /// - `for (;cond;)`
    /// - `for (;;step)`
    For {
        initialization: Option<ExpressionRef<'src, 'arena>>,
        condition: Option<ExpressionRef<'src, 'arena>>,
        step: Option<ExpressionRef<'src, 'arena>>,
        body: BranchBody<'src, 'arena>,
    },
    /// `switch` construct.
    ///
    /// `cases` contains all explicit case arms in source order.
    /// `default_arm` stores the statements of the default branch, if present.
    Switch {
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
    },
    /// `goto` statement.
    ///
    /// Stores the token position of the target token rather than duplicating
    /// its textual representation in the AST. On successful parsing refers to
    /// either identifier or name literal.
    Goto(TokenPosition),
    /// `continue` statement.
    Continue,
    /// `break` statement, optionally with an attached expression if the
    /// language form allows one.
    Break(Option<ExpressionRef<'src, 'arena>>),
    /// `return` statement, optionally carrying a returned expression.
    Return(Option<ExpressionRef<'src, 'arena>>),
    /// Object construction / allocation form using the language's `new` syntax.
    ///
    /// The first three arguments are optional positional control arguments.
    /// `class_specifier` is the required class expression that identifies what
    /// should be constructed.
    New {
        outer_argument: Option<ExpressionRef<'src, 'arena>>,
        name_argument: Option<ExpressionRef<'src, 'arena>>,
        flags_argument: Option<ExpressionRef<'src, 'arena>>,
        class_specifier: ExpressionRef<'src, 'arena>,
    },
    /// Recovery placeholder inserted when an expression could not be parsed.
    ///
    /// This allows the parser to continue building a larger AST and report more
    /// than one error in a single pass.
    Error,
}
/// Statements contained in a `{ ... }` block.
pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'arena>>;
/// Statements contained in a `{ ... }` block together with the span of the
/// whole block.
#[derive(Debug, PartialEq)]
pub struct BlockBody<'src, 'arena> {
    /// Statements in source order.
    pub statements: StatementList<'src, 'arena>,
    /// Span covering the whole block, braces included.
    pub span: AstSpan,
}
/// Stable arena reference to an expression node.
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
/// Optional expression payload used in grammar positions where an expression
/// may be omitted entirely.
pub type OptionalExpression<'src, 'arena> = Option<ExpressionRef<'src, 'arena>>;
/// Body of a control-flow branch.
///
/// Branch bodies are stored separately so constructs such as `if`, `while`,
/// and `for` can preserve both the parsed body and branch-specific source
/// details needed for diagnostics and recovery.
#[derive(Debug, PartialEq)]
pub struct BranchBody<'src, 'arena> {
    /// Parsed branch payload.
    ///
    /// This is `None` when the body is absent or could not be parsed in a
    /// recoverable way.
    pub expression: Option<ExpressionRef<'src, 'arena>>,
    /// Optional semicolon that appears immediately after a non-block branch
    /// body in legacy constructs such as `if`, `for`, `while`, etc.
    ///
    /// This is intentionally preserved rather than normalized away so later
    /// stages can diagnose or reproduce source structure more precisely.
    pub semicolon_position: Option<TokenPosition>,
    /// Token position that can be used as a fallback end anchor for spans and
    /// diagnostics when the body itself is missing.
    ///
    /// In malformed constructs this may be the only reliable location attached
    /// to the branch.
    pub end_anchor_token_position: TokenPosition,
}
/// One `case` arm inside a `switch`.
///
/// UnrealScript-style syntax allows each arm to have multiple labels and uses
/// statement lists as bodies, with fallthrough being possible unless control
/// flow terminates explicitly.
#[derive(Debug, PartialEq)]
pub struct SwitchCase<'src, 'arena> {
    /// Case labels associated with this arm, in source order.
    ///
    /// Labels are stored as expressions because the language allows
    /// expression-valued labels rather than only simple constants.
    pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>,
    /// Statements belonging to the arm body.
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
}
/// Stable arena reference to a `switch` case arm.
pub type SwitchCaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
impl<'arena> Expression<'_, 'arena> {
    /// Allocate a `Binary` node whose span stretches from the start of
    /// `left_hand_side` to the end of `right_hand_side`.
    #[must_use]
    pub fn new_binary(
        arena: &'arena Arena,
        left_hand_side: ArenaNode<'arena, Self>,
        op: InfixOperator,
        right_hand_side: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        let full_span = AstSpan::merge(left_hand_side.span(), right_hand_side.span());
        let node = Self::Binary(left_hand_side, op, right_hand_side);
        ArenaNode::new_in(node, full_span, arena)
    }

    /// Allocate a `PrefixUnary` node whose span stretches from the operator
    /// token to the end of `right_hand_side`.
    #[must_use]
    pub fn new_prefix(
        arena: &'arena Arena,
        operation_position: TokenPosition,
        operation: PrefixOperator,
        right_hand_side: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        let last_token = right_hand_side.span().token_to;
        let node = Self::PrefixUnary(operation, right_hand_side);
        ArenaNode::new_in(node, AstSpan::range(operation_position, last_token), arena)
    }

    /// Allocate a `PostfixUnary` node whose span stretches from the start of
    /// `left_hand_side` to the operator token.
    #[must_use]
    pub fn new_postfix(
        arena: &'arena Arena,
        left_hand_side: ArenaNode<'arena, Self>,
        operation: PostfixOperator,
        operation_position: TokenPosition,
    ) -> ArenaNode<'arena, Self> {
        let first_token = left_hand_side.span().token_from;
        let node = Self::PostfixUnary(left_hand_side, operation);
        ArenaNode::new_in(node, AstSpan::range(first_token, operation_position), arena)
    }
}

343
rottlib/src/ast/mod.rs Normal file
View File

@ -0,0 +1,343 @@
// NOTE: trailing `;` tokens are included in statement node spans as the very
// last token.
// TODO: audit which syntactic forms should and should not get dedicated nodes.
use crate::arena::ArenaVec;
use super::lexer::TokenPosition;
use crate::arena::{Arena, ArenaNode, ArenaString};
pub mod callables;
pub mod expressions;
pub mod operators;
pub mod types;
pub use callables::*;
pub use expressions::*;
pub use operators::*;
pub use types::*;
// TODO: get rid of the `IdentifierToken` wrapper once token positions can be
// referenced directly.
/// Position of an identifier token in the token stream.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct IdentifierToken(pub TokenPosition);
/// Position of an operator token in the token stream.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct OperatorToken(pub TokenPosition);
/// Dot-separated identifier path such as `Pkg.Group.Name`.
#[derive(Debug, Hash, PartialEq, Eq)]
pub struct QualifiedIdentifier<'arena> {
    /// First path segment; always present.
    pub head: IdentifierToken,
    /// Remaining segments; `None` means the path is a single identifier.
    pub tail: Option<ArenaVec<'arena, IdentifierToken>>,
}
/// Stable arena reference to a qualified identifier.
pub type QualifiedIdentifierRef<'arena> = ArenaNode<'arena, QualifiedIdentifier<'arena>>;
/// Token-index span of an AST node.
///
/// Both endpoints are inclusive.
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AstSpan {
    /// First token of the node (inclusive).
    pub token_from: TokenPosition,
    /// Last token of the node (inclusive).
    pub token_to: TokenPosition,
}
impl AstSpan {
    /// Merge two spans into a single span covering both.
    ///
    /// Assumes the arguments appear in source order: the result starts at
    /// `left_span.token_from` and ends at `right_span.token_to` without
    /// re-checking the ordering.
    #[must_use]
    pub const fn merge(left_span: &Self, right_span: &Self) -> Self {
        Self {
            token_from: left_span.token_from,
            token_to: right_span.token_to,
        }
    }
    /// Single-token span.
    #[inline]
    #[must_use]
    pub const fn new(single_index: TokenPosition) -> Self {
        Self {
            token_from: single_index,
            token_to: single_index,
        }
    }
    /// Span covering `from` through `to`, both inclusive.
    #[inline]
    #[must_use]
    pub const fn range(from: TokenPosition, to: TokenPosition) -> Self {
        Self {
            token_from: from,
            token_to: to,
        }
    }
    /// Returns a copy of this span extended rightwards to include
    /// `right_most_index` (no-op if the span already reaches it).
    #[inline]
    #[must_use]
    pub fn extended(&self, right_most_index: TokenPosition) -> Self {
        Self {
            token_from: self.token_from,
            token_to: std::cmp::max(self.token_to, right_most_index),
        }
    }
    /// Extends this span in place to include `right_most_index` (no-op if the
    /// span already reaches it).
    #[inline]
    pub fn extend_to(&mut self, right_most_index: TokenPosition) {
        if right_most_index > self.token_to {
            self.token_to = right_most_index;
        }
    }
}
impl<'arena> QualifiedIdentifier<'arena> {
    /// Returns `true` when the path consists of a single identifier.
    #[inline]
    #[must_use]
    pub const fn is_single(&self) -> bool {
        self.tail.is_none()
    }
    /// Number of identifier segments in the path; always at least 1.
    #[inline]
    #[allow(clippy::len_without_is_empty)] // A path always has a head segment.
    #[must_use]
    pub fn len(&self) -> usize {
        1 + self.tail.as_ref().map_or(0, |v| v.len())
    }
    /// First segment of the path.
    #[inline]
    #[must_use]
    pub const fn head(&self) -> IdentifierToken {
        self.head
    }
    /// Iterates all identifier segments in order without allocating.
    pub fn iter(&self) -> impl Iterator<Item = IdentifierToken> + '_ {
        core::iter::once(self.head).chain(self.tail.iter().flat_map(|v| v.iter().copied()))
    }
    /// Cheap constructor from a single identifier token. No `Vec` allocated.
    #[must_use]
    pub fn from_ident(arena: &'arena Arena, id: IdentifierToken) -> QualifiedIdentifierRef<'arena> {
        // Delegate so the two single-segment constructors cannot drift apart.
        Self::from_position(arena, id.0)
    }
    /// Cheap constructor from a raw token position. No `Vec` allocated.
    #[must_use]
    pub fn from_position(
        arena: &'arena Arena,
        position: TokenPosition,
    ) -> QualifiedIdentifierRef<'arena> {
        let span = AstSpan::new(position);
        ArenaNode::new_in(
            Self {
                head: IdentifierToken(position),
                tail: None,
            },
            span,
            arena,
        )
    }
}
/// Statement node used inside blocks, function bodies, and state bodies.
#[derive(Debug, PartialEq)]
pub enum Statement<'src, 'arena> {
    /// Empty statement produced by stray semicolons: `;;;;`.
    Empty,
    /// Expression used in statement position.
    Expression(ExpressionRef<'src, 'arena>),
    /// Local variable declaration without assignment:
    /// `local int i, j, k`.
    LocalVariableDeclaration {
        /// Declared type shared by all declarators.
        type_spec: TypeSpecifierRef<'src, 'arena>,
        /// Comma-separated declarators sharing `type_spec`.
        declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    },
    /// Jump label.
    Label(ArenaString<'arena>),
    /// Nested function definitions inside blocks or states.
    Function(CallableDefinitionRef<'src, 'arena>),
    /// Recovery placeholder injected in place of parts that couldn't be
    /// parsed.
    Error,
}
/// Stable arena reference to a statement node.
pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;
/// Simple literal value permitted in top-level class declarations (e.g. the
/// right-hand side of a `const` definition).
#[derive(Debug)]
pub enum DeclarationLiteral<'src, 'arena> {
    /// `None` literal.
    None,
    /// Boolean literal.
    Bool(bool),
    /// Integer literal; signed (`i128`) unlike `Expression::Integer` (`u128`).
    Integer(i128),
    /// Floating-point literal.
    Float(f64),
    /// String literal stored in arena memory.
    String(ArenaString<'arena>),
    /// Bare identifier used as a value; borrowed from the source text.
    Identifier(&'src str),
    /// Tagged quoted name literal such as `Texture'Pkg.Group.Name'`.
    TaggedName {
        /// Leading identifier tag.
        tag: IdentifierToken,
        /// Content between the quotes.
        quoted: ArenaString<'arena>,
    },
}
/// Declaration literal together with the position of its token.
///
/// NOTE(review): unlike the other `*Ref` aliases in this module, this is a
/// plain struct rather than an `ArenaNode` — consider renaming to avoid
/// confusion.
#[derive(Debug)]
pub struct DeclarationLiteralRef<'src, 'arena> {
    /// Parsed literal value.
    pub literal: DeclarationLiteral<'src, 'arena>,
    /// Position of the literal token in the source stream.
    pub position: TokenPosition,
}
impl IdentifierToken {
    /// Single-token span covering exactly this identifier.
    #[must_use]
    pub const fn span(self) -> AstSpan {
        AstSpan::range(self.0, self.0)
    }
}
/// Modifier attached to a `class` declaration header.
///
/// Derives `Debug` for parity with the other AST modifier enums in this
/// module (e.g. `StateModifier`, `Reliability`).
#[derive(Debug)]
pub enum ClassModifier<'arena> {
    Final,
    Native,
    Abstract,
    Transient,
    Public,
    Protected,
    Private,
    Static,
    /// `config` with an optional configuration name: `config(System)`.
    Config(Option<IdentifierToken>),
    NativeReplication,
    ExportStructs,
    SafeReplace,
    Const,
    Deprecated,
    NoExport,
    Export,
    Localized,
    Placeable,
    NotPlaceable,
    Instanced,
    EditConst,
    EditInline,
    EditInlineNew,
    NotEditInlineNew,
    CollapseCategories,
    DontCollapseCategories,
    /// `hidecategories(...)` with the listed category identifiers.
    HideCategories(ArenaVec<'arena, IdentifierToken>),
    /// `showcategories(...)` with the listed category identifiers.
    ShowCategories(ArenaVec<'arena, IdentifierToken>),
    /// `within` followed by an outer-class identifier.
    Within(IdentifierToken),
    /// `dependson` followed by a class identifier.
    DependsOn(IdentifierToken),
    GlobalConfig,
    PerObjectConfig,
    DynamicRecompile,
    HideDropdown,
    ParseConfig,
    CacheExempt,
}
/// Stable arena reference to a class modifier node.
pub type ClassModifierRef<'arena> = ArenaNode<'arena, ClassModifier<'arena>>;
/// Parsed `class` declaration header.
// NOTE(review): `modifiers` uses a std `Vec` while the rest of the AST stores
// sequences in `ArenaVec` — confirm whether this is intentional.
pub struct ClassDeclaration<'arena> {
    /// Declared class name.
    pub name: IdentifierToken,
    /// Optional parent class named after `extends`.
    pub parent: Option<IdentifierToken>,
    /// Class modifiers in source order.
    pub modifiers: Vec<ClassModifierRef<'arena>>,
}
/// Class-level `var` declaration.
#[derive(Debug)]
pub struct ClassVarDecl<'src, 'arena> {
    /// `var(<...>)` editor specifiers, e.g. `var(Display, "Advanced")`.
    /// Each item is an `ArenaNode`, so token locations are preserved.
    pub paren_specs: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
    /// Variable modifiers like public/protected/private/static/const/...
    /// Each `VarModifier` records its own token position; order preserved.
    pub modifiers: ArenaVec<'arena, VarModifier>,
    /// Declared type: named, inline enum, or inline struct.
    pub type_spec: TypeSpecifierRef<'src, 'arena>,
    /// Comma-separated declarators sharing the type: `a, b=expr`.
    pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    /// Span covering the whole declaration.
    pub span: AstSpan,
}
/// Stable arena reference to a class `var` declaration.
pub type ClassVarDeclRef<'src, 'arena> = ArenaNode<'arena, ClassVarDecl<'src, 'arena>>;
/// Class-level `const` declaration: `const NAME = literal;`.
#[derive(Debug)]
pub struct ClassConstDecl<'src, 'arena> {
    /// Declared constant name.
    pub name: IdentifierToken,
    /// Literal value with its token position.
    pub value: DeclarationLiteralRef<'src, 'arena>,
    /// Span covering the whole declaration.
    pub span: AstSpan,
}
/// Stable arena reference to a class `const` declaration.
pub type ClassConstDeclRef<'src, 'arena> = ArenaNode<'arena, ClassConstDecl<'src, 'arena>>;
/// One top-level member of a class body.
pub enum ClassMember<'src, 'arena>
where
    'src: 'arena,
{
    /// Function / event / operator definition.
    Function(CallableDefinitionRef<'src, 'arena>),
    /// Top-level `enum` definition.
    TypeDefEnum(EnumDefRef<'src, 'arena>),
    /// Top-level `struct` definition.
    TypeDefStruct(StructDefRef<'src, 'arena>),
    /// `var` declaration.
    Var(ClassVarDeclRef<'src, 'arena>),
    /// `replication { ... }` block.
    Replication(ReplicationBlockRef<'src, 'arena>),
    /// `state` declaration.
    State(StateDeclRef<'src, 'arena>),
    /// `const` declaration.
    Const(ClassConstDeclRef<'src, 'arena>),
    /// `#exec` directive line.
    Exec(ExecDirectiveRef<'arena>),
}
/// Stable arena reference to a class member.
pub type ClassMemberRef<'src, 'arena> = ArenaNode<'arena, ClassMember<'src, 'arena>>;
/// Reliability of a replication rule.
#[derive(Clone, Copy, Debug)]
pub enum Reliability {
    /// `reliable`
    Reliable,
    /// `unreliable`
    Unreliable,
}
/// One rule inside a `replication` block.
#[derive(Debug)]
pub struct ReplicationRule<'src, 'arena> {
    /// `reliable` or `unreliable`.
    pub reliability: Reliability,
    /// Condition from `if (<expr>)`, or `None` when absent.
    pub condition: Option<ExpressionRef<'src, 'arena>>,
    /// Replicated members listed after the condition: `a, b, Foo()`.
    pub members: ArenaVec<'arena, IdentifierToken>,
    /// Span covering the whole rule.
    pub span: AstSpan,
}
/// Stable arena reference to a replication rule.
pub type ReplicationRuleRef<'src, 'arena> = ArenaNode<'arena, ReplicationRule<'src, 'arena>>;
/// A whole `replication { ... }` block.
#[derive(Debug)]
pub struct ReplicationBlock<'src, 'arena> {
    /// Rules in source order.
    pub rules: ArenaVec<'arena, ReplicationRuleRef<'src, 'arena>>,
    /// Span covering the whole block.
    pub span: AstSpan,
}
/// Stable arena reference to a replication block.
pub type ReplicationBlockRef<'src, 'arena> = ArenaNode<'arena, ReplicationBlock<'src, 'arena>>;
// ---------- States ----------
/// Modifier attached to a `state` declaration.
#[derive(Clone, Copy, Debug)]
pub enum StateModifier {
    /// `auto`
    Auto,
    /// `simulated`
    Simulated,
}
/// A `state` declaration inside a class body.
#[derive(Debug)]
pub struct StateDecl<'src, 'arena> {
    /// Declared state name.
    pub name: IdentifierToken,
    /// Optional base state from `extends BaseState`.
    pub parent: Option<IdentifierToken>,
    /// Modifiers such as `auto` and `simulated`, in source order.
    pub modifiers: ArenaVec<'arena, StateModifier>,
    /// Ignored functions from `ignores Foo, Bar;`, when present.
    pub ignores: Option<ArenaVec<'arena, IdentifierToken>>,
    /// Body: ordinary statements plus nested function definitions (see `Statement::Function`).
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
    /// Span covering the whole declaration.
    pub span: AstSpan,
}
/// Stable arena reference to a state declaration.
pub type StateDeclRef<'src, 'arena> = ArenaNode<'arena, StateDecl<'src, 'arena>>;
/// A `#exec` directive preserved verbatim.
#[derive(Debug)]
pub struct ExecDirective<'arena> {
    /// Full directive line without trailing newline(s).
    pub text: ArenaString<'arena>,
    /// Span covering the directive.
    pub span: AstSpan,
}
/// Stable arena reference to an exec directive.
pub type ExecDirectiveRef<'arena> = ArenaNode<'arena, ExecDirective<'arena>>;
/// Complete parsed class: header declaration plus body members.
pub struct ClassDefinition<'src, 'arena>
where
    'src: 'arena,
{
    /// `class ... extends ... <modifiers>;` header.
    pub header: ClassDeclaration<'arena>,
    /// Class body members in source order.
    pub members: ArenaVec<'arena, ClassMemberRef<'src, 'arena>>,
}

View File

@ -0,0 +1,268 @@
//! Operator AST nodes.
//!
//! This module defines the prefix, postfix, and infix operator kinds used by
//! expression AST nodes.
//!
//! The enums here represent only the *syntactic operator category* recorded in
//! the AST. They do not encode precedence, associativity, overload behavior,
//! or token spelling details beyond the normalized operator kind itself.
//! Those concerns are handled by the expression parser and precedence tables.
use crate::lexer::{Keyword, Token, TokenPosition};
use core::convert::TryFrom;
/// Prefix unary operators.
///
/// See [`PrefixOperatorName`] for the kind paired with its source position.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum PrefixOperator {
    /// Logical negation: `!expr`.
    Not,
    /// Arithmetic negation: `-expr`.
    Minus,
    /// Unary plus: `+expr`.
    Plus,
    /// Bitwise negation: `~expr`.
    BitwiseNot,
    /// Prefix increment: `++expr`.
    Increment,
    /// Prefix decrement: `--expr`.
    Decrement,
}
/// Postfix unary operators.
///
/// See [`PostfixOperatorName`] for the kind paired with its source position.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum PostfixOperator {
    /// Postfix increment: `expr++`.
    Increment,
    /// Postfix decrement: `expr--`.
    Decrement,
}
/// Binary / infix operators.
///
/// These operators appear between left-hand side and right-hand side operands.
/// This enum stores only the normalized AST-level operator kind.
///
/// The parser assigns precedence and associativity separately; see
/// `infix_operator_info` for the precedence table.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum InfixOperator {
    /// Simple assignment: `left_hand_side = right_hand_side`.
    Assign,
    /// Multiplicative assignment: `left_hand_side *= right_hand_side`.
    MultiplyAssign,
    /// Division assignment: `left_hand_side /= right_hand_side`.
    DivideAssign,
    /// Modulo assignment: `left_hand_side %= right_hand_side`.
    ModuloAssign,
    /// Additive assignment: `left_hand_side += right_hand_side`.
    PlusAssign,
    /// Subtractive assignment: `left_hand_side -= right_hand_side`.
    MinusAssign,
    /// String concatenation assignment: `left_hand_side $= right_hand_side`.
    ConcatAssign,
    /// Space-concatenation assignment: `left_hand_side @= right_hand_side`.
    ConcatSpaceAssign,
    /// String concatenation without inserted whitespace:
    /// `left_hand_side $ right_hand_side`.
    Concat,
    /// String concatenation with an inserted space:
    /// `left_hand_side @ right_hand_side`.
    ConcatSpace,
    /// Logical conjunction: `left_hand_side && right_hand_side`.
    And,
    /// Logical exclusive-or: `left_hand_side ^^ right_hand_side`.
    Xor,
    /// Logical disjunction: `left_hand_side || right_hand_side`.
    Or,
    /// Bitwise AND: `left_hand_side & right_hand_side`.
    BitwiseAnd,
    /// Bitwise OR: `left_hand_side | right_hand_side`.
    BitwiseOr,
    /// Bitwise XOR: `left_hand_side ^ right_hand_side`.
    BitwiseXor,
    /// Inequality test: `left_hand_side != right_hand_side`.
    NotEqual,
    /// Equality test: `left_hand_side == right_hand_side`.
    Equal,
    /// Approximate equality test: `left_hand_side ~= right_hand_side`.
    ApproximatelyEqual,
    /// Less-than comparison: `left_hand_side < right_hand_side`.
    Less,
    /// Less-than-or-equal comparison: `left_hand_side <= right_hand_side`.
    LessEqual,
    /// Greater-than comparison: `left_hand_side > right_hand_side`.
    Greater,
    /// Greater-than-or-equal comparison: `left_hand_side >= right_hand_side`.
    GreaterEqual,
    /// UnrealScript-specific directional comparison:
    /// `left_hand_side ClockwiseFrom right_hand_side`.
    ClockwiseFrom,
    /// Left shift: `left_hand_side << right_hand_side`.
    LeftShift,
    /// Logical right shift: `left_hand_side >>> right_hand_side`.
    LogicalRightShift,
    /// Arithmetic / ordinary right shift: `left_hand_side >> right_hand_side`.
    RightShift,
    /// Addition: `left_hand_side + right_hand_side`.
    Plus,
    /// Subtraction: `left_hand_side - right_hand_side`.
    Minus,
    /// Remainder / modulo: `left_hand_side % right_hand_side`.
    Modulo,
    /// Multiplication: `left_hand_side * right_hand_side`.
    Multiply,
    /// Division: `left_hand_side / right_hand_side`.
    Divide,
    /// Dot product: `left_hand_side Dot right_hand_side`.
    ///
    /// This is spelled as a keyword-level operator in source.
    Dot,
    /// Cross product: `left_hand_side Cross right_hand_side`.
    ///
    /// This is spelled as a keyword-level operator in source.
    Cross,
    /// Exponentiation: `left_hand_side ** right_hand_side`.
    Exponentiation,
}
/// Prefix operator kind together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PrefixOperatorName {
    /// Operator kind.
    pub kind: PrefixOperator,
    /// Position of the operator token in the source stream.
    pub position: TokenPosition,
}
/// Infix operator kind together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct InfixOperatorName {
    /// Operator kind.
    pub kind: InfixOperator,
    /// Position of the operator token in the source stream.
    pub position: TokenPosition,
}
/// Postfix operator kind together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PostfixOperatorName {
    /// Operator kind.
    pub kind: PostfixOperator,
    /// Position of the operator token in the source stream.
    pub position: TokenPosition,
}
impl TryFrom<Token> for PostfixOperator {
    type Error = ();

    /// Maps `++` / `--` tokens to their postfix operator kinds; every other
    /// token is rejected.
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        match token {
            Token::Increment => Ok(Self::Increment),
            Token::Decrement => Ok(Self::Decrement),
            _ => Err(()),
        }
    }
}
impl TryFrom<Token> for PrefixOperator {
    type Error = ();

    /// Maps tokens that can start a prefix expression to their operator
    /// kinds; every other token is rejected.
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        match token {
            Token::Not => Ok(Self::Not),
            Token::Minus => Ok(Self::Minus),
            Token::Plus => Ok(Self::Plus),
            Token::BitwiseNot => Ok(Self::BitwiseNot),
            Token::Increment => Ok(Self::Increment),
            Token::Decrement => Ok(Self::Decrement),
            _ => Err(()),
        }
    }
}
/// Infix operator paired with the precedence rank used for parsing its
/// right-hand operand.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InfixOperatorInfo {
    /// Normalized operator kind.
    pub operator: InfixOperator,
    /// Precedence rank of the right-hand side; lower values bind tighter
    /// (e.g. `**` at 12 vs. assignment at 34).
    pub right_precedence_rank: u8,
}
/// Precedence table for infix operators.
///
/// Returns `None` for tokens that are not infix operators. Lower precedence
/// ranks bind tighter: `**` (12) binds more tightly than `*` (16), which
/// binds more tightly than assignment (34) and concatenation (40/44).
pub(crate) const fn infix_operator_info(token: Token) -> Option<InfixOperatorInfo> {
    use InfixOperator as Op;
    let (right_precedence_rank, operator) = match token {
        Token::Exponentiation => (12, Op::Exponentiation),
        Token::Multiply => (16, Op::Multiply),
        Token::Divide => (16, Op::Divide),
        Token::Keyword(Keyword::Cross) => (16, Op::Cross),
        Token::Keyword(Keyword::Dot) => (16, Op::Dot),
        Token::Modulo => (18, Op::Modulo),
        Token::Plus => (20, Op::Plus),
        Token::Minus => (20, Op::Minus),
        Token::LeftShift => (22, Op::LeftShift),
        Token::RightShift => (22, Op::RightShift),
        Token::LogicalRightShift => (22, Op::LogicalRightShift),
        Token::Less => (24, Op::Less),
        Token::LessEqual => (24, Op::LessEqual),
        Token::Greater => (24, Op::Greater),
        Token::GreaterEqual => (24, Op::GreaterEqual),
        Token::Equal => (24, Op::Equal),
        Token::ApproximatelyEqual => (24, Op::ApproximatelyEqual),
        Token::Keyword(Keyword::ClockwiseFrom) => (24, Op::ClockwiseFrom),
        Token::NotEqual => (26, Op::NotEqual),
        Token::BitwiseAnd => (28, Op::BitwiseAnd),
        Token::BitwiseXor => (28, Op::BitwiseXor),
        Token::BitwiseOr => (28, Op::BitwiseOr),
        Token::LogicalAnd => (30, Op::And),
        Token::LogicalXor => (30, Op::Xor),
        Token::LogicalOr => (32, Op::Or),
        Token::MultiplyAssign => (34, Op::MultiplyAssign),
        Token::DivideAssign => (34, Op::DivideAssign),
        Token::PlusAssign => (34, Op::PlusAssign),
        Token::MinusAssign => (34, Op::MinusAssign),
        Token::Assign => (34, Op::Assign),
        Token::ModuloAssign => (34, Op::ModuloAssign),
        Token::Concat => (40, Op::Concat),
        Token::ConcatSpace => (40, Op::ConcatSpace),
        Token::ConcatAssign => (44, Op::ConcatAssign),
        Token::ConcatSpaceAssign => (44, Op::ConcatSpaceAssign),
        _ => return None,
    };
    Some(InfixOperatorInfo {
        operator,
        right_precedence_rank,
    })
}
impl TryFrom<Token> for InfixOperator {
    type Error = ();

    /// Accepts exactly the tokens known to the `infix_operator_info`
    /// precedence table.
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        match infix_operator_info(token) {
            Some(info) => Ok(info.operator),
            None => Err(()),
        }
    }
}

277
rottlib/src/ast/types.rs Normal file
View File

@ -0,0 +1,277 @@
//! Type-specifier and declaration AST nodes.
//!
//! This module defines syntactic forms used to represent type names, inline
//! type declarations, variable declarators, and declaration modifiers.
use super::{AstSpan, ExpressionRef, IdentifierToken, QualifiedIdentifierRef};
use crate::arena::{ArenaNode, ArenaString, ArenaVec};
use crate::lexer::{Keyword, Token, TokenPosition};
use core::convert::TryFrom;
/// Type syntax used in declarations, fields, and other type-annotated grammar
/// positions.
///
/// This enum covers both named types and inline type-definition forms supported
/// by the language.
#[derive(Debug, PartialEq)]
pub enum TypeSpecifier<'src, 'arena> {
    /// Named type reference such as `EDrawType` or `Pkg.Group.Type`.
    Named(QualifiedIdentifierRef<'arena>),
    /// Inline enum definition used directly in type position.
    ///
    /// Example:
    /// `enum EMyKind { A, B, C }`
    InlineEnum(EnumDefRef<'src, 'arena>),
    /// Inline struct definition used directly in type position.
    ///
    /// Example:
    /// `struct SMyData { var int X; }`
    InlineStruct(StructDefRef<'src, 'arena>),
    /// Generic array type: `array<...>`.
    ///
    /// The parser currently allows a sequence of variable-style modifiers to
    /// appear before the inner type and preserves them here.
    Array {
        /// Modifiers parsed before the inner type inside `array<...>`.
        element_modifiers: ArenaVec<'arena, VarModifier>,
        /// Element / inner type.
        element_type: TypeSpecifierRef<'src, 'arena>,
    },
    /// `class` or `class<SomeType>`.
    ///
    /// `None` represents a bare `class` with no type argument.
    Class(Option<QualifiedIdentifierRef<'arena>>),
}
/// Stable arena reference to a type-specifier node.
pub type TypeSpecifierRef<'src, 'arena> = ArenaNode<'arena, TypeSpecifier<'src, 'arena>>;
/// Enum definition used either inline in a type position or elsewhere in the
/// declaration grammar.
#[derive(Debug, PartialEq, Eq)]
pub struct EnumDefinition<'arena> {
    /// Declared enum name.
    pub name: IdentifierToken,
    /// Enum variants in source order.
    pub variants: ArenaVec<'arena, IdentifierToken>,
}
/// Stable arena reference to an enum definition.
pub type EnumDefRef<'src, 'arena> = ArenaNode<'arena, EnumDefinition<'arena>>;
/// Struct-level modifier kind.
///
/// These are modifiers that apply to the struct declaration itself rather than
/// to an individual field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StructModifierKind {
    Native,
    Export,
    NoExport,
    Transient,
    Deprecated,
    Init,
    Long,
}
/// Struct declaration modifier together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StructModifier {
    /// Modifier kind.
    pub kind: StructModifierKind,
    /// Position of the modifier token in the source stream.
    pub position: TokenPosition,
}
impl StructModifier {
    /// Construct a struct modifier from its kind and token position.
    #[must_use]
    pub const fn new(kind: StructModifierKind, position: TokenPosition) -> Self {
        Self { kind, position }
    }

    /// Single-token span covering just this modifier.
    #[must_use]
    pub const fn span(self) -> AstSpan {
        AstSpan::new(self.position)
    }
}
/// Struct field declaration.
///
/// A field stores the declared type together with one or more declarators
/// sharing that type, plus optional `var(...)` editor specifiers and ordinary
/// declaration modifiers.
#[derive(Debug, PartialEq)]
pub struct StructField<'src, 'arena> {
    /// Field type.
    pub type_specifier: TypeSpecifierRef<'src, 'arena>,
    /// One or more declarators declared with the same type.
    ///
    /// Examples:
    /// - `var int A;`
    /// - `var int A, B[4], C = 10;`
    pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    /// Optional `var(...)` editor specifiers attached to the field declaration.
    ///
    /// Example:
    /// `var(Display, "Advanced/Hidden")`
    pub editor_specifiers: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
    /// Declaration modifiers attached to the field.
    ///
    /// These are preserved in source order.
    pub declaration_modifiers: ArenaVec<'arena, VarModifier>,
}
/// Stable arena reference to a struct field declaration.
pub type StructFieldRef<'src, 'arena> = ArenaNode<'arena, StructField<'src, 'arena>>;
/// Struct definition used either inline in a type position or elsewhere in the
/// declaration grammar.
#[derive(Debug, PartialEq)]
pub struct StructDefinition<'src, 'arena> {
    /// Struct name, if present.
    ///
    /// Anonymous inline structs use `None`.
    pub name: Option<IdentifierToken>,
    /// Optional base struct after `extends`.
    pub base_type_name: Option<QualifiedIdentifierRef<'arena>>,
    /// Modifiers attached to the struct declaration itself.
    pub modifiers: ArenaVec<'arena, StructModifier>,
    /// Struct fields in source order.
    pub fields: ArenaVec<'arena, StructFieldRef<'src, 'arena>>,
}
/// Stable arena reference to a struct definition.
pub type StructDefRef<'src, 'arena> = ArenaNode<'arena, StructDefinition<'src, 'arena>>;
/// One declared variable name together with optional array size and initializer.
///
/// This node represents one declarator inside a declaration that may contain
/// several comma-separated declarators sharing the same type.
#[derive(Debug, PartialEq)]
pub struct VariableDeclarator<'src, 'arena> {
    /// Declared variable name.
    pub name: IdentifierToken,
    /// Optional initializer after `=`.
    pub initializer: Option<ExpressionRef<'src, 'arena>>,
    /// Optional array-size expression from `[expr]`.
    pub array_size: Option<ExpressionRef<'src, 'arena>>,
}
/// Stable arena reference to a variable declarator.
///
/// The node span is expected to cover the entire declarator, not only the
/// identifier token.
pub type VariableDeclaratorRef<'src, 'arena> = ArenaNode<'arena, VariableDeclarator<'src, 'arena>>;
/// One item inside `var(...)` editor specifiers.
#[derive(Debug, PartialEq, Eq)]
pub enum VarEditorSpecifier<'arena> {
    /// Identifier-like editor specifier such as `Display` or `Advanced`.
    Identifier(IdentifierToken),
    /// String editor specifier such as `"Category/Sub"`.
    String(ArenaString<'arena>),
}
/// Stable arena reference to an editor specifier.
pub type VarEditorSpecifierRef<'src, 'arena> = ArenaNode<'arena, VarEditorSpecifier<'arena>>;
/// Field / variable declaration modifier kind.
///
/// See `VarModifierKind::try_from(Keyword)` for the keyword-to-kind mapping.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VarModifierKind {
    Transient,
    Public,
    Protected,
    Private,
    Static,
    Const,
    Deprecated,
    NoExport,
    Export,
    Config,
    Localized,
    GlobalConfig,
    PerObjectConfig,
    Input,
    EdFindable,
    EditConst,
    EditConstArray,
    EditInline,
    EditInlineUse,
    EditInlineNew,
    EditInlineNotify,
    NotEditInlineNew,
    Automated,
    Native,
    Travel,
    Cache,
}
/// Variable-style declaration modifier together with its token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarModifier {
    /// Modifier kind.
    pub kind: VarModifierKind,
    /// Position of the modifier token in the source stream.
    pub position: TokenPosition,
}
impl TryFrom<Keyword> for VarModifierKind {
type Error = ();
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
use VarModifierKind::{
Automated, Cache, Config, Const, Deprecated, EdFindable, EditConst, EditConstArray,
EditInline, EditInlineNew, EditInlineNotify, EditInlineUse, Export, GlobalConfig,
Input, Localized, Native, NoExport, NotEditInlineNew, PerObjectConfig, Private,
Protected, Public, Static, Transient, Travel,
};
let kind = match keyword {
Keyword::Transient => Transient,
Keyword::Public => Public,
Keyword::Protected => Protected,
Keyword::Private => Private,
Keyword::Static => Static,
Keyword::Const => Const,
Keyword::Deprecated => Deprecated,
Keyword::NoExport => NoExport,
Keyword::Export => Export,
Keyword::Config => Config,
Keyword::Localized => Localized,
Keyword::GlobalConfig => GlobalConfig,
Keyword::PerObjectConfig => PerObjectConfig,
Keyword::EdFindable => EdFindable,
Keyword::EditConst => EditConst,
Keyword::EditConstArray => EditConstArray,
Keyword::EditInline => EditInline,
Keyword::EditInlineUse => EditInlineUse,
Keyword::EditInlineNew => EditInlineNew,
Keyword::EditInlineNotify => EditInlineNotify,
Keyword::NotEditInlineNew => NotEditInlineNew,
Keyword::Automated => Automated,
Keyword::Native => Native,
Keyword::Input => Input,
Keyword::Travel => Travel,
Keyword::Cache => Cache,
_ => return Err(()),
};
Ok(kind)
}
}
impl TryFrom<(Token, TokenPosition)> for VarModifier {
type Error = ();
fn try_from((token, position): (Token, TokenPosition)) -> Result<Self, Self::Error> {
let Token::Keyword(keyword) = token else {
return Err(());
};
let kind = VarModifierKind::try_from(keyword)?;
Ok(Self { kind, position })
}
}

View File

@ -0,0 +1,190 @@
use super::{Diagnostic, DiagnosticBuilder};
use crate::ast::AstSpan;
use crate::lexer::TokenPosition;
use crate::parser::{ParseError, ParseErrorKind};
use std::convert::From;
fn diagnostic_parenthesized_expression_empty(
error: ParseError,
left_parenthesis_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("empty parenthesized expression")
.primary_label(error.blame_span, "expected an expression before this `)`")
.secondary_label(
AstSpan::new(left_parenthesis_position),
"parenthesized expression starts here",
)
.help("Remove the parentheses or put an expression inside them.")
.build()
}
fn diagnostic_class_type_missing_type_argument(
error: ParseError,
left_angle_bracket_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("missing type argument in `class<...>`")
.primary_label(error.blame_span, "expected a type name here")
.secondary_label(
AstSpan::new(left_angle_bracket_position),
"type argument list starts here",
)
.help("Write a type name, for example `class<Pawn>`.")
.build()
}
fn diagnostic_class_type_missing_closing_angle_bracket(
error: ParseError,
left_angle_bracket_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("missing closing `>` in `class<...>`")
.primary_label(error.blame_span, "expected `>` here")
.secondary_label(
AstSpan::new(left_angle_bracket_position),
"this `<` starts the type argument",
)
.help("Add `>` to close the class type expression.")
.build()
}
fn diagnostic_parenthesized_expression_missing_closing_parenthesis(
error: ParseError,
left_parenthesis_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("missing closing `)`")
.primary_label(error.blame_span, "expected `)` here")
.secondary_label(
AstSpan::new(left_parenthesis_position),
"this `(` starts the parenthesized expression",
)
.help("Add `)` to close the expression.")
.build()
}
fn diagnostic_expression_expected(error: ParseError) -> Diagnostic {
let mut builder = DiagnosticBuilder::error("expected expression")
.primary_label(error.blame_span, "this token cannot start an expression")
.help(
"Expressions can start with literals, identifiers, `(`, `{`, or expression keywords.",
);
if let Some(related_span) = error.related_span {
builder = builder.secondary_label(related_span, "expression context starts here");
}
builder.build()
}
fn diagnostic_class_type_invalid_type_argument(
error: ParseError,
left_angle_bracket_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("invalid type argument in `class<...>`")
.primary_label(error.blame_span, "expected a qualified type name here")
.secondary_label(
AstSpan::new(left_angle_bracket_position),
"type argument list starts here",
)
.note("Only a qualified type name is accepted between `<` and `>` here.")
.build()
}
fn diagnostic_new_too_many_arguments(
error: ParseError,
left_parenthesis_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("too many arguments in `new(...)`")
.primary_label(error.blame_span, "unexpected extra argument")
.secondary_label(
AstSpan::new(left_parenthesis_position),
"this argument list accepts at most three arguments",
)
.note("The three slots are `outer`, `name`, and `flags`.")
.help("Remove the extra argument.")
.build()
}
fn diagnostic_new_missing_closing_parenthesis(
error: ParseError,
left_parenthesis_position: TokenPosition,
) -> Diagnostic {
DiagnosticBuilder::error("missing closing `)` in `new(...)`")
.primary_label(error.blame_span, "expected `)` here")
.secondary_label(
AstSpan::new(left_parenthesis_position),
"this argument list starts here",
)
.help("Add `)` to close the argument list.")
.build()
}
fn diagnostic_new_missing_class_specifier(
error: ParseError,
new_keyword_position: TokenPosition,
) -> Diagnostic {
let mut builder = DiagnosticBuilder::error("missing class specifier in `new` expression")
.primary_label(
error.blame_span,
"expected the class or expression to instantiate here",
)
.secondary_label(
AstSpan::new(new_keyword_position),
"`new` expression starts here",
)
.help("Add the class or expression to instantiate after `new` or `new(...)`.");
if let Some(related_span) = error.related_span {
builder = builder.secondary_label(related_span, "optional `new(...)` arguments end here");
}
builder.build()
}
impl From<ParseError> for Diagnostic {
fn from(error: ParseError) -> Self {
match error.kind {
ParseErrorKind::ParenthesizedExpressionEmpty {
left_parenthesis_position,
} => diagnostic_parenthesized_expression_empty(error, left_parenthesis_position),
ParseErrorKind::ClassTypeMissingTypeArgument {
left_angle_bracket_position,
} => diagnostic_class_type_missing_type_argument(error, left_angle_bracket_position),
ParseErrorKind::ClassTypeMissingClosingAngleBracket {
left_angle_bracket_position,
} => diagnostic_class_type_missing_closing_angle_bracket(
error,
left_angle_bracket_position,
),
ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
left_parenthesis_position,
} => diagnostic_parenthesized_expression_missing_closing_parenthesis(
error,
left_parenthesis_position,
),
ParseErrorKind::ExpressionExpected => diagnostic_expression_expected(error),
ParseErrorKind::ClassTypeInvalidTypeArgument {
left_angle_bracket_position,
} => diagnostic_class_type_invalid_type_argument(error, left_angle_bracket_position),
ParseErrorKind::NewTooManyArguments {
left_parenthesis_position,
} => diagnostic_new_too_many_arguments(error, left_parenthesis_position),
ParseErrorKind::NewMissingClosingParenthesis {
left_parenthesis_position,
} => diagnostic_new_missing_closing_parenthesis(error, left_parenthesis_position),
ParseErrorKind::NewMissingClassSpecifier {
new_keyword_position,
} => diagnostic_new_missing_class_specifier(error, new_keyword_position),
_ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind))
.primary_label(error.covered_span, "happened here")
.build(),
}
}
}

View File

@ -4,6 +4,9 @@
//! parsing or doing lightweight frontend checks. They are intentionally small, //! parsing or doing lightweight frontend checks. They are intentionally small,
//! depend only on [`AstSpan`], and are easy to construct and store. //! depend only on [`AstSpan`], and are easy to construct and store.
mod expression;
mod render;
use crate::ast::AstSpan; use crate::ast::AstSpan;
/// Classification of a diagnostic by its impact. /// Classification of a diagnostic by its impact.
@ -110,43 +113,51 @@ impl Diagnostic {
} }
/// Returns `true` iff severity is [`Severity::Error`]. /// Returns `true` iff severity is [`Severity::Error`].
#[must_use]
pub fn stops_compilation(&self) -> bool { pub fn stops_compilation(&self) -> bool {
self.severity == Severity::Error self.severity == Severity::Error
} }
/// Returns the diagnostic code if present. /// Returns the diagnostic code if present.
/// ///
/// See [DiagnosticBuilder::code] for code scheme. /// See [`DiagnosticBuilder::code`] for code scheme.
#[must_use]
pub fn code(&self) -> Option<&str> { pub fn code(&self) -> Option<&str> {
self.code.as_deref() self.code.as_deref()
} }
/// Returns the primary label, if any. /// Returns the primary label, if any.
pub fn primary_label(&self) -> Option<&Label> { #[must_use]
pub const fn primary_label(&self) -> Option<&Label> {
self.primary_label.as_ref() self.primary_label.as_ref()
} }
/// Returns the secondary labels in insertion order. /// Returns the secondary labels in insertion order.
#[must_use]
pub fn secondary_labels(&self) -> &[Label] { pub fn secondary_labels(&self) -> &[Label] {
&self.secondary_labels &self.secondary_labels
} }
/// Returns the headline. /// Returns the headline.
#[must_use]
pub fn headline(&self) -> &str { pub fn headline(&self) -> &str {
&self.headline &self.headline
} }
/// Returns the severity. /// Returns the severity.
pub fn severity(&self) -> Severity { #[must_use]
pub const fn severity(&self) -> Severity {
self.severity self.severity
} }
/// Returns the notes. /// Returns the notes.
#[must_use]
pub fn notes(&self) -> &[String] { pub fn notes(&self) -> &[String] {
&self.notes &self.notes
} }
/// Returns the help message, if any. /// Returns the help message, if any.
#[must_use]
pub fn help(&self) -> Option<&str> { pub fn help(&self) -> Option<&str> {
self.help.as_deref() self.help.as_deref()
} }

View File

@ -0,0 +1,491 @@
use crate::ast::AstSpan;
use crate::diagnostics::{self, Diagnostic, Severity};
use crate::lexer::TokenizedFile;
use core::convert::Into;
use crossterm::style::Stylize;
use crossterm::terminal::disable_raw_mode;
use std::cmp::max;
use std::collections::HashMap;
use std::ops::RangeInclusive;
const INDENT: &str = " ";
const MAX_LINES_LIMIT: usize = 10;
/*
error: expected one of `,`, `:`, or `}`, found `token_to`
--> rottlib/src/ast/mod.rs:80:13
|
78 | Self {
| ---- while parsing this struct
79 | token_from: self.token_from,scd
| --- while parsing this struct field
80 | token_to: std::cmp::max(self.token_to, right_most_index),
| ^^^^^^^^ expected one of `,`, `:`, or `}`
*/
/*
|
76 | / "asdasdas
77 | | asd1
78 | | asd2
79 | | asdasd"
| |___________________^ expected `()`, found `&str`
*/
/*
1. Get each span's range and total lines covered by spans as ranges;
2. We need `+N` more lines for `N` labels;
3.
*/
// These are abstract rendering events, not self-contained draw commands.
// They are emitted in increasing order of "significant lines" (range starts/ends).
// The actual source span for a label is recovered later from its LabelType.
#[derive(PartialEq, Eq, Clone, Copy)]
enum RendererCommands {
StartRange {
label_type: LabelType,
column: usize,
},
FinishRange {
label_type: LabelType,
column: usize,
},
SingleRange {
label_type: LabelType,
},
}
enum LineIndexType {
Normal(usize),
Missing,
Ellipsis,
}
// Label ordering is semantic: primary first, then secondaries in diagnostic order.
// That order is also used to break visual ties when multiple labels would otherwise
// start or end on the same source line.
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
enum LabelType {
Primary,
Secondary(usize),
}
struct RangeSet {
primary_range: Option<RangeInclusive<usize>>,
secondary_ranges: Vec<RangeInclusive<usize>>,
}
impl RangeSet {
fn get(&self, index: usize) -> Option<&RangeInclusive<usize>> {
if self.primary_range.is_some() {
if index == 0 {
return self.primary_range.as_ref();
} else {
self.secondary_ranges.get(index - 1)
}
} else {
self.secondary_ranges.get(index)
}
}
fn len(&self) -> usize {
self.secondary_ranges.len() + if self.primary_range.is_some() { 1 } else { 0 }
}
fn iter(&self) -> impl Iterator<Item = &RangeInclusive<usize>> {
self.primary_range
.iter()
.chain(self.secondary_ranges.iter())
}
fn iter_labeled(&self) -> impl Iterator<Item = (LabelType, &RangeInclusive<usize>)> {
self.primary_range
.iter()
.map(|range| (LabelType::Primary, range))
.chain(
self.secondary_ranges
.iter()
.enumerate()
.map(|(index, range)| (LabelType::Secondary(index), range)),
)
}
fn get_first_bound_above(&self, line_number: Option<usize>) -> Option<usize> {
self.iter()
.filter_map(|range| {
let start = *range.start();
let end = *range.end();
let start_ok = line_number.is_none_or(|n| start > n).then_some(start);
let end_ok = line_number.is_none_or(|n| end > n).then_some(end);
match (start_ok, end_ok) {
(Some(a), Some(b)) => Some(a.min(b)),
(Some(a), None) => Some(a),
(None, Some(b)) => Some(b),
(None, None) => None,
}
})
.min()
}
}
// Converts labeled line ranges into an ordered stream of renderer events.
//
// Important invariants:
//
// 1. Commands are ordered by increasing significant line.
// A significant line is any line on which some label starts or ends.
//
// 2. If multiple labels would visually terminate on the same source line,
// the renderer treats them as ending on distinct phantom rows, ordered by
// diagnostic priority (primary/secondary order). This prevents intersections
// and means that same-line closings are intentionally linearized rather than
// treated as a geometric tie.
//
// 3. RendererCommands do not store source line numbers directly.
// Later rendering recovers the underlying span from LabelType and uses the
// event order to know when labels become active/inactive.
//
// 4. When a label starts on the same significant line where another label ends,
// starts are processed first. This is intentional: longer-lived/opening labels
// must occupy earlier columns so that shorter-lived/closing labels bend around
// them without intersecting.
fn make_renderer_commands(ranges: RangeSet) -> Vec<(usize, RendererCommands)> {
// Maps currently-open labels to the index of their StartRange command so that
// we can patch in the final column once the label closes.
let mut open_ranges = HashMap::new();
let mut commands = Vec::new();
let mut current_line = None;
while let Some(next_significant_line) = ranges.get_first_bound_above(current_line) {
current_line = Some(next_significant_line);
// First process all new ranges because they'll live longer and have
// to have earlier columns
for (label, range) in ranges.iter_labeled() {
if *range.start() == next_significant_line {
if range.start() != range.end() {
commands.push((
*range.start(),
RendererCommands::StartRange {
label_type: label,
column: 0,
},
));
open_ranges.insert(label, commands.len() - 1);
} else {
commands.push((
*range.start(),
RendererCommands::SingleRange { label_type: label },
));
}
}
}
// Closing pass.
// The assigned column is the number of ranges that remain open after removing
// this label. Because same-line visual ties are already linearized by label
// priority / phantom rows, processing labels in iter_labeled() order is
// intentional here.
for (label, range) in ranges.iter_labeled() {
if *range.end() == next_significant_line {
if let Some(index) = open_ranges.remove(&label) {
// Column meaning:
// 0 = outermost / earliest lane
// larger values = further inward lanes
//
// We assign the column at close time, not at open time, because the final lane
// depends on which other ranges outlive this one.
let column = open_ranges.len();
if let Some((line_number, RendererCommands::StartRange { .. })) =
commands.get(index)
{
commands[index] = (
*line_number,
RendererCommands::StartRange {
label_type: label,
column,
},
);
}
commands.push((
*range.end(),
RendererCommands::FinishRange {
label_type: label,
column,
},
));
}
}
}
}
commands
}
fn max_line_number_width(ranges: &RangeSet) -> usize {
let max_line = ranges.iter().map(|range| *range.end()).max().unwrap_or(0);
if max_line == 0 {
1
} else {
max_line.ilog10() as usize + 1
}
}
fn span_to_range<'src>(span: AstSpan, file: &TokenizedFile<'src>) -> Option<RangeInclusive<usize>> {
let start_line = file.token_line(span.token_from)?;
let end_line = file.token_line(span.token_to)?;
if start_line <= end_line {
Some(start_line..=end_line)
} else {
None
}
}
fn make_ranges<'src>(file: &TokenizedFile<'src>, diagnostic: &Diagnostic) -> RangeSet {
let mut result = RangeSet {
primary_range: None,
secondary_ranges: Vec::new(),
};
result.primary_range = diagnostic
.primary_label()
.and_then(|label| span_to_range(label.span, file));
for secondary in diagnostic.secondary_labels() {
if let Some(range) = span_to_range(secondary.span, file) {
result.secondary_ranges.push(range);
}
}
result
}
impl Diagnostic {
pub fn render<'src>(&self, file: &TokenizedFile<'src>, file_path: impl Into<String>) {
self.render_header();
println!("{INDENT}{}: {}", "in file".blue().bold(), file_path.into());
self.render_lines(file);
}
/*StartRange {
label_type: LabelType,
column: usize,
},
FinishRange {
label_type: LabelType,
},
SingleRange {
label_type: LabelType,
}, */
fn label_data(&self, label_type: LabelType) -> Option<(AstSpan, String)> {
match label_type {
LabelType::Primary => self
.primary_label()
.map(|label| (label.span, label.message.clone())),
LabelType::Secondary(id) => Some((
self.secondary_labels()[id].span,
self.secondary_labels()[id].message.clone(),
)),
}
}
fn render_lines<'src>(&self, file: &TokenizedFile<'src>) {
let ranges = make_ranges(file, &self);
let max_line_number_width = max(max_line_number_width(&ranges), 3);
let commands = make_renderer_commands(ranges);
let mut max_column = 0;
for command in &commands {
if let (_, RendererCommands::StartRange { column, .. }) = command {
max_column = max(max_column, *column);
}
}
let mut vertical_stack = Vec::new();
vertical_stack.resize(max_column + 1, None);
let mut i = 0;
while i < commands.len() {
let mut current_line = commands[i].0;
let mut single_commands = Vec::new();
let mut start_commands = Vec::new();
let mut finish_commands = Vec::new();
while i < commands.len() && current_line == commands[i].0 {
match commands[i].1 {
RendererCommands::SingleRange { label_type } => {
single_commands.push(label_type)
}
RendererCommands::StartRange { label_type, column } => {
start_commands.push((label_type, column));
}
RendererCommands::FinishRange { label_type, column } => {
finish_commands.push((label_type, column))
}
}
i += 1;
}
// !!!!!!!!!!!!!!!!
// First - update line drawing stack
for (label_type, column) in start_commands {
vertical_stack[column] = Some(label_type);
}
// Next - draw the line
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
for label_type in single_commands {
self.render_single_command(
label_type,
max_line_number_width,
file,
&vertical_stack,
);
}
// Next - render finish commands (drop for now)
for (label_type, column) in finish_commands {
self.render_single_command(
label_type,
max_line_number_width,
file,
&vertical_stack,
);
vertical_stack[column] = None;
}
// !!!!!!!!!!!!!!!!
// Render some more lines
let mut countdown = 3;
current_line += 1;
while current_line < commands[i].0 {
if countdown == 0 {
if current_line + 1 == commands[i].0 {
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
} else {
println!(
"{}",
self.make_line_prefix(
LineIndexType::Ellipsis,
max_line_number_width,
&vertical_stack
)
);
}
break;
} else {
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
}
current_line += 1;
countdown -= 1;
}
}
}
fn render_single_command<'src>(
&self,
label_type: LabelType,
max_line_number_width: usize,
file: &TokenizedFile<'src>,
vertical_stack: &[Option<LabelType>],
) {
let Some((span, message)) = self.label_data(label_type) else {
return;
};
let Some(visible) = file.span_visible_on_line(span) else {
return;
};
let mut builder = self.make_line_prefix(
LineIndexType::Missing,
max_line_number_width,
vertical_stack,
);
builder.push_str(&" ".repeat(visible.columns.start));
let underline_width = (visible.columns.end - visible.columns.start).max(1);
let mut underline_label = "^".repeat(underline_width);
underline_label.push_str(&format!(" {}", message));
match label_type {
LabelType::Primary => {
if self.severity == Severity::Error {
builder.push_str(&underline_label.red().bold().to_string());
} else {
builder.push_str(&underline_label.yellow().bold().to_string());
}
}
LabelType::Secondary(_) => {
builder.push_str(&underline_label.blue().bold().to_string());
}
}
println!("{builder}");
}
fn draw_line<'src>(
&self,
current_line: usize,
max_line_number_width: usize,
file: &TokenizedFile<'src>,
vertical_stack: &[Option<LabelType>],
) {
println!(
"{}{}",
self.make_line_prefix(
LineIndexType::Normal(current_line),
max_line_number_width,
vertical_stack
),
file.line_text(current_line).unwrap_or_default()
);
}
fn make_line_prefix<'src>(
&self,
current_line: LineIndexType,
max_line_number_width: usize,
vertical_stack: &[Option<LabelType>],
) -> String {
let line_text = match current_line {
LineIndexType::Normal(current_line) => (current_line + 1).to_string(),
LineIndexType::Missing => "".to_string(),
LineIndexType::Ellipsis => "...".to_string(),
};
let line_padding = " ".repeat(max_line_number_width - line_text.len());
let mut builder = format!(" {}{} | ", line_padding, line_text)
.blue()
.bold()
.to_string();
for vertical_line in vertical_stack {
if let Some(label) = vertical_line {
let piece = match label {
LabelType::Primary => {
if self.severity == Severity::Error {
" |".red()
} else {
" |".yellow()
}
}
LabelType::Secondary(_) => " |".blue(),
}
.to_string();
builder.push_str(&piece);
} else {
builder.push_str(" ");
}
}
builder
}
fn render_header(&self) {
let severity_label = match self.severity {
Severity::Error => "error".red(),
Severity::Warning => "warning".yellow(),
};
if let Some(ref code) = self.code {
println!(
"{}",
format!("{}[{}]: {}", severity_label, code, self.headline).bold()
);
} else {
println!(
"{}",
format!("{}: {}", severity_label, self.headline).bold()
);
}
}
}

View File

@ -1,83 +0,0 @@
//! Debug-only helpers for [`TokenizedFile`]
//!
//! This module is **compiled only if**
//!
//! * the current build profile has `debug_assertions` enabled, or
//! * the crate is built with the `debug` cargo feature.
//!
//! These checks have been moved to the parent module.
/// A technical trait that adds debug helpers to the lexer.
pub trait DebugTools {
/// Pretty-prints the internal layout of the tokenised file - useful when
/// writing new passes or hunting lexer bugs.
///
/// This method writes the layout directly to standard output.
///
/// The format is unspecified, may change, and is not intended for
/// external tools.
///
/// Each line in the printed layout starts with its 0-based number for
/// convenience.
fn dump_debug_layout(&self);
/// Reconstructs the exact, lossless source text that was fed to
/// [`super::TokenizedFile::from_source`] from internal representation -
/// useful for manually verifying that the lexer works.
fn reconstruct_source(&self) -> String;
}
impl<'src> DebugTools for super::TokenizedFile<'src> {
fn reconstruct_source(&self) -> String {
self.buffer.iter().map(|span| span.lexeme).collect()
}
fn dump_debug_layout(&self) {
for (row_idx, line) in self.lines.iter().enumerate() {
println!("Line {}", row_idx + 1);
match (line.continued_from, line.local_range()) {
// Stand-alone line (all tokens start here)
(None, Some(range)) => {
println!("\t[Standalone]");
dump_spans(&self.buffer[range.clone()]);
}
// Pure continuation - the only thing on this line is
// the remainder of a multi-line token that started earlier.
(Some(origin_row), None) => {
println!(
"\t[Continued from line {} - no new tokens here]",
origin_row + 1
);
}
// Continuation **plus** some fresh tokens that begin here.
(Some(origin_row), Some(range)) => {
println!("\t[Continued from line {} + new tokens]", origin_row + 1);
dump_spans(&self.buffer[range.clone()]);
}
// An empty physical line (should be rare, but let's be safe).
(None, None) => {
println!("\t[Empty line]");
}
}
}
}
}
/// Helper that prints every span in `spans` together with its UTF-16
/// column boundaries.
fn dump_spans<'src>(spans: &[super::TokenPiece<'src>]) {
let mut col_utf16 = 0usize;
for span in spans {
let start = col_utf16;
let end = start + span.length_utf16;
println!(
"\t\t{:?} @ {}-{}: {:?}",
span.token, start, end, span.lexeme
);
col_utf16 = end;
}
}

View File

@ -1,200 +0,0 @@
//! Sub-module that adds an iterator to [`TokenizedFile`] which yields tokens in
//! the order they appear in the source code.
//!
//! ## Examples
//!
//! ```rust
//! let iter = TokenizedFile::from_str("0 / 0").tokens().without_whitespace();
//! ```
//!
//! ## Terminology: continued tokens
//!
//! Some [`super::Token`]s (e.g. [`super::Token::CppText`] or
//! [`super::Token::BlockComment`] can span multiple lines and are recorded on
//! every line on which they appear (usually as the first, and sometimes
//! the only, token).
//! In this module these are referred to as "continued" or
//! "carried-over" tokens.
//! Since our iterator needs to return each token only once, we take special
//! care to skip such continued tokens during iteration.
use super::{TokenLocation, TokenPiece, TokenizedFile};
/// An immutable iterator over all tokens in a [`TokenizedFile`], preserving
/// their order of appearance in the original source file.
///
/// After exhaustion it keeps returning [`None`].
#[must_use]
#[derive(Clone, Debug)]
pub struct Tokens<'src> {
/// [`TokenLocation`] of the next token to be returned.
cursor: TokenLocation,
/// [`TokenizedFile`] whose tokens we're iterating over.
source_file: &'src TokenizedFile<'src>,
/// When `true`, whitespace tokens are skipped.
skip_whitespace: bool,
}
// Because we can only return [`None`] after we've returned it once.
impl<'src> std::iter::FusedIterator for Tokens<'src> {}
impl<'src> Tokens<'src> {
/// Makes the iterator skip all whitespace tokens.
#[must_use]
#[inline]
pub fn without_whitespace(mut self) -> Self {
self.skip_whitespace = true;
self
}
// Returns the position of the next new token, skipping carried-over pieces
// and blank lines.
fn advance_position(&self, position: TokenLocation) -> TokenLocation {
let TokenLocation::Position {
mut line,
mut column,
} = position
else {
return TokenLocation::EndOfFile;
};
if let Some(current_line) = self.source_file.lines.get(line) {
// `Line::len()` also counts a possible token that continued from
// the previous line.
if column + 1 < current_line.len() {
column += 1;
return TokenLocation::Position { line, column };
}
}
// Current line is exhausted: walk downward until we find the first line
// that **owns local tokens**, because we only want *new* token,
// not continued from previous lines (they were already iterated over).
line += 1;
while let Some(next_line) = self.source_file.lines.get(line) {
if next_line.local_range().is_some() {
// Start at the first *local* token,
// skipping any carried-over one
column = if next_line.continued_from.is_some() {
1
} else {
0
};
return TokenLocation::Position { line, column };
}
line += 1; // keep skipping empty / pure-carried lines
}
// No more tokens.
TokenLocation::EndOfFile
}
// Creates a new iterator.
fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
let mut new_iterator = Tokens {
source_file,
cursor: TokenLocation::Position { line: 0, column: 0 },
skip_whitespace: false,
};
// We need to land on the first existing token so [`Iterator::next`]
// can assume cursor is valid.
while new_iterator.cursor != TokenLocation::EndOfFile {
if new_iterator.source_file.get(new_iterator.cursor).is_some() {
break;
}
new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
}
new_iterator
}
}
impl<'src> Iterator for Tokens<'src> {
type Item = (TokenLocation, TokenPiece<'src>);
fn next(&mut self) -> Option<Self::Item> {
// We only ever loop to discard whitespaces when the flag is on
while self.cursor != TokenLocation::EndOfFile {
let token_location = self.cursor;
let token_piece = *self.source_file.get(self.cursor)?;
self.cursor = self.advance_position(self.cursor);
// Optional whitespace-skip
if !self.skip_whitespace || !token_piece.token.is_whitespace() {
return Some((token_location, token_piece));
}
}
None
}
}
impl<'src> TokenizedFile<'src> {
// Returns the final local token in `line_number`
// (used to resolve column 0 of a continued line).
fn last_piece_in_line(&self, line_number: usize) -> Option<&TokenPiece> {
self.lines
.get(line_number)
.and_then(|line| line.local_range())
// `Line::local_range()` is guaranteed to return non-empty `Range`.
.and_then(|range| self.buffer.get(range.end - 1))
}
/// Returns [`TokenPiece`] at a given location if it exists.
///
/// If the line specified by [`TokenLocation`] starts with a token that
/// continues from the previous line - column `0` refers to that token.
///
/// Never panics, invalid position returns [`None`].
///
/// ## Examples
///
/// ```rust
/// use super::{TokenizedFile, TokenLocation, Token};
/// let file = TokenizedFile::from_str("0 / 0");
/// assert_eq!(
/// file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),
/// Some(Token::Divide),
/// );
/// ```
#[track_caller]
pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
let TokenLocation::Position { line, column } = position else {
return None;
};
let line = self.lines.get(line)?;
let column = column;
if column >= line.len() {
return None;
}
if let Some(spanned_line_number) = line.continued_from
&& column == 0
{
self.last_piece_in_line(spanned_line_number)
} else {
// If we have a token that continued from the previous line,
// then, relative to `self.buffer`, our `column` is actually 1-based
// and we need to shift it back to being 0-based.
let token_position =
line.local_range.start + column - if line.continued_from.is_some() { 1 } else { 0 };
self.buffer.get(token_position)
}
}
/// Returns an iterator over all contained tokens in the order they appear
/// in the original source file.
///
/// By default includes all tokens, including whitespace and comments.
///
/// Returns the same iterator as [`TokenizedFile::into_iter`]
#[must_use]
#[inline]
pub fn tokens(&'src self) -> Tokens<'src> {
Tokens::new(self)
}
}
impl<'src> IntoIterator for &'src TokenizedFile<'src> {
type Item = (TokenLocation, TokenPiece<'src>);
type IntoIter = Tokens<'src>;
#[inline]
fn into_iter(self) -> Self::IntoIter {
self.tokens()
}
}

View File

@ -1,526 +0,0 @@
//! Lexer for UnrealScript that understands inline `cpptext { ... }` blocks.
//!
//! ## Notable details
//!
//! Lexer for UnrealScript that recognizes inline `cpptext { ... }` blocks.
//!
//! In UnrealScript, `cpptext` lets authors embed raw C++ between braces.
//! Because whitespace, newlines, or comments may appear between the
//! `cpptext` keyword and the opening `{`, the lexer must remember that
//! it has just seen `cpptext` - hence a state machine.
//!
//! ## Modes
//!
//! - **Normal** - ordinary UnrealScript tokens.
//! - **AwaitingCppBlock** - after `cpptext`, waiting for the next `{`.
//!
//! When that brace arrives, the lexer consumes the entire C++ block as
//! one token (`Token::Brace(BraceKind::CppBlock)`), tracking nested
//! braces, strings, and comments on the way. If the closing `}` is
//! missing, everything to EOF is treated as C++; downstream parsers must
//! handle that gracefully.
use logos::Lexer;
/// Which lexer mode we're in. See the module docs for the full story.
///
/// The mode decides how the next `{` is interpreted (see `handle_brace`).
#[derive(Default, Clone, Copy, PartialEq, Eq)]
enum LexerMode {
    /// Lexing regular UnrealScript.
    #[default]
    Normal,
    /// Saw `cpptext`; waiting for the opening `{` of a C++ block.
    AwaitingCppBlock,
}
/// Extra per-lexer state. Currently just holds the [`LexerMode`].
///
/// This is a logos-specific implementation detail: it is attached to the
/// lexer via the `#[logos(extras = LexerState)]` attribute on [`Token`].
#[derive(Default)]
pub struct LexerState {
    // Current mode; mutated by the `cpptext` and `{` token callbacks.
    mode: LexerMode,
}
/// Are these braces "real" UnrealScript braces, or the start/end of a C++ block?
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BraceKind {
    /// An ordinary UnrealScript `{`.
    Normal,
    /// A `{` that opened an inline `cpptext` block; the entire C++ block
    /// (up to its matching `}`) was consumed as a single token.
    CppBlock,
}
/// All UnrealScript tokens that our compiler distinguishes.
///
/// Keyword rules use `(?i)` regexes, so every keyword is matched
/// case-insensitively.
#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[logos(extras = LexerState)]
pub enum Token {
    // # Compiler/directive keywords
    // Consumes the entire `#exec ...` line, including its line terminator.
    #[regex(r"(?i)#exec[^\r\n]*(\r|\n|\r\n)")]
    ExecDirective,
    // The callback flips the lexer into `AwaitingCppBlock`, so the next `{`
    // starts a raw C++ block (see `handle_brace`).
    #[regex("(?i)cpptext", |lex| { lex.extras.mode = LexerMode::AwaitingCppBlock; })]
    CppText,
    // # Declaration & structural keywords
    #[regex("(?i)class")]
    Class,
    #[regex("(?i)struct")]
    Struct,
    #[regex("(?i)enum")]
    Enum,
    #[regex("(?i)state")]
    State,
    #[regex("(?i)function")]
    Function,
    #[regex("(?i)event")]
    Event,
    #[regex("(?i)delegate")]
    Delegate,
    #[regex("(?i)var")]
    Var,
    #[regex("(?i)local")]
    Local,
    // # Inheritance, interface, dependencies
    #[regex("(?i)extends")]
    Extends,
    #[regex("(?i)dependson")]
    DependsOn,
    // # Access modifiers & properties
    #[regex("(?i)private")]
    Private,
    #[regex("(?i)protected")]
    Protected,
    #[regex("(?i)public")]
    Public,
    #[regex("(?i)const")]
    Const,
    #[regex("(?i)static")]
    Static,
    #[regex("(?i)native")]
    Native,
    #[regex("(?i)abstract")]
    Abstract,
    #[regex("(?i)deprecated")]
    Deprecated,
    // # UnrealScript metadata/specifiers
    #[regex("(?i)default")]
    Default,
    #[regex("(?i)defaultproperties")]
    DefaultProperties,
    #[regex("(?i)optional")]
    Optional,
    #[regex("(?i)config")]
    Config,
    #[regex("(?i)perobjectconfig")]
    PerObjectConfig,
    #[regex("(?i)globalconfig")]
    GlobalConfig,
    #[regex("(?i)collapsecategories")]
    CollapseCategories,
    #[regex("(?i)dontcollapsecategories")]
    DontCollapseCategories,
    #[regex("(?i)hidecategories")]
    HideCategories,
    #[regex("(?i)localized")]
    Localized,
    #[regex("(?i)placeable")]
    Placeable,
    #[regex("(?i)notplaceable")]
    NotPlaceable,
    #[regex("(?i)editinlinenew")]
    EditInlineNew,
    #[regex("(?i)noteditinlinenew")]
    NotEditInlineNew,
    #[regex("(?i)dynamicrecompile")]
    DynamicRecompile,
    #[regex("(?i)transient")]
    Transient,
    #[regex("(?i)operator")]
    Operator,
    #[regex("(?i)simulated")]
    Simulated,
    #[regex("(?i)latent")]
    Latent,
    #[regex("(?i)iterator")]
    Iterator,
    #[regex("(?i)out")]
    Out,
    #[regex("(?i)skip")]
    Skip,
    #[regex("(?i)singular")]
    Singular,
    #[regex("(?i)coerce")]
    Coerce,
    #[regex("(?i)assert")]
    Assert,
    #[regex("(?i)ignores")]
    Ignores,
    #[regex("(?i)within")]
    Within,
    #[regex("(?i)noexport")]
    NoExport,
    // # Replication-related
    #[regex("(?i)reliable")]
    Reliable,
    #[regex("(?i)unreliable")]
    Unreliable,
    #[regex("(?i)replication")]
    Replication,
    #[regex("(?i)nativereplication")]
    NativeReplication,
    // # Control-flow keywords
    #[regex("(?i)goto")]
    Goto,
    #[regex("(?i)if")]
    If,
    #[regex("(?i)else")]
    Else,
    #[regex("(?i)switch")]
    Switch,
    #[regex("(?i)case")]
    Case,
    #[regex("(?i)for")]
    For,
    #[regex("(?i)foreach")]
    ForEach,
    #[regex("(?i)while")]
    While,
    #[regex("(?i)do")]
    Do,
    #[regex("(?i)until")]
    Until,
    #[regex("(?i)break")]
    Break,
    #[regex("(?i)continue")]
    Continue,
    #[regex("(?i)return")]
    Return,
    // # Built-in types
    #[regex("(?i)int")]
    Int,
    #[regex("(?i)float")]
    Float,
    #[regex("(?i)bool")]
    Bool,
    #[regex("(?i)byte")]
    Byte,
    #[regex("(?i)string")]
    String,
    #[regex("(?i)array")]
    Array,
    #[regex("(?i)name")]
    Name,
    // # Literals & identifiers
    #[regex(r"0[xX][0-9A-Fa-f]+|[0-9]+")]
    IntegerLiteral,
    #[regex(r"[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?")]
    FloatLiteral,
    #[regex(r#""([^"\\\r\n]|\\.)*""#)]
    StringLiteral,
    #[regex(r"'[a-zA-Z0-9_\. \-]*'")]
    NameLiteral,
    #[regex("(?i)true")]
    True,
    #[regex("(?i)false")]
    False,
    #[regex("(?i)none")]
    None,
    #[regex("(?i)self")]
    SelfKeyword,
    #[regex("(?i)new")]
    New,
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,
    // # Operations
    // ## Exponentiation
    #[token("**")]
    Exponentiation,
    // ## Unary
    #[token("++")]
    Increment,
    #[token("--")]
    Decrement,
    #[token("!")]
    Not,
    #[token("~")]
    BitwiseNot,
    // ## Vector
    #[regex("(?i)dot")]
    Dot,
    #[regex("(?i)cross")]
    Cross,
    // ## Multiplicative
    #[token("*")]
    Multiply,
    #[token("/")]
    Divide,
    #[token("%")]
    Modulo,
    // ## Additive
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    // ## String manipulation
    #[token("@")]
    ConcatSpace,
    #[token("$")]
    Concat,
    // ## Shifts
    #[token("<<")]
    LeftShift,
    #[token(">>>")]
    LogicalRightShift,
    #[token(">>")]
    RightShift,
    // ## Relational
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,
    #[token(">")]
    Greater,
    #[token(">=")]
    GreaterEqual,
    #[token("==")]
    Equal,
    #[token("!=")]
    NotEqual,
    #[token("~=")]
    ApproximatelyEqual,
    #[regex("(?i)clockwisefrom")]
    ClockwiseFrom,
    // ## Bitwise
    #[token("&")]
    BitwiseAnd,
    #[token("|")]
    BitwiseOr,
    #[token("^")]
    BitwiseXor,
    // ## Logical
    #[token("&&")]
    And,
    #[token("^^")]
    Xor,
    #[token("||")]
    Or,
    // ## Assignments
    #[token("=")]
    Assign,
    #[token("*=")]
    MultiplyAssign,
    #[token("/=")]
    DivideAssign,
    #[token("%=")]
    ModuloAssign,
    #[token("+=")]
    PlusAssign,
    #[token("-=")]
    MinusAssign,
    #[token("$=")]
    ConcatAssign,
    #[token("@=")]
    ConcatSpaceAssign,
    // # Punctuation & delimiters
    #[token("(")]
    LeftParenthesis,
    #[token(")")]
    RightParenthesis,
    // `handle_brace` decides whether this `{` is an ordinary brace or the
    // start of a `cpptext` block (consumed whole as one token).
    #[token("{", handle_brace)]
    Brace(BraceKind),
    #[token("}")]
    RightBrace,
    #[token("[")]
    LeftBracket,
    #[token("]")]
    RightBracket,
    #[token(";")]
    Semicolon,
    #[token(",")]
    Comma,
    #[token(".")]
    Period,
    #[token(":")]
    Colon,
    #[token("#")]
    Hash,
    #[token("?")]
    Question,
    // # Comments & whitespaces
    #[regex(r"//[^\r\n]*")]
    LineComment,
    // The callback consumes the whole (possibly nested) `/* ... */` comment;
    // an unterminated comment fails the rule and surfaces as an error span.
    #[regex(r"/\*", handle_block_comment)]
    BlockComment,
    #[regex(r"\r\n|\n|\r")]
    Newline,
    #[regex(r"[ \t]+")]
    Whitespace,
    // # Technical
    // Catch-all for unmatched input. Has no logos rule, so the lexer never
    // yields it directly; calling code is expected to insert it for
    // error spans.
    Error,
}
impl Token {
    /// Returns `true` if this token is a newline (`Token::Newline`).
    #[must_use]
    pub const fn is_newline(&self) -> bool {
        matches!(self, Token::Newline)
    }

    /// Returns `true` if this token is trivia whitespace
    /// (`Token::Whitespace` or `Token::Newline`).
    ///
    /// Note: comments are **not** considered whitespace.
    #[must_use]
    pub const fn is_whitespace(&self) -> bool {
        matches!(self, Token::Whitespace | Token::Newline)
    }

    /// Returns `true` if this token may span multiple physical lines
    /// (i.e. can contain newline characters).
    ///
    /// Only block comments, `cpptext` blocks and error spans can do so.
    #[must_use]
    pub const fn can_span_lines(&self) -> bool {
        matches!(
            self,
            Token::BlockComment | Token::Brace(BraceKind::CppBlock) | Token::Error
        )
    }

    /// Returns `true` if this token can appear in type position
    /// (either a built-in type keyword or an identifier).
    #[must_use]
    pub const fn is_valid_type_name_token(&self) -> bool {
        matches!(
            self,
            Token::Int
                | Token::Float
                | Token::Bool
                | Token::Byte
                | Token::String
                | Token::Array
                | Token::Name
                | Token::Identifier
        )
    }
}
/// Consume a `/* ... */` block comment with arbitrary nesting
/// (like UnrealScript allows).
///
/// Matches the whole comment (delimiters included) or returns [`None`] if
/// the file ends before every `/*` is closed, which fails the logos rule.
fn handle_block_comment(lexer: &mut Lexer<Token>) -> Option<()> {
    // The opening `/*` was already matched by the rule itself.
    let mut open_comments = 1;
    loop {
        let rest = lexer.remainder();
        if rest.starts_with("/*") {
            open_comments += 1;
            lexer.bump(2);
        } else if rest.starts_with("*/") {
            open_comments -= 1;
            lexer.bump(2);
            if open_comments == 0 {
                return Some(());
            }
        } else if let Some(character) = rest.chars().next() {
            lexer.bump(character.len_utf8());
        } else {
            // Reached EOF with an unterminated comment.
            return None;
        }
    }
}
/// Called for every `{`.
///
/// In [`LexerMode::Normal`] this simply emits an ordinary brace. If the
/// lexer had just seen `cpptext`, the whole C++ block is consumed and
/// reported as a single `Brace(BraceKind::CppBlock)` token.
fn handle_brace(lexer: &mut Lexer<Token>) -> Option<BraceKind> {
    if lexer.extras.mode == LexerMode::AwaitingCppBlock {
        // Reset the mode before consuming so subsequent braces are normal.
        lexer.extras.mode = LexerMode::Normal;
        consume_cpp_block(lexer);
        return Some(BraceKind::CppBlock);
    }
    Some(BraceKind::Normal)
}
/// Consumes a complete C++ block, handling:
/// - Nested `{...}` pairs
/// - String literals (`"..."` and `'...'`), including escaped quotes
/// - Line comments (`// ...\n`)
/// - Block comments (`/* ... */`)
///
/// Leaves the lexer positioned immediately after the closing `}` of the
/// block, or at EOF if the block is unterminated. The opening `{` must have
/// already been consumed by the caller.
fn consume_cpp_block(lexer: &mut Lexer<Token>) {
    // The caller's `{` counts as one open brace.
    let mut brace_depth = 1;
    while let Some(next_character) = lexer.remainder().chars().next() {
        let rest = lexer.remainder();
        if rest.starts_with("/*") {
            lexer.bump(2); // consume the two-byte opener `/*`
            consume_c_comment(lexer);
        } else if rest.starts_with("//") {
            lexer.bump(2); // consume the two-byte opener `//`
            // Skip to just past the end of the line.
            while let Some(comment_character) = lexer.remainder().chars().next() {
                lexer.bump(comment_character.len_utf8());
                if comment_character == '\n' {
                    break;
                }
            }
        } else {
            match next_character {
                '{' => {
                    brace_depth += 1;
                    lexer.bump(1);
                }
                '}' => {
                    brace_depth -= 1;
                    lexer.bump(1);
                    if brace_depth == 0 {
                        return;
                    }
                }
                '"' | '\'' => {
                    lexer.bump(1); // skip the opening `'` or `"`
                    consume_string_literal(lexer, next_character);
                }
                other => lexer.bump(other.len_utf8()),
            }
        }
    }
}
/// Consume a C-style `/* ... */` comment (without nesting).
///
/// Assumes the opener `/*` is already consumed. Stops at EOF if the comment
/// is unterminated.
fn consume_c_comment(lexer: &mut Lexer<Token>) {
    loop {
        if lexer.remainder().starts_with("*/") {
            lexer.bump(2);
            return;
        }
        match lexer.remainder().chars().next() {
            Some(character) => lexer.bump(character.len_utf8()),
            // Unterminated comment: nothing more to consume.
            None => return,
        }
    }
}
/// Consume a string literal from C++ code up to (and including) the closing
/// `delimiter`.
///
/// Assumes the opening quotation mark is already consumed. Backslash escapes
/// are honored, so an escaped delimiter does not end the literal. Stops at
/// EOF if the literal is unterminated.
fn consume_string_literal(lexer: &mut Lexer<Token>, delimiter: char) {
    loop {
        let mut upcoming = lexer.remainder().chars();
        match upcoming.next() {
            // Unterminated literal: nothing more to consume.
            None => return,
            Some('\\') => {
                lexer.bump(1); // `\` is a single byte
                // Skip whatever character was escaped, if any.
                if let Some(escaped) = upcoming.next() {
                    lexer.bump(escaped.len_utf8());
                }
            }
            Some(character) => {
                lexer.bump(character.len_utf8());
                if character == delimiter {
                    return;
                }
            }
        }
    }
}

View File

@ -2,7 +2,8 @@
//! //!
//! Converts raw source text into a lossless, position-aware stream of lexical //! Converts raw source text into a lossless, position-aware stream of lexical
//! [`Token`]s, grouped *per physical line*, and returns it as //! [`Token`]s, grouped *per physical line*, and returns it as
//! a [`TokenizedFile`]. //! a [`TokenizedFile`]. A trailing newline terminates the last physical line
//! rather than introducing an additional empty line.
//! //!
//! Design goals: //! Design goals:
//! //!
@ -12,191 +13,184 @@
//! precompute lengths of each token in that encoding, making interfacing //! precompute lengths of each token in that encoding, making interfacing
//! easier. //! easier.
//! //!
//! ## Iteration over tokens
//!
//! For simplicity we've moved out code for iterating over tokens of
//! [`TokenizedFile`] into a separate submodule [`iterator`].
//!
//! ## Opt-in debug helpers //! ## Opt-in debug helpers
//! //!
//! Extra diagnostics become available in **debug builds** or when the crate is //! Extra diagnostics become available in **debug builds** or when the crate is
//! compiled with `debug` feature enabled. They live in the [`debug_tools`] //! compiled with `debug` feature enabled. They live in the [`debug_tools`]
//! extension trait, implemented for [`TokenizedFile`]. //! extension trait, implemented for [`TokenizedFile`].
//!
//! ```rust
//! // bring the trait into scope
//! use lexer::DebugTools;
//!
//! let file = TokenizedFile::from_str("local int myValue;");
//! file.debug_dump(); // pretty-print token layout
//! let text = file.to_source(); // reconstruct original text
//! ```
mod debug_tools; mod queries;
mod iterator; mod raw_lexer;
mod lexing; #[cfg(test)]
mod tests;
mod token;
use std::collections::HashMap;
use std::ops::Range; use std::ops::Range;
use logos::Logos; use logos::Logos;
#[cfg(any(debug_assertions, feature = "debug"))] use raw_lexer::RawToken;
pub use debug_tools::DebugTools;
pub use iterator::Tokens; pub use raw_lexer::BraceKind;
pub use lexing::{BraceKind, Token}; pub use token::Keyword;
pub use token::Token;
/// Empirically chosen starting size for token buffer (used during tokenization) /// Empirically chosen starting size for token buffer (used during tokenization)
/// that provides good performance. /// that provides good performance.
const DEFAULT_TOKEN_BUFFER_CAPACITY: usize = 20_000; const DEFAULT_TOKEN_BUFFER_CAPACITY: usize = 20_000;
/// A slice tagged with its token kind plus two length counters. // TODO: check this!!!
/// Visible fragment of a token on one physical line.
/// ///
/// *No absolute coordinates* are stored - they are recomputed per line. /// `columns` is an end-exclusive range inside the string returned by
#[derive(Debug, Hash, Clone, Copy, PartialEq, Eq)] /// [`TokenizedFile::line_text`] for that line.
pub struct TokenPiece<'src> { #[derive(Clone, Debug, Hash, PartialEq, Eq)]
/// Token, represented by this [`TokenPiece`]. pub struct VisibleLineSpan {
pub line: usize,
pub columns: std::ops::Range<usize>,
}
/// A token together with its source text and precomputed UTF-16 length.
///
/// It does not store an absolute file position.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct TokenData<'src> {
/// Kind of token that was lexed.
pub token: Token, pub token: Token,
/// Underlying text that was lexed as the corresponding token. /// Underlying text that was lexed as the corresponding token.
pub lexeme: &'src str, pub lexeme: &'src str,
/// Length of the token in UTF-16 code units for the needs of easy seeking /// Length of the token in UTF-16 code units for the needs of easy seeking
/// using given LSP cursor coordinates (line + UTF-16 offset). /// using given LSP cursor coordinates (line + UTF-16 offset).
/// Precomputed for convenience. /// Precomputed for convenience.
pub length_utf16: usize, pub utf16_length: usize,
} }
/// Defines location of a token inside [`TokenizedFile`] in a form convenient /// 0-based index of a token within the file-wide token buffer.
/// for communicating through LSP. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct TokenPosition(pub usize);
pub enum TokenLocation {
/// Actual position of some token in the file.
Position {
/// 0-based line number.
line: usize,
/// 0-based index of a token in the line, possibly including the token that
/// has continued from the previous line.
///
/// Columns count tokens, not bytes or chars.
column: usize,
},
/// Position af the end-of-file.
EndOfFile,
}
/// A tokenized, lossless representation of an UnrealScript source file. /// A tokenized, lossless representation of an `UnrealScript` source file.
#[derive(Debug)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct TokenizedFile<'src> { pub struct TokenizedFile<'src> {
/// Arena of every token span in this file. /// Arena of every token span in this file.
buffer: Vec<TokenPiece<'src>>, buffer: Vec<TokenData<'src>>,
/// Mapping that provides an easy and efficient access to tokens by /// Mapping from physical line number to the tokens that belong to it.
/// line number.
lines: Vec<Line>, lines: Vec<Line>,
/// Mapping token index to ranges of bytes that correspond to
/// visible characters (i.e. all non line terminators) in its lines.
///
/// Records only exists for multiline tokens and ranges can be empty for
/// lines that only contain line break boundary.
multi_line_map: HashMap<BufferIndex, Vec<VisibleByteRange>>,
/// Simple flag for marking erroneous state. /// Simple flag for marking erroneous state.
had_errors: bool, had_errors: bool,
} }
/// Mutable state that encapsulates data needed during the tokenization loop. /// An immutable iterator over all tokens in a [`TokenizedFile`], preserving
/// their order of appearance in the original source file.
/// ///
/// Access to stored tokens is provided through the [`iterator::Tokens`] /// After exhaustion it keeps returning [`None`].
/// iterator. #[must_use]
#[derive(Clone, Debug)]
pub struct Tokens<'file, 'src> {
/// Position of the next token to be returned in the canonical file-wide
/// token arena.
cursor: TokenPosition,
/// [`TokenizedFile`] whose tokens we're iterating over.
source_file: &'file TokenizedFile<'src>,
}
/// Type for referring to line numbers.
type LineNumber = usize;
/// Type for specific tokens inside each [`Line`].
type BufferIndex = usize;
/// Type for describing sub-range of visible characters of a single line for
/// some token.
type VisibleByteRange = Range<usize>;
/// Representation of a single physical line of the source file.
///
/// Uses ranges instead of slices to avoid a self-referential relationship
/// with [`TokenizedFile`], which Rust forbids.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
struct Line {
/// Token that began on an earlier line (`None` for standalone lines).
continued_from: Option<LineNumber>,
/// Contiguous tokens that started on this line (`start >= end` iff empty).
local_range: Range<BufferIndex>,
}
/// Mutable state used while tokenizing a source file.
#[derive(Debug)]
struct Tokenizer<'src> { struct Tokenizer<'src> {
/// Arena that owns every [`TokenPiece`] produced for the file. /// Arena that owns every [`TokenData`] produced for the file.
buffer: Vec<TokenPiece<'src>>, buffer: Vec<TokenData<'src>>,
/// Mapping from physical line number to the tokens that belong to it. /// Mapping from physical line number to the tokens that belong to it.
lines: Vec<Line>, lines: Vec<Line>,
/// The current 0-based physical line number. /// Mapping token index to ranges of bytes that correspond to
/// visible characters in its lines.
multi_line_map: HashMap<BufferIndex, Vec<VisibleByteRange>>,
/// The 0-based physical line number that is currently being scanned.
line_number: usize, line_number: usize,
/// Index in [`Tokenizer::buffer`] where the current *line* starts. /// Points to the first token (index in [`Tokenizer::buffer`]) not yet
slice_start_index: usize, /// committed to `lines`, e.g. where the current *line* starts.
uncommitted_start_index: usize,
/// When a multi-line token is being scanned, stores the 0-based line /// When a multi-line token is being scanned, stores the 0-based line
/// on which it started; [`None`] otherwise. /// on which it started; [`None`] otherwise.
/// ///
/// `Some(line_idx)` iff the current line is within a multi-line token that /// `Some(line_number)` iff the current line is within a multi-line token
/// started on `line_idx`; it is consumed exactly once by /// that started on `line_number`; it is consumed exactly once by
/// [`Self::commit_current_line`]. /// [`Self::commit_current_line`].
multi_line_start: Option<usize>, multi_line_start_line: Option<LineNumber>,
/// Set to [`true`] if the lexer reported any error tokens. /// Set to `true` if the lexer reported any error tokens.
had_errors: bool, had_errors: bool,
} }
impl<'src> TokenizedFile<'src> { impl<'src> TokenizedFile<'src> {
/// Tokenize `source` and return a fresh [`TokenizedFile`]. /// Tokenizes `source` and returns a fresh [`TokenizedFile`].
/// ///
/// ## Examples /// Its output is lossless and groups resulting tokens by physical lines.
/// /// Error spans are preserved as [`Token::Error`].
/// ```rust
/// let source_text = "2 + 2 * 2".to_string();
/// let tokenized_file = TokenizedFile::from_str(&source_text);
/// ```
#[must_use] #[must_use]
pub fn from_str(source: &'src str) -> TokenizedFile<'src> { pub fn tokenize(source: &'src str) -> Self {
let mut tokenizer = Self::builder(); let mut tokenizer = Tokenizer::new();
let mut lexer = Token::lexer(source); let mut lexer = RawToken::lexer(source);
while let Some(token_result) = lexer.next() { while let Some(token_result) = lexer.next() {
// Add `Token:Error` manually, since Logos won't do it for us. // Add `Token::Error` manually, since Logos won't do it for us.
let token = token_result.unwrap_or_else(|_| { let token = token_result.unwrap_or_else(|()| {
tokenizer.had_errors = true; tokenizer.had_errors = true;
Token::Error RawToken::Error
}); });
let token_piece = make_token_piece(token, lexer.slice()); let token_piece = make_token_data(Token::from(token), lexer.slice());
tokenizer.process_token_piece(token_piece); tokenizer.process_token_piece(token_piece);
} }
tokenizer.into_tokenized_file() tokenizer.into_tokenized_file()
} }
/// Returns [`true`] if any erroneous tokens were produced during building /// Returns `true` if tokenization produced any error tokens.
/// of this [`TokenizedFile`]. #[must_use]
/// pub const fn has_errors(&self) -> bool {
/// ## Examples
///
/// ```rust
/// let tokenized_file = TokenizedFile::from_str("function test() {}");
/// if tokenized_file.has_errors() {
/// println!("Error while parsing file.");
/// }
/// ```
#[inline]
pub fn has_errors(&self) -> bool {
self.had_errors self.had_errors
} }
/// Create an empty tokenizer state with tuned buffer capacity. /// Returns an iterator over all contained tokens in the order they appear
fn builder() -> Tokenizer<'src> { /// in the original source file.
Tokenizer { ///
buffer: Vec::with_capacity(DEFAULT_TOKEN_BUFFER_CAPACITY), /// Returns pairs of position and token data: `(TokenPosition, TokenData)`.
lines: Vec::new(), pub const fn iter(&self) -> Tokens<'_, 'src> {
line_number: 0, Tokens::new(self)
slice_start_index: 0,
multi_line_start: None,
had_errors: false,
}
} }
} }
/// Type for indexing lines in a [`TokenizedFile`].
type LineIdx = usize;
/// Type for specific tokens inside each [`Line`].
type TokenIdx = usize;
/// Representation of a single physical line of the source file.
///
/// [`Range<TokenIdx>`] are used instead of slices to avoid creating
/// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
struct Line {
/// Token that began on an earlier line (`None` for standalone lines).
continued_from: Option<LineIdx>,
/// Contiguous tokens that started on this line (`start >= end` iff empty).
local_range: Range<TokenIdx>,
}
impl Line { impl Line {
/// Creates a standalone line that owns a contiguous slice in /// Creates a standalone line that owns a contiguous slice in
/// the [`TokenizedFile::buffer`] arena. /// the [`TokenizedFile::buffer`] arena.
#[inline] const fn standalone(locals: Range<BufferIndex>) -> Self {
fn standalone(locals: Range<TokenIdx>) -> Line { Self {
Line {
continued_from: None, continued_from: None,
local_range: locals, local_range: locals,
} }
@ -204,9 +198,8 @@ impl Line {
/// Creates a line that is part of a multi-line token started on /// Creates a line that is part of a multi-line token started on
/// another line, referencing the 0-based index of its origin. /// another line, referencing the 0-based index of its origin.
#[inline] const fn continued(carried: LineNumber) -> Self {
fn spanned(carried: LineIdx) -> Line { Self {
Line {
continued_from: Some(carried), continued_from: Some(carried),
local_range: 0..0, local_range: 0..0,
} }
@ -214,9 +207,8 @@ impl Line {
/// Creates a line that is part of a multi-line token started on /// Creates a line that is part of a multi-line token started on
/// another line and also contains additional tokens local to itself. /// another line and also contains additional tokens local to itself.
#[inline] const fn continued_with_tokens(carried: LineNumber, locals: Range<BufferIndex>) -> Self {
fn spanned_with_tokens(carried: LineIdx, locals: Range<TokenIdx>) -> Line { Self {
Line {
continued_from: Some(carried), continued_from: Some(carried),
local_range: locals, local_range: locals,
} }
@ -227,29 +219,31 @@ impl Line {
/// ///
/// [`None`] means there are no such tokens. Otherwise range is guaranteed /// [`None`] means there are no such tokens. Otherwise range is guaranteed
/// to not be empty. /// to not be empty.
#[inline] fn local_range(&self) -> Option<Range<BufferIndex>> {
fn local_range(&self) -> Option<Range<TokenIdx>> {
if self.local_range.is_empty() { if self.local_range.is_empty() {
None None
} else { } else {
Some(self.local_range.clone()) Some(self.local_range.clone())
} }
} }
/// Returns the number of tokens on this line.
///
/// Counts both tokens that started on this line and tokens that continued
/// from previous one.
#[inline]
fn len(&self) -> usize {
(if self.continued_from.is_some() { 1 } else { 0 })
+ (self.local_range.end - self.local_range.start)
}
} }
impl<'src> Tokenizer<'src> { impl<'src> Tokenizer<'src> {
/// Returns an empty tokenizer state.
fn new() -> Self {
Self {
buffer: Vec::with_capacity(DEFAULT_TOKEN_BUFFER_CAPACITY),
lines: Vec::new(),
multi_line_map: HashMap::new(),
line_number: 0,
uncommitted_start_index: 0,
multi_line_start_line: None,
had_errors: false,
}
}
/// Handles a token span and dispatches to the appropriate handler. /// Handles a token span and dispatches to the appropriate handler.
fn process_token_piece(&mut self, token_piece: TokenPiece<'src>) { fn process_token_piece(&mut self, token_piece: TokenData<'src>) {
if token_piece.token.can_span_lines() { if token_piece.token.can_span_lines() {
self.process_multi_line_token(token_piece); self.process_multi_line_token(token_piece);
} else { } else {
@ -259,7 +253,7 @@ impl<'src> Tokenizer<'src> {
/// Handles simple tokens that *never* span multiple lines, allowing us to /// Handles simple tokens that *never* span multiple lines, allowing us to
/// skip a lot of work. /// skip a lot of work.
fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) { fn process_single_line_token(&mut self, token_piece: TokenData<'src>) {
if token_piece.token.is_newline() { if token_piece.token.is_newline() {
self.line_number += 1; self.line_number += 1;
self.buffer.push(token_piece); self.buffer.push(token_piece);
@ -270,34 +264,40 @@ impl<'src> Tokenizer<'src> {
} }
/// Handles tokens that might contain one or more newline characters. /// Handles tokens that might contain one or more newline characters.
fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) { fn process_multi_line_token(&mut self, token_piece: TokenData<'src>) {
let start_line = self.line_number; let start_line = self.line_number;
let newline_count = count_line_breaks(token_piece.lexeme); let line_break_map = split_visible_line_segments(token_piece.lexeme);
let newline_count = line_break_map.len().saturating_sub(1);
// Did this token end in a newline? // Needed for unterminated multi-line error tokens that reach EOF right
// This can happen if this is an `Error` token that ends the file. // after a line break.
let ends_with_newline = let ends_with_newline =
token_piece.lexeme.ends_with('\n') || token_piece.lexeme.ends_with('\r'); token_piece.lexeme.ends_with('\n') || token_piece.lexeme.ends_with('\r');
let multi_line_token_index = self.buffer.len();
self.buffer.push(token_piece); self.buffer.push(token_piece);
// We only need to commit the line if this token actually ended the line if !line_break_map.is_empty() {
self.multi_line_map
.insert(multi_line_token_index, line_break_map);
}
// A line is committed only once the token stream has actually crossed
// a physical line boundary.
if newline_count > 0 { if newline_count > 0 {
// This clears `multi_line_start_line`
self.commit_current_line(); self.commit_current_line();
// We only need to insert one `Line::spanned(start_line)` per // We only need to insert one `Line::continued(start_line)` per
// *interior* line: // *interior* line:
// //
// standalone | local int i = /* Now we start long comment // standalone | local int i = /* Now we start long comment
// spanned | with three line breaks and *exactly* two // continued | with three line breaks and *exactly* two
// spanned | inner lines that contain nothing but // continued | inner lines that contain nothing but
// spanned_with_tokens | comment bytes! */ = 0; // continued_with_tokens| comment bytes! */ = 0;
let inner_lines_count = newline_count - 1; let inner_lines_count = newline_count - 1;
for _ in 0..inner_lines_count { for _ in 0..inner_lines_count {
self.lines.push(Line::spanned(start_line)); self.lines.push(Line::continued(start_line));
} }
// This is called *after* `commit_current_line()` cleared previous self.multi_line_start_line = if ends_with_newline {
// stored value None
self.multi_line_start = if ends_with_newline {
None // we're done at this point
} else { } else {
Some(start_line) Some(start_line)
}; };
@ -309,32 +309,34 @@ impl<'src> Tokenizer<'src> {
/// Commits the tokens of the current physical line into `self.lines`. /// Commits the tokens of the current physical line into `self.lines`.
fn commit_current_line(&mut self) { fn commit_current_line(&mut self) {
let slice_end = self.buffer.len(); let slice_end = self.buffer.len();
if slice_end > self.slice_start_index { // A trailing newline terminates the current physical line rather than
let slice = self.slice_start_index..slice_end; // creating an additional empty line entry.
if slice_end > self.uncommitted_start_index {
let slice = self.uncommitted_start_index..slice_end;
// If we were in the middle of a multi-line token, we // If we were in the middle of a multi-line token, we
// *always* consume `multi_line_start` here, ensuring that each call // *always* consume `multi_line_start` here, ensuring that each call
// to `commit_current_line()` only applies it once. // to `commit_current_line()` only applies it once.
// This guarantees no "bleed" between adjacent multi-line tokens. // This guarantees no "bleed" between adjacent multi-line tokens.
if let Some(from) = self.multi_line_start.take() { if let Some(from) = self.multi_line_start_line.take() {
self.lines.push(Line::spanned_with_tokens(from, slice)); self.lines.push(Line::continued_with_tokens(from, slice));
} else { } else {
self.lines.push(Line::standalone(slice)); self.lines.push(Line::standalone(slice));
} }
self.slice_start_index = slice_end; self.uncommitted_start_index = slice_end;
} }
} }
/// Finishes tokenization, converting accumulated data into /// Finishes tokenization, converting accumulated data into
/// [`TokenizedFile`]. /// [`TokenizedFile`].
fn into_tokenized_file(mut self) -> TokenizedFile<'src> { fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
// Flush trailing tokens for which `commit` wasn't auto triggered // Commits the final line when the file does not end with a newline.
self.commit_current_line(); self.commit_current_line();
// If we still have a `multi_line_start` // If we still have a `multi_line_start`
// (i.e. a pure multi-line token with no local tokens on its last line), // (i.e. a pure multi-line token with no local tokens on its last line),
// push a bare `Line::spanned` entry. // push a bare `Line::continued` entry.
if let Some(from) = self.multi_line_start.take() { if let Some(from) = self.multi_line_start_line.take() {
self.lines.push(Line::spanned(from)); self.lines.push(Line::continued(from));
} }
self.buffer.shrink_to_fit(); self.buffer.shrink_to_fit();
@ -343,40 +345,105 @@ impl<'src> Tokenizer<'src> {
TokenizedFile { TokenizedFile {
buffer: self.buffer, buffer: self.buffer,
lines: self.lines, lines: self.lines,
multi_line_map: self.multi_line_map,
had_errors: self.had_errors, had_errors: self.had_errors,
} }
} }
} }
fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> { fn make_token_data(token: Token, text: &str) -> TokenData<'_> {
let length_utf16 = text.encode_utf16().count(); let length_utf16 = text.encode_utf16().count();
TokenPiece { TokenData {
lexeme: text, lexeme: text,
token, token,
length_utf16, utf16_length: length_utf16,
} }
} }
/// Counts the number of newlines in given text. /// Returns byte ranges of visible text characters for each physical line
fn count_line_breaks(text: &str) -> usize { /// spanned by `text`.
let mut bytes_iterator = text.as_bytes().iter().peekable(); ///
let mut newline_count = 0; /// Returns an empty vector if `text` contains no line breaks.
while let Some(&next_byte) = bytes_iterator.next() { fn split_visible_line_segments(text: &str) -> Vec<Range<usize>> {
let bytes = text.as_bytes();
let mut segments = Vec::new();
let mut segment_start = 0usize;
let mut saw_line_break = false;
let mut bytes_iterator = bytes.iter().enumerate().peekable();
while let Some((next_byte_index, &next_byte)) = bytes_iterator.next() {
// Logos' regex rule is "\r\n|\n|\r", so we agree with it on new line // Logos' regex rule is "\r\n|\n|\r", so we agree with it on new line
// character treatment // character treatment
match next_byte { match next_byte {
b'\r' => { b'\r' => {
newline_count += 1; saw_line_break = true;
if let Some(&&b'\n') = bytes_iterator.peek() { let visible_end = next_byte_index;
// skip the '\n' in a CRLF let next_start =
bytes_iterator.next(); if let Some((next_line_break_index, b'\n')) = bytes_iterator.peek().copied() {
} bytes_iterator.next(); // consume '\n' of `\r\n`
next_line_break_index + 1
} else {
next_byte_index + 1
};
segments.push(segment_start..visible_end);
segment_start = next_start;
} }
b'\n' => { b'\n' => {
newline_count += 1; saw_line_break = true;
let visible_end = next_byte_index;
segments.push(segment_start..visible_end);
segment_start = next_byte_index + 1;
} }
_ => (), _ => (),
} }
} }
newline_count // If the token contained at least one line break, include the visible
// segment of its final physical line as well. This may be empty, e.g.
// for text ending with '\n' or '\r\n'.
if saw_line_break {
segments.push(segment_start..bytes.len());
}
segments
}
// `Tokens` is fused: once `cursor` moves past the end of `buffer`, `next()`
// keeps returning `None`, because the cursor only ever advances and can never
// become valid again.
impl std::iter::FusedIterator for Tokens<'_, '_> {}
impl<'file, 'src> Tokens<'file, 'src> {
    /// Advances the iterator cursor by one token.
    ///
    /// The cursor may move past the end of the buffer; `next()` treats any
    /// out-of-bounds cursor as exhaustion.
    const fn advance(&mut self) {
        self.cursor.0 += 1;
    }

    /// Creates a new iterator positioned at the first token of `source_file`.
    const fn new(source_file: &'file TokenizedFile<'src>) -> Self {
        Self {
            source_file,
            cursor: TokenPosition(0),
        }
    }
}
impl<'src> Iterator for Tokens<'_, 'src> {
    type Item = (TokenPosition, TokenData<'src>);

    /// Yields the token under the cursor together with its position, then
    /// steps the cursor past it. Returns `None` once the buffer is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let token_data = self.source_file.buffer.get(self.cursor.0).copied()?;
        let position = self.cursor;
        self.advance();
        Some((position, token_data))
    }
}
impl<'file, 'src> IntoIterator for &'file TokenizedFile<'src> {
type Item = (TokenPosition, TokenData<'src>);
type IntoIter = Tokens<'file, 'src>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
} }

View File

@ -0,0 +1,291 @@
//! # Query helpers
//!
//! Read-only convenience APIs for inspecting a [`TokenizedFile`] without
//! exposing its internal representation.
use crate::lexer::{Line, TokenData, TokenPosition, TokenizedFile, VisibleLineSpan};
impl<'src> TokenizedFile<'src> {
    /// Returns the number of physical lines stored in this file.
    ///
    /// Empty line after the trailing newline sequence isn't counted as a line
    /// by this method.
    #[must_use]
    pub const fn line_count(&self) -> usize {
        self.lines.len()
    }

    /// Returns an iterator over tokens that *start* on physical line
    /// `line_number`.
    ///
    /// The yielded items are `(TokenPosition, TokenData)` pairs, matching the
    /// canonical file-wide token arena.
    ///
    /// If the line ends with a newline token, that newline token is included.
    ///
    /// If the line begins with a carried fragment of a multi-line token that
    /// started on an earlier line, that fragment is **not** yielded here.
    /// Use [`TokenizedFile::line_text`] to reconstruct the visible content of
    /// the full line.
    ///
    /// If `line_number` is out of bounds, the returned iterator is empty.
    #[must_use]
    pub fn line_tokens(
        &self,
        line_number: usize,
    ) -> std::vec::IntoIter<(TokenPosition, TokenData<'src>)> {
        let Some(line) = self.lines.get(line_number) else {
            return Vec::new().into_iter();
        };
        let Some(local_range) = line.local_range() else {
            // Pure continuation line: no token *starts* here.
            return Vec::new().into_iter();
        };
        let mut out = Vec::with_capacity(local_range.len());
        for buffer_index in local_range {
            // Invariant:
            // `Line::local_range()` is always constructed from contiguous
            // slices of `self.buffer` during tokenization, so every index in
            // this range must be valid for `self.buffer`.
            let token_data = self.buffer[buffer_index];
            out.push((TokenPosition(buffer_index), token_data));
        }
        out.into_iter()
    }

    /// Returns the token stored at `position`, if that position is valid.
    ///
    /// This is a direct lookup into the file-wide token buffer.
    #[must_use]
    pub fn token_at(&self, position: TokenPosition) -> Option<TokenData<'src>> {
        self.buffer.get(position.0).copied()
    }

    /// Reconstructs the visible text of physical line `line_index`.
    ///
    /// The returned string does **not** include a trailing line terminator.
    ///
    /// Unlike [`TokenizedFile::line_tokens`], this method includes the visible
    /// fragment of a multi-line token carried from an earlier line.
    ///
    /// Returns [`None`] iff `line_index >= self.line_count()`.
    #[must_use]
    pub fn line_text(&self, line_index: usize) -> Option<String> {
        let line = self.lines.get(line_index)?;
        let mut out = String::new();
        // Start with the fragment of a multi-line token that spills into this
        // line, if any.
        if let Some(piece) = self.carried_piece_for_line(line_index) {
            out.push_str(piece);
        }
        let Some(range) = line.local_range() else {
            // Pure continuation line: the carried fragment is the whole text.
            return Some(out);
        };
        for buffer_index in range.clone() {
            let token_piece = self.buffer[buffer_index];
            if token_piece.token.is_newline() {
                // Must be last token
                debug_assert_eq!(buffer_index + 1, range.end);
                break;
            }
            // A multi-line token contributes only its first visible segment
            // to the line it starts on.
            if token_piece.token.can_span_lines()
                && let Some(first_segment) = self
                    .multi_line_map
                    .get(&buffer_index)
                    .and_then(|segments| segments.first())
            {
                out.push_str(&token_piece.lexeme[first_segment.clone()]);
                // Must be last token
                debug_assert_eq!(buffer_index + 1, range.end);
                break;
            }
            out.push_str(token_piece.lexeme);
        }
        Some(out)
    }

    /// Returns the 0-based physical line on which the token at `position`
    /// starts.
    ///
    /// For multi-line tokens, this is the line where the token begins, not
    /// every physical line it spans.
    ///
    /// Returns `None` if `position` is out of bounds.
    #[must_use]
    pub fn token_line(&self, position: TokenPosition) -> Option<usize> {
        // Reject invalid token positions early.
        self.buffer.get(position.0)?;
        // Binary search: the first line whose exclusive upper bound exceeds
        // `position.0` is the line this token starts on.
        let line_index = self
            .lines
            .partition_point(|line| self.line_search_upper_bound(line) <= position.0);
        (line_index < self.lines.len()).then_some(line_index)
    }

    /// Returns the exclusive upper token index bound for binary-searching
    /// lines by token position.
    ///
    /// In other words: every token that "belongs" to this line in start-line
    /// terms has index `< returned_value`.
    fn line_search_upper_bound(&self, line: &Line) -> usize {
        if let Some(local_range) = line.local_range() {
            local_range.end
        } else {
            // Pure continuation line: it contains only the carried fragment of
            // a multi-line token that started earlier.
            //
            // That token is always the last local token on the origin line, so
            // its token index + 1 acts as the exclusive upper bound.
            let origin_line = line
                .continued_from
                .expect("empty line entry must be a continuation line");
            self.carried_token_index(origin_line)
                .expect("continuation line must point to a valid origin token")
                + 1
        }
    }

    /// If `line_index` begins with a fragment of a multi-line token that
    /// started earlier, returns the visible slice of that token for this line.
    fn carried_piece_for_line(&self, line_index: usize) -> Option<&'src str> {
        // Find carried, multiline token
        let origin_line = self.lines.get(line_index)?.continued_from?;
        let carried_token_index = self.carried_token_index(origin_line)?;
        // Find right part of the multiline token's lexeme:
        // segment 0 belongs to the origin line, so this line's segment index
        // is its distance from the origin line.
        let segments = self.multi_line_map.get(&carried_token_index)?;
        let segment_index = line_index.checked_sub(origin_line)?;
        let boundary = segments.get(segment_index)?;
        self.buffer
            .get(carried_token_index)?
            .lexeme
            .get(boundary.clone())
    }

    /// Recovers the token index of the multi-line token that started on
    /// `origin_line` and is carried into later lines.
    ///
    /// In the current representation, this is always the last local token that
    /// started on the origin line.
    fn carried_token_index(&self, origin_line: usize) -> Option<usize> {
        let range = self.lines.get(origin_line)?.local_range()?;
        let token_index = range.end.checked_sub(1)?;
        debug_assert!(self.buffer[token_index].token.can_span_lines());
        Some(token_index)
    }

    /// Returns the visible per-line spans occupied by the token at `position`.
    ///
    /// Coordinates are expressed in visible character columns inside
    /// `line_text(line)`, with an exclusive end bound.
    ///
    /// Newline-only tokens have no visible text, so they return an empty vector.
    ///
    /// Returns `None` if `position` is invalid.
    #[must_use]
    pub fn token_visible_spans(&self, position: TokenPosition) -> Option<Vec<VisibleLineSpan>> {
        let token_piece = self.buffer.get(position.0).copied()?;
        let start_line = self.token_line(position)?;
        let start_column = self.token_start_visible_column(position)?;
        if token_piece.token.is_newline() {
            return Some(Vec::new());
        }
        // True multi-line token: reuse already computed visible byte segments,
        // then convert them into visible character columns.
        if let Some(segments) = self.multi_line_map.get(&position.0) {
            let mut out = Vec::with_capacity(segments.len());
            for (segment_index, byte_range) in segments.iter().enumerate() {
                let visible_text = &token_piece.lexeme[byte_range.clone()];
                let width = visible_text.chars().count();
                // Empty visible fragment: skip it.
                // This matters for things like a token ending with '\n'.
                if width == 0 {
                    continue;
                }
                let line = start_line + segment_index;
                // A trailing newline does not create an extra stored physical line.
                if line >= self.line_count() {
                    break;
                }
                // Only the first segment starts mid-line; continuation
                // segments always begin at column 0.
                let column_start = if segment_index == 0 { start_column } else { 0 };
                out.push(VisibleLineSpan {
                    line,
                    columns: column_start..(column_start + width),
                });
            }
            return Some(out);
        }
        // Single-line token, including "can_span_lines" tokens that happen not
        // to contain a line break.
        let width = token_piece.lexeme.chars().count();
        Some(vec![VisibleLineSpan {
            line: start_line,
            columns: start_column..(start_column + width),
        }])
    }

    /// Returns the visible start column of the token at `position` inside
    /// `line_text(token_line(position))`.
    ///
    /// Column is measured in visible characters, excluding line terminators.
    ///
    /// Returns `None` if `position` is invalid or the column cannot be
    /// determined from the tokens of its start line.
    fn token_start_visible_column(&self, position: TokenPosition) -> Option<usize> {
        let line_index = self.token_line(position)?;
        let line = self.lines.get(line_index)?;
        // A carried multi-line fragment occupies the leading columns.
        let mut column = self
            .carried_piece_for_line(line_index)
            .map_or(0, |text| text.chars().count());
        let local_range = line.local_range()?;
        for buffer_index in local_range {
            if buffer_index == position.0 {
                return Some(column);
            }
            let token_piece = self.buffer.get(buffer_index)?;
            if token_piece.token.is_newline() {
                break;
            }
            if token_piece.token.can_span_lines() && self.multi_line_map.contains_key(&buffer_index)
            {
                // A true multi-line token ends its line's local range, so
                // `position` cannot start after it on this line.
                //debug_assert_eq!(buffer_index + 1, local_range.end);
                return None;
            }
            column += token_piece.lexeme.chars().count();
        }
        None
    }

    /// Returns the visible span covered by `span` when its first and last
    /// tokens land on the same physical line.
    ///
    /// The result spans columns from the start of the first token to the end
    /// of the last token, in the same coordinates as
    /// [`TokenizedFile::token_visible_spans`].
    ///
    /// Returns `None` if either endpoint is invalid, has no visible span, or
    /// the endpoints fall on different physical lines.
    #[must_use]
    pub fn span_visible_on_line(&self, span: crate::ast::AstSpan) -> Option<VisibleLineSpan> {
        let start = self
            .token_visible_spans(span.token_from)?
            .into_iter()
            .next()?;
        let end = self
            .token_visible_spans(span.token_to)?
            .into_iter()
            .last()?;
        if start.line != end.line {
            return None;
        }
        Some(VisibleLineSpan {
            line: start.line,
            columns: start.columns.start..end.columns.end,
        })
    }
}

View File

@ -0,0 +1,632 @@
//! Lexer for `UnrealScript` that understands inline `cpptext { ... }` blocks.
//!
//! ## Notable details
//!
//! In `UnrealScript`, `cpptext` lets authors embed raw C++ between braces.\
//! Because whitespace, newlines, or comments may appear between the
//! `cpptext` keyword and the opening `{`, the lexer must remember that
//! it has just seen `cpptext` - hence a state machine.
//!
//! ## Modes
//!
//! - **Normal** - ordinary `UnrealScript` `RawTokens`.
//! - **`AwaitingCppBlock`** - after `cpptext`, waiting for the next `{`.
//!
//! When that brace arrives, the lexer consumes the entire C++ block as
//! one `RawToken` (`RawToken::Brace(BraceKind::CppBlock)`), tracking nested
//! braces, strings, and comments on the way. If the closing `}` is
//! missing, everything to EOF is treated as C++; downstream parsers must
//! handle that gracefully.
use logos::Lexer;
/// Which lexer mode we're in. See the module docs for the full story.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Default)]
enum LexerMode {
    /// Lexing regular `UnrealScript`.
    #[default]
    Normal,
    /// Saw `cpptext` (or `cppstruct`); waiting for the opening `{` of a C++
    /// block.
    AwaitingCppBlock,
}
/// Extra per-lexer state. Currently just holds the [`LexerMode`].
///
/// This is a logos-specific implementation detail: it is threaded through the
/// lexer via the `#[logos(extras = LexerState)]` attribute on [`RawToken`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub struct LexerState {
    // Current mode; toggled by the `cpptext`/`cppstruct` token callbacks.
    mode: LexerMode,
}
/// Distinguishes an ordinary `{` token from one that starts
/// an embedded C++ block.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum BraceKind {
    /// An ordinary `UnrealScript` `{`.
    Normal,
    /// A `{` that starts an embedded C++ block and consumes through its
    /// matching `}`.
    CppBlock,
}
/// Tokens produced by the `UnrealScript` lexer.
///
/// Includes both syntactic tokens and trivia such as whitespace, newlines,
/// and comments.
#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[logos(extras = LexerState)]
pub enum RawToken {
    // # Compiler/directive keywords
    #[regex(r"(?i)#exec[^\r\n]*(?:\r\n|\n|\r)?")]
    ExecDirective,
    // The callback arms `AwaitingCppBlock` mode only when a `{` actually
    // follows (ignoring trivia); otherwise `cpptext` is a bare keyword.
    #[regex("(?i)cpptext", |lex| {
        if is_next_nontrivia_left_brace(lex) {
            lex.extras.mode = LexerMode::AwaitingCppBlock;
        } else {
            lex.extras.mode = LexerMode::Normal;
        }
    })]
    CppText,
    #[regex("(?i)cppstruct", |lex| {
        if is_next_nontrivia_left_brace(lex) {
            lex.extras.mode = LexerMode::AwaitingCppBlock;
        } else {
            lex.extras.mode = LexerMode::Normal;
        }
    })]
    CppStruct,
    // # Declaration & structural keywords
    // NOTE(review): earlier regex-based rule kept for reference; superseded
    // by the `#[token(..., ignore(case))]` rule below.
    //#[regex("(?i)class")]
    #[token("class", ignore(case))]
    Class,
    #[token("struct", ignore(case))]
    Struct,
    #[token("enum", ignore(case))]
    Enum,
    #[token("state", ignore(case))]
    State,
    #[token("auto", ignore(case))]
    Auto,
    #[token("function", ignore(case))]
    Function,
    #[token("event", ignore(case))]
    Event,
    #[token("delegate", ignore(case))]
    Delegate,
    #[token("var", ignore(case))]
    Var,
    #[token("local", ignore(case))]
    Local,
    // # Inheritance, interface, dependencies
    #[token("extends", ignore(case))]
    Extends,
    #[token("dependson", ignore(case))]
    DependsOn,
    // # Access modifiers & properties
    #[token("private", ignore(case))]
    Private,
    #[token("protected", ignore(case))]
    Protected,
    #[token("public", ignore(case))]
    Public,
    #[token("const", ignore(case))]
    Const,
    #[token("static", ignore(case))]
    Static,
    #[token("native", ignore(case))]
    Native,
    #[token("abstract", ignore(case))]
    Abstract,
    #[token("deprecated", ignore(case))]
    Deprecated,
    #[token("safereplace", ignore(case))]
    SafeReplace,
    #[token("exportstructs", ignore(case))]
    ExportStructs,
    #[token("input", ignore(case))]
    Input,
    // # UnrealScript metadata/specifiers
    #[token("final", ignore(case))]
    Final,
    #[token("default", ignore(case))]
    Default,
    #[token("defaultproperties", ignore(case))]
    DefaultProperties,
    #[token("object", ignore(case))]
    Object,
    #[token("begin", ignore(case))]
    Begin,
    #[token("end", ignore(case))]
    End,
    #[token("optional", ignore(case))]
    Optional,
    #[token("config", ignore(case))]
    Config,
    #[token("perobjectconfig", ignore(case))]
    PerObjectConfig,
    #[token("globalconfig", ignore(case))]
    GlobalConfig,
    #[token("collapsecategories", ignore(case))]
    CollapseCategories,
    #[token("dontcollapsecategories", ignore(case))]
    DontCollapseCategories,
    #[token("hidecategories", ignore(case))]
    HideCategories,
    #[token("showcategories", ignore(case))]
    ShowCategories,
    #[token("localized", ignore(case))]
    Localized,
    #[token("placeable", ignore(case))]
    Placeable,
    #[token("notplaceable", ignore(case))]
    NotPlaceable,
    #[token("instanced", ignore(case))]
    Instanced,
    #[token("editconst", ignore(case))]
    EditConst,
    #[token("editconstarray", ignore(case))]
    EditConstArray,
    #[token("editinline", ignore(case))]
    EditInline,
    #[token("editinlineuse", ignore(case))]
    EditInlineUse,
    #[token("editinlinenew", ignore(case))]
    EditInlineNew,
    #[token("noteditinlinenew", ignore(case))]
    NotEditInlineNew,
    #[token("edfindable", ignore(case))]
    EdFindable,
    #[token("editinlinenotify", ignore(case))]
    EditInlineNotify,
    #[token("parseconfig", ignore(case))]
    ParseConfig,
    #[token("automated", ignore(case))]
    Automated,
    #[token("dynamicrecompile", ignore(case))]
    DynamicRecompile,
    #[token("transient", ignore(case))]
    Transient,
    #[token("long", ignore(case))]
    Long,
    #[token("operator", ignore(case))]
    Operator,
    #[token("preoperator", ignore(case))]
    PreOperator,
    #[token("postoperator", ignore(case))]
    PostOperator,
    #[token("simulated", ignore(case))]
    Simulated,
    #[token("exec", ignore(case))]
    Exec,
    #[token("latent", ignore(case))]
    Latent,
    #[token("iterator", ignore(case))]
    Iterator,
    #[token("out", ignore(case))]
    Out,
    #[token("skip", ignore(case))]
    Skip,
    #[token("singular", ignore(case))]
    Singular,
    #[token("coerce", ignore(case))]
    Coerce,
    #[token("assert", ignore(case))]
    Assert,
    #[token("ignores", ignore(case))]
    Ignores,
    #[token("within", ignore(case))]
    Within,
    #[token("init", ignore(case))]
    Init,
    #[token("export", ignore(case))]
    Export,
    #[token("noexport", ignore(case))]
    NoExport,
    #[token("hidedropdown", ignore(case))]
    HideDropdown,
    #[token("travel", ignore(case))]
    Travel,
    #[token("cache", ignore(case))]
    Cache,
    #[token("cacheexempt", ignore(case))]
    CacheExempt,
    // # Replication-related
    #[token("reliable", ignore(case))]
    Reliable,
    #[token("unreliable", ignore(case))]
    Unreliable,
    #[token("replication", ignore(case))]
    Replication,
    #[token("nativereplication", ignore(case))]
    NativeReplication,
    // # Control-flow keywords
    #[token("goto", ignore(case))]
    Goto,
    #[token("if", ignore(case))]
    If,
    #[token("else", ignore(case))]
    Else,
    #[token("switch", ignore(case))]
    Switch,
    #[token("case", ignore(case))]
    Case,
    #[token("for", ignore(case))]
    For,
    #[token("foreach", ignore(case))]
    ForEach,
    #[token("while", ignore(case))]
    While,
    #[token("do", ignore(case))]
    Do,
    #[token("until", ignore(case))]
    Until,
    #[token("break", ignore(case))]
    Break,
    #[token("continue", ignore(case))]
    Continue,
    #[token("return", ignore(case))]
    Return,
    // # Built-in types
    #[token("int", ignore(case))]
    Int,
    #[token("float", ignore(case))]
    Float,
    #[token("bool", ignore(case))]
    Bool,
    #[token("byte", ignore(case))]
    Byte,
    #[token("string", ignore(case))]
    String,
    #[token("array", ignore(case))]
    Array,
    #[token("name", ignore(case))]
    Name,
    // FloatLiteral must come before IntegerLiteral and '.'
    // to have higher priority.
    // It also recognizes things like: `1.foo`, `1.foo.bar`, `1.2.3`.
    // It has to. Because UnrealScript is a pile of-... wonderful language,
    // where everything is possible.
    #[regex(r"[0-9]+(?:\.(?:[0-9]+|[A-Za-z_][A-Za-z0-9_]*))+[fF]?")]
    #[regex(r"(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[eE][+-]?[0-9]+)?[fF]?")]
    #[regex(r"[0-9]+[eE][+-]?[0-9]+[fF]?")]
    FloatLiteral,
    // Binary, octal, and hex forms allow `_` digit separators; decimal
    // does not.
    #[regex(r"0b[01](?:_?[01])*")]
    #[regex(r"0o[0-7](?:_?[0-7])*")]
    #[regex(r"0x[0-9A-Fa-f](?:_?[0-9A-Fa-f])*")]
    #[regex(r"[0-9][0-9]*")]
    IntegerLiteral,
    // String literals cannot span lines; escapes like `\"` are allowed.
    #[regex(r#""([^"\\\r\n]|\\.)*""#)]
    StringLiteral,
    #[regex(r"'[a-zA-Z0-9_\. \-]*'")]
    NameLiteral,
    #[token("true", ignore(case))]
    True,
    #[token("false", ignore(case))]
    False,
    #[token("none", ignore(case))]
    None,
    #[token("self", ignore(case))]
    SelfValue,
    #[token("new", ignore(case))]
    New,
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,
    // # Operations
    // ## Exponentiation
    #[token("**")]
    Exponentiation,
    // ## Unary
    #[token("++")]
    Increment,
    #[token("--")]
    Decrement,
    #[token("!")]
    Not,
    #[token("~")]
    BitwiseNot,
    // ## Vector
    #[token("dot", ignore(case))]
    Dot,
    #[token("cross", ignore(case))]
    Cross,
    // ## Multiplicative
    #[token("*")]
    Multiply,
    #[token("/")]
    Divide,
    #[token("%")]
    Modulo,
    // ## Additive
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    // ## String manipulation
    #[token("@")]
    ConcatSpace,
    #[token("$")]
    Concat,
    // ## Shifts
    #[token("<<")]
    LeftShift,
    #[token(">>>")]
    LogicalRightShift,
    #[token(">>")]
    RightShift,
    // ## Relational
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,
    #[token(">")]
    Greater,
    #[token(">=")]
    GreaterEqual,
    #[token("==")]
    Equal,
    #[token("!=")]
    NotEqual,
    #[token("~=")]
    ApproximatelyEqual,
    #[token("clockwisefrom", ignore(case))]
    ClockwiseFrom,
    // ## Bitwise
    #[token("&")]
    BitwiseAnd,
    #[token("|")]
    BitwiseOr,
    #[token("^")]
    BitwiseXor,
    // ## Logical
    #[token("&&")]
    LogicalAnd,
    #[token("^^")]
    LogicalXor,
    #[token("||")]
    LogicalOr,
    // ## Assignments
    #[token("=")]
    Assign,
    #[token("*=")]
    MultiplyAssign,
    #[token("/=")]
    DivideAssign,
    #[token("%=")]
    ModuloAssign,
    #[token("+=")]
    PlusAssign,
    #[token("-=")]
    MinusAssign,
    #[token("$=")]
    ConcatAssign,
    #[token("@=")]
    ConcatSpaceAssign,
    // # Punctuation & delimiters
    #[token("(")]
    LeftParenthesis,
    #[token(")")]
    RightParenthesis,
    // `{` is mode-sensitive: after `cpptext`/`cppstruct` the callback
    // swallows the whole embedded C++ block (see `process_left_brace`).
    #[token("{", process_left_brace)]
    Brace(BraceKind),
    #[token("}")]
    RightBrace,
    #[token("[")]
    LeftBracket,
    #[token("]")]
    RightBracket,
    #[token(";")]
    Semicolon,
    #[token(",")]
    Comma,
    #[token(".")]
    Period,
    #[token(":")]
    Colon,
    #[token("#")]
    Hash,
    #[token("?")]
    Question,
    // # Comments & whitespaces
    #[regex(r"//[^\r\n]*")]
    LineComment,
    // The callback extends the match through the (possibly nested)
    // closing `*/`.
    #[regex(r"/\*", handle_block_comment)]
    BlockComment,
    #[regex(r"\r\n|\n|\r")]
    Newline,
    #[regex(r"[ \t]+")]
    Whitespace,
    // # Technical
    // Not matched by any rule above; represents a lexing failure.
    Error,
}
/// Consumes an `UnrealScript` `/* ... */` block comment, including nested
/// comments.
///
/// Matches the entire comment, including its delimiters.
/// If the comment is unterminated, consumes to the end of input.
fn handle_block_comment(lexer: &mut Lexer<RawToken>) {
    // The opening `/*` was already matched by the token rule, so we start
    // one level deep.
    let mut open_comments = 1usize;
    loop {
        let rest = lexer.remainder();
        if rest.starts_with("/*") {
            open_comments += 1;
            lexer.bump(2);
        } else if rest.starts_with("*/") {
            open_comments -= 1;
            lexer.bump(2);
            if open_comments == 0 {
                return;
            }
        } else if let Some(character) = rest.chars().next() {
            lexer.bump(character.len_utf8());
        } else {
            // Unterminated comment: everything up to EOF has been consumed.
            return;
        }
    }
}
/// Processes `{` according to the current lexer mode.
///
/// Returns [`BraceKind::Normal`] for ordinary `UnrealScript` braces.
/// After `cpptext` or `cppstruct`, consumes the embedded C++ block and returns
/// [`BraceKind::CppBlock`].
fn process_left_brace(lexer: &mut Lexer<RawToken>) -> BraceKind {
    if lexer.extras.mode == LexerMode::AwaitingCppBlock {
        // Each `cpptext`/`cppstruct` arms the mode for exactly one block.
        lexer.extras.mode = LexerMode::Normal;
        consume_cpp_block(lexer);
        BraceKind::CppBlock
    } else {
        BraceKind::Normal
    }
}
/// Consumes a complete C++ block, handling:
/// - Nested `{...}` pairs
/// - String literals (`"..."` and `'...'`), including escaped quotes
/// - Line comments (`// ...\n`)
/// - Block comments (`/* ... */`)
///
/// Leaves the lexer positioned immediately after the closing `}` of the block.
/// The opening `{` must have already been consumed by the caller.
///
/// We target UE2-era cpp blocks, so no need for anything fancy.
fn consume_cpp_block(lexer: &mut Lexer<RawToken>) {
    // The caller already consumed the opening `{`, so start one level deep.
    let mut brace_depth = 1;
    while let Some(next_character) = lexer.remainder().chars().next() {
        match next_character {
            '{' => {
                brace_depth += 1;
                lexer.bump(1);
            }
            '}' => {
                brace_depth -= 1;
                lexer.bump(1);
                if brace_depth == 0 {
                    break;
                }
            }
            // Comments and quoted literals are skipped wholesale, so braces
            // inside them do not affect `brace_depth`.
            '/' if lexer.remainder().starts_with("/*") => {
                lexer.bump(2); // consuming two-byte sequence `/*`
                consume_c_style_block_comment(lexer);
            }
            '/' if lexer.remainder().starts_with("//") => {
                lexer.bump(2); // consuming two-byte sequence `//`
                while let Some(next_character) = lexer.remainder().chars().next() {
                    lexer.bump(next_character.len_utf8());
                    if next_character == '\n' || next_character == '\r' {
                        break;
                    }
                }
            }
            '"' | '\'' => {
                lexer.bump(1); // skip `'` or `"`
                consume_quoted_cpp_literal(lexer, next_character);
            }
            _ => lexer.bump(next_character.len_utf8()),
        }
    }
}
/// Consumes a non-nesting C-style `/* ... */` comment.
///
/// Assumes that the opening `/*` has already been consumed.
/// If the comment is unterminated, consumes to the end of input.
fn consume_c_style_block_comment(lexer: &mut Lexer<RawToken>) {
    loop {
        if lexer.remainder().starts_with("*/") {
            lexer.bump(2);
            return;
        }
        match lexer.remainder().chars().next() {
            Some(character) => lexer.bump(character.len_utf8()),
            // Unterminated comment: reached EOF.
            None => return,
        }
    }
}
/// Consumes a quoted C++ string or character literal.
///
/// Assumes that the opening delimiter has already been consumed.
/// If the literal is unterminated, consumes to the end of input.
fn consume_quoted_cpp_literal(lexer: &mut Lexer<RawToken>, delimiter: char) {
    loop {
        let Some(current) = lexer.remainder().chars().next() else {
            // Unterminated literal: reached EOF.
            return;
        };
        lexer.bump(current.len_utf8());
        if current == '\\' {
            // Escape sequence: the escaped character can never terminate
            // the literal, so consume it unconditionally.
            if let Some(escaped) = lexer.remainder().chars().next() {
                lexer.bump(escaped.len_utf8());
            }
        } else if current == delimiter {
            return;
        }
    }
}
/// Peeks ahead from the current lexer position, skipping "trivia", and
/// reports whether the next significant character is `{`.
///
/// Trivia here means:
/// - Spaces and tabs
/// - Newlines (`\r`, `\n`, or `\r\n`)
/// - Line comments (`// ...`)
/// - Block comments (`/* ... */`), including nested ones
///
/// This is used after lexing tokens like `cpptext` or `cppstruct`, where
/// `UnrealScript` allows arbitrary trivia between the keyword and the opening
/// brace of the embedded C++ block.
///
/// Returns `true` if the next non-trivia character is `{`, otherwise `false`.
/// If the input ends while skipping trivia, returns `false`.
fn is_next_nontrivia_left_brace(lexer: &Lexer<RawToken>) -> bool {
    let mut rest = lexer.remainder();
    loop {
        if let Some(after_marker) = rest.strip_prefix("//") {
            // Line comment: skip through the first line-break character.
            rest = after_marker;
            while let Some(character) = rest.chars().next() {
                rest = &rest[character.len_utf8()..];
                if character == '\n' || character == '\r' {
                    break;
                }
            }
        } else if let Some(after_marker) = rest.strip_prefix("/*") {
            // Block comment: honor nesting, just like the main lexer does.
            rest = after_marker;
            let mut depth = 1;
            while depth > 0 {
                if let Some(after_open) = rest.strip_prefix("/*") {
                    depth += 1;
                    rest = after_open;
                } else if let Some(after_close) = rest.strip_prefix("*/") {
                    depth -= 1;
                    rest = after_close;
                } else if let Some(character) = rest.chars().next() {
                    rest = &rest[character.len_utf8()..];
                } else {
                    // EOF inside a comment: no `{` can follow.
                    return false;
                }
            }
        } else {
            match rest.chars().next() {
                // Plain whitespace and newline bytes are all one byte wide.
                Some(' ' | '\t' | '\r' | '\n') => rest = &rest[1..],
                Some(character) => return character == '{',
                None => return false,
            }
        }
    }
}

338
rottlib/src/lexer/tests.rs Normal file
View File

@ -0,0 +1,338 @@
use super::{Keyword, Token, TokenPosition, TokenizedFile, split_visible_line_segments};
/// Concatenates every token's lexeme; for a lossless tokenizer the result
/// must equal the original source text.
fn reconstruct_source(file: &TokenizedFile<'_>) -> String {
    let mut source = String::new();
    for token_data in &file.buffer {
        source.push_str(token_data.lexeme);
    }
    source
}
/// Projects the token buffer down to `(kind, lexeme)` pairs for compact
/// assertions.
fn token_kinds_and_lexemes<'src>(file: &TokenizedFile<'src>) -> Vec<(Token, &'src str)> {
    let mut pairs = Vec::with_capacity(file.buffer.len());
    for piece in &file.buffer {
        pairs.push((piece.token, piece.lexeme));
    }
    pairs
}
// Text with no line terminators produces no segments at all.
#[test]
fn split_visible_line_segments_returns_empty_for_single_line_text() {
    assert!(split_visible_line_segments("abcdef").is_empty());
    assert!(split_visible_line_segments("").is_empty());
}
// `\r\n`, `\r`, and `\n` each end a physical line; the segment after the
// final `\n` is present but empty.
#[test]
fn split_visible_line_segments_handles_mixed_line_endings() {
    let text = "ab\r\ncd\ref\n";
    let segments = split_visible_line_segments(text);
    assert_eq!(segments, vec![0..2, 4..6, 7..9, 10..10]);
    let visible: Vec<&str> = segments.iter().map(|range| &text[range.clone()]).collect();
    assert_eq!(visible, vec!["ab", "cd", "ef", ""]);
}
// Concatenating every token lexeme must reproduce the input byte-for-byte,
// even with mixed line endings and comments.
#[test]
fn tokenization_is_lossless_for_mixed_input() {
    let source = concat!(
        "class Foo extends Bar;\r\n",
        "var string S;\n",
        "/* block comment */\r",
        "defaultproperties {}\n",
        "X = 1.25e+2;\n",
    );
    let file = TokenizedFile::tokenize(source);
    assert_eq!(reconstruct_source(&file), source);
}
// "a\n" is a single physical line; its newline token belongs to that line
// rather than opening a second, empty one.
#[test]
fn trailing_newline_does_not_create_extra_empty_line() {
    let source = "a\n";
    let file = TokenizedFile::tokenize(source);
    assert_eq!(file.lines.len(), 1);
    assert_eq!(file.lines[0].continued_from, None);
    assert_eq!(file.lines[0].local_range(), Some(0..2));
    assert_eq!(
        token_kinds_and_lexemes(&file),
        vec![(Token::Identifier, "a"), (Token::Newline, "\n")]
    );
}
// A final line with no terminator still gets its own `Line` entry.
#[test]
fn final_line_without_trailing_newline_is_committed() {
    let source = "a\nb";
    let file = TokenizedFile::tokenize(source);
    assert_eq!(file.lines.len(), 2);
    assert_eq!(file.lines[0].continued_from, None);
    assert_eq!(file.lines[0].local_range(), Some(0..2));
    assert_eq!(file.lines[1].continued_from, None);
    assert_eq!(file.lines[1].local_range(), Some(2..3));
    assert_eq!(
        token_kinds_and_lexemes(&file),
        vec![
            (Token::Identifier, "a"),
            (Token::Newline, "\n"),
            (Token::Identifier, "b"),
        ]
    );
}
// A block comment spanning two lines stays a single token; the second line
// records its origin via `continued_from`, and `multi_line_map` stores the
// comment's visible byte segments per line.
#[test]
fn multiline_block_comment_creates_continuation_line_with_local_tokens() {
    let source = "a/*x\ny*/b";
    let file = TokenizedFile::tokenize(source);
    assert_eq!(
        token_kinds_and_lexemes(&file),
        vec![
            (Token::Identifier, "a"),
            (Token::BlockComment, "/*x\ny*/"),
            (Token::Identifier, "b"),
        ]
    );
    assert_eq!(file.lines.len(), 2);
    assert_eq!(file.lines[0].continued_from, None);
    assert_eq!(file.lines[0].local_range(), Some(0..2));
    assert_eq!(file.lines[1].continued_from, Some(0));
    assert_eq!(file.lines[1].local_range(), Some(2..3));
    let block_comment_index = 1;
    assert_eq!(
        file.multi_line_map.get(&block_comment_index),
        Some(&vec![0..3, 4..7])
    );
}
// A file that is nothing but one multi-line token ends with a continuation
// line that has no local tokens (`local_range() == None`).
#[test]
fn pure_multiline_token_finishes_with_bare_continuation_line() {
    let source = "/*a\nb*/";
    let file = TokenizedFile::tokenize(source);
    assert_eq!(
        token_kinds_and_lexemes(&file),
        vec![(Token::BlockComment, "/*a\nb*/")]
    );
    assert_eq!(file.lines.len(), 2);
    assert_eq!(file.lines[0].continued_from, None);
    assert_eq!(file.lines[0].local_range(), Some(0..1));
    assert_eq!(file.lines[1].continued_from, Some(0));
    assert_eq!(file.lines[1].local_range(), None);
    assert_eq!(file.multi_line_map.get(&0), Some(&vec![0..3, 4..7]));
}
// UnrealScript block comments nest; the whole nested construct must lex as
// one `BlockComment` token with no errors.
#[test]
fn nested_block_comments_are_consumed_as_one_token() {
    let source = "/* outer /* inner */ still outer */";
    let file = TokenizedFile::tokenize(source);
    assert!(!file.has_errors());
    assert_eq!(file.buffer.len(), 1);
    assert_eq!(file.buffer[0].token, Token::BlockComment);
    assert_eq!(file.buffer[0].lexeme, source);
}
// Whitespace, comments, and newlines between `cpptext` and `{` must not
// defeat C++ block detection: the brace still opens a `CppBlock` token.
#[test]
fn cpptext_with_trivia_before_brace_produces_cpp_block_token() {
    let source = "cpptext /* gap */\n{ int x; if (y) { z(); } }";
    let file = TokenizedFile::tokenize(source);
    assert_eq!(
        token_kinds_and_lexemes(&file),
        vec![
            (Token::Keyword(Keyword::CppText), "cpptext"),
            (Token::Whitespace, " "),
            (Token::BlockComment, "/* gap */"),
            (Token::Newline, "\n"),
            (Token::CppBlock, "{ int x; if (y) { z(); } }"),
        ]
    );
    assert_eq!(file.lines.len(), 2);
    assert_eq!(file.lines[0].continued_from, None);
    assert_eq!(file.lines[0].local_range(), Some(0..4));
    assert_eq!(file.lines[1].continued_from, None);
    assert_eq!(file.lines[1].local_range(), Some(4..5));
}
// When `cpptext` is not followed (after trivia) by `{`, the braces later in
// the input lex as ordinary tokens rather than a C++ block.
#[test]
fn cpptext_without_following_brace_does_not_start_cpp_block_mode() {
    let source = "cpptext Foo { bar }";
    let file = TokenizedFile::tokenize(source);
    let tokens = token_kinds_and_lexemes(&file);
    assert!(!tokens.iter().any(|(token, _)| *token == Token::CppBlock));
    assert!(
        tokens
            .iter()
            .any(|(token, lexeme)| *token == Token::Keyword(Keyword::CppText)
                && *lexeme == "cpptext")
    );
    assert!(
        tokens
            .iter()
            .any(|(token, lexeme)| *token == Token::LeftBrace && *lexeme == "{")
    );
    assert!(
        tokens
            .iter()
            .any(|(token, lexeme)| *token == Token::RightBrace && *lexeme == "}")
    );
}
// `utf16_length` counts UTF-16 code units: the emoji is a surrogate pair
// (2 units) plus 2 quote characters = 4.
#[test]
fn utf16_length_is_precomputed_per_token() {
    let source = "\"😀\"";
    let file = TokenizedFile::tokenize(source);
    assert_eq!(file.buffer.len(), 1);
    assert_eq!(file.buffer[0].token, Token::StringLiteral);
    assert_eq!(file.buffer[0].utf16_length, source.encode_utf16().count());
    assert_eq!(file.buffer[0].utf16_length, 4);
}
// Characters no rule matches become `Token::Error`, but the input is still
// preserved losslessly and the error flag is set.
#[test]
fn lexer_reports_error_tokens() {
    let source = "`";
    let file = TokenizedFile::tokenize(source);
    assert!(file.has_errors());
    assert_eq!(reconstruct_source(&file), source);
    assert_eq!(file.buffer.len(), 1);
    assert_eq!(file.buffer[0].token, Token::Error);
    assert_eq!(file.buffer[0].lexeme, "`");
}
// Spot-checks of the token classification predicates, including negative
// cases for operators and control-flow keywords.
#[test]
fn token_predicates_match_current_rules() {
    assert!(Token::Identifier.is_valid_identifier_name());
    assert!(Token::Keyword(Keyword::Int).is_valid_identifier_name());
    assert!(Token::Keyword(Keyword::Int).is_valid_type_name());
    assert!(Token::Keyword(Keyword::Delegate).is_valid_type_name());
    assert!(Token::Keyword(Keyword::Exec).is_valid_function_modifier());
    assert!(Token::Keyword(Keyword::Operator).is_valid_function_modifier());
    assert!(Token::Keyword(Keyword::Config).is_valid_function_modifier());
    assert!(!Token::Plus.is_valid_identifier_name());
    assert!(!Token::Plus.is_valid_type_name());
    assert!(!Token::Keyword(Keyword::If).is_valid_function_modifier());
}
// The `Tokens` iterator must yield every buffer entry, in order, with
// positions that mirror buffer indices.
#[test]
fn tokens_iterator_yields_positions_in_buffer_order() {
    let source = "a + b";
    let file = TokenizedFile::tokenize(source);
    let collected: Vec<_> = file.iter().collect();
    assert_eq!(collected.len(), file.buffer.len());
    for (expected_index, (position, token_data)) in collected.into_iter().enumerate() {
        assert_eq!(position.0, expected_index);
        assert_eq!(token_data, file.buffer[expected_index]);
    }
}
/// Flattens the tokens of `line_number` into `(index, kind, lexeme)` tuples
/// so tests can compare them against literal vectors.
fn line_token_kinds_and_lexemes<'src>(
    file: &TokenizedFile<'src>,
    line_number: usize,
) -> Vec<(usize, Token, &'src str)> {
    let mut result = Vec::new();
    for (position, token_data) in file.line_tokens(line_number) {
        result.push((position.0, token_data.token, token_data.lexeme));
    }
    result
}
#[test]
fn line_count_counts_physical_lines_without_trailing_empty_line() {
    // A trailing newline terminates the last line instead of opening a new
    // empty one.
    let cases = [("", 0), ("a", 1), ("a\n", 1), ("a\nb\n", 2)];
    for (source, expected_lines) in cases {
        assert_eq!(TokenizedFile::tokenize(source).line_count(), expected_lines);
    }
}
#[test]
fn line_tokens_return_only_tokens_that_start_on_that_line() {
    let source = "a/*x\ny*/b\nc";
    let file = TokenizedFile::tokenize(source);
    // The multi-line block comment belongs to the line where it starts.
    let expected_line_0 = vec![
        (0, Token::Identifier, "a"),
        (1, Token::BlockComment, "/*x\ny*/"),
    ];
    assert_eq!(line_token_kinds_and_lexemes(&file, 0), expected_line_0);
    // Important: the carried fragment "y*/" is NOT yielded here.
    let expected_line_1 = vec![(2, Token::Identifier, "b"), (3, Token::Newline, "\n")];
    assert_eq!(line_token_kinds_and_lexemes(&file, 1), expected_line_1);
    let expected_line_2 = vec![(4, Token::Identifier, "c")];
    assert_eq!(line_token_kinds_and_lexemes(&file, 2), expected_line_2);
}
#[test]
fn line_tokens_are_empty_for_continuation_only_or_out_of_bounds_lines() {
    let file = TokenizedFile::tokenize("/*a\nb*/");
    // Line 1 only continues the block comment that started on line 0.
    assert!(file.line_tokens(1).next().is_none());
    // Lines past the end of the file yield nothing.
    assert!(file.line_tokens(999).next().is_none());
}
#[test]
fn token_at_returns_token_for_valid_position_and_none_for_invalid_one() {
    let file = TokenizedFile::tokenize("a + b");
    assert_eq!(file.token_at(TokenPosition(0)), Some(file.buffer[0]));
    let kind_at = |index: usize| file.token_at(TokenPosition(index)).map(|t| t.token);
    assert_eq!(kind_at(1), Some(Token::Whitespace));
    assert_eq!(kind_at(2), Some(Token::Plus));
    // One past the last buffered token is out of range.
    assert_eq!(file.token_at(TokenPosition(file.buffer.len())), None);
}
#[test]
fn line_text_omits_line_terminators_and_handles_empty_lines() {
    let file = TokenizedFile::tokenize("left\n\nright");
    // Each physical line is returned without its terminator; the middle
    // line is empty but still present.
    for (line_number, expected_text) in [(0, "left"), (1, ""), (2, "right")] {
        assert_eq!(file.line_text(line_number).as_deref(), Some(expected_text));
    }
    // Out-of-bounds lines have no text.
    assert_eq!(file.line_text(999), None);
}
#[test]
fn line_text_includes_carried_fragment_on_continued_line() {
    // Line 1 starts inside a block comment: its text must still include the
    // comment's tail "y*/" followed by the code after it.
    let file = TokenizedFile::tokenize("a/*x\ny*/b");
    let continued_line = file.line_text(1);
    assert_eq!(continued_line.as_deref(), Some("y*/b"));
}
#[test]
fn line_text_on_origin_line_of_multiline_token_uses_only_visible_part() {
    // The block comment spans two physical lines, but line 0's text stops at
    // the line break instead of including the whole token lexeme.
    let file = TokenizedFile::tokenize("a/*x\ny*/b");
    let origin_line = file.line_text(0);
    assert_eq!(origin_line.as_deref(), Some("a/*x"));
}

560
rottlib/src/lexer/token.rs Normal file
View File

@ -0,0 +1,560 @@
//! Token definitions for Fermented `UnrealScript`.
//!
//! These are the tokens consumed by the parser and derived from [`RawToken`]s.
use super::{BraceKind, raw_lexer::RawToken};
/// Tokens consumed by the Fermented `UnrealScript` parser.
///
/// Unlike [`RawToken`], every reserved word is folded into a single
/// [`Token::Keyword`] variant, and an opening brace is split into
/// [`Token::LeftBrace`] vs [`Token::CppBlock`] (see the `From<RawToken>`
/// impl below).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum Token {
    ExecDirective,
    /// Any reserved word; the concrete word is carried as a [`Keyword`].
    Keyword(Keyword),
    // Primaries
    FloatLiteral,
    IntegerLiteral,
    /// String literal; the lexeme includes the surrounding quotes.
    StringLiteral,
    NameLiteral,
    Identifier,
    // Operations
    Exponentiation,
    Increment,
    Decrement,
    Not,
    BitwiseNot,
    Multiply,
    Divide,
    Modulo,
    Plus,
    Minus,
    ConcatSpace,
    Concat,
    LeftShift,
    LogicalRightShift,
    RightShift,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,
    Equal,
    NotEqual,
    ApproximatelyEqual,
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    LogicalAnd,
    LogicalXor,
    LogicalOr,
    Assign,
    MultiplyAssign,
    DivideAssign,
    ModuloAssign,
    PlusAssign,
    MinusAssign,
    ConcatAssign,
    ConcatSpaceAssign,
    // Delimiters
    LeftParenthesis,
    RightParenthesis,
    /// An ordinary `{` (see [`BraceKind::Normal`]).
    LeftBrace,
    /// Brace that opens a C++ code block ([`BraceKind::CppBlock`]);
    /// may span multiple lines (see [`Token::can_span_lines`]).
    CppBlock,
    RightBrace,
    LeftBracket,
    RightBracket,
    Semicolon,
    Comma,
    Period,
    Colon,
    Hash,
    Question,
    // Trivia
    LineComment,
    /// `/* ... */` comment; may span multiple lines.
    BlockComment,
    Newline,
    Whitespace,
    // Technical - for representing a very wrong sequence of characters
    Error,
}
impl From<RawToken> for Token {
    #![allow(clippy::too_many_lines)]
    /// Converts a raw lexer token into a parser-level [`Token`].
    ///
    /// Three families of conversions happen here:
    ///
    /// 1. braces are disambiguated by their [`BraceKind`];
    /// 2. every reserved-word raw token collapses into [`Token::Keyword`];
    /// 3. everything else maps one-to-one onto the same-named variant.
    ///
    /// The match is exhaustive, so adding a [`RawToken`] variant forces this
    /// mapping to be updated.
    fn from(token: RawToken) -> Self {
        match token {
            // Non-trivial conversions
            RawToken::Brace(BraceKind::Normal) => Self::LeftBrace,
            RawToken::Brace(BraceKind::CppBlock) => Self::CppBlock,
            // Keyword conversions
            RawToken::CppText => Self::Keyword(Keyword::CppText),
            RawToken::CppStruct => Self::Keyword(Keyword::CppStruct),
            RawToken::Class => Self::Keyword(Keyword::Class),
            RawToken::Struct => Self::Keyword(Keyword::Struct),
            RawToken::Enum => Self::Keyword(Keyword::Enum),
            RawToken::State => Self::Keyword(Keyword::State),
            RawToken::Auto => Self::Keyword(Keyword::Auto),
            RawToken::Function => Self::Keyword(Keyword::Function),
            RawToken::Event => Self::Keyword(Keyword::Event),
            RawToken::Delegate => Self::Keyword(Keyword::Delegate),
            RawToken::Var => Self::Keyword(Keyword::Var),
            RawToken::Local => Self::Keyword(Keyword::Local),
            RawToken::Extends => Self::Keyword(Keyword::Extends),
            RawToken::DependsOn => Self::Keyword(Keyword::DependsOn),
            RawToken::Private => Self::Keyword(Keyword::Private),
            RawToken::Protected => Self::Keyword(Keyword::Protected),
            RawToken::Public => Self::Keyword(Keyword::Public),
            RawToken::Const => Self::Keyword(Keyword::Const),
            RawToken::Static => Self::Keyword(Keyword::Static),
            RawToken::Native => Self::Keyword(Keyword::Native),
            RawToken::Abstract => Self::Keyword(Keyword::Abstract),
            RawToken::Deprecated => Self::Keyword(Keyword::Deprecated),
            RawToken::SafeReplace => Self::Keyword(Keyword::SafeReplace),
            RawToken::ExportStructs => Self::Keyword(Keyword::ExportStructs),
            RawToken::Input => Self::Keyword(Keyword::Input),
            RawToken::Final => Self::Keyword(Keyword::Final),
            RawToken::Default => Self::Keyword(Keyword::Default),
            RawToken::DefaultProperties => Self::Keyword(Keyword::DefaultProperties),
            RawToken::Object => Self::Keyword(Keyword::Object),
            RawToken::Begin => Self::Keyword(Keyword::Begin),
            RawToken::End => Self::Keyword(Keyword::End),
            RawToken::Optional => Self::Keyword(Keyword::Optional),
            RawToken::Config => Self::Keyword(Keyword::Config),
            RawToken::PerObjectConfig => Self::Keyword(Keyword::PerObjectConfig),
            RawToken::GlobalConfig => Self::Keyword(Keyword::GlobalConfig),
            RawToken::CollapseCategories => Self::Keyword(Keyword::CollapseCategories),
            RawToken::DontCollapseCategories => Self::Keyword(Keyword::DontCollapseCategories),
            RawToken::HideCategories => Self::Keyword(Keyword::HideCategories),
            RawToken::ShowCategories => Self::Keyword(Keyword::ShowCategories),
            RawToken::Localized => Self::Keyword(Keyword::Localized),
            RawToken::Placeable => Self::Keyword(Keyword::Placeable),
            RawToken::NotPlaceable => Self::Keyword(Keyword::NotPlaceable),
            RawToken::Instanced => Self::Keyword(Keyword::Instanced),
            RawToken::EditConst => Self::Keyword(Keyword::EditConst),
            RawToken::EditConstArray => Self::Keyword(Keyword::EditConstArray),
            RawToken::EditInline => Self::Keyword(Keyword::EditInline),
            RawToken::EditInlineUse => Self::Keyword(Keyword::EditInlineUse),
            RawToken::EditInlineNew => Self::Keyword(Keyword::EditInlineNew),
            RawToken::NotEditInlineNew => Self::Keyword(Keyword::NotEditInlineNew),
            RawToken::EdFindable => Self::Keyword(Keyword::EdFindable),
            RawToken::EditInlineNotify => Self::Keyword(Keyword::EditInlineNotify),
            RawToken::ParseConfig => Self::Keyword(Keyword::ParseConfig),
            RawToken::Automated => Self::Keyword(Keyword::Automated),
            RawToken::DynamicRecompile => Self::Keyword(Keyword::DynamicRecompile),
            RawToken::Transient => Self::Keyword(Keyword::Transient),
            RawToken::Long => Self::Keyword(Keyword::Long),
            RawToken::Operator => Self::Keyword(Keyword::Operator),
            RawToken::PreOperator => Self::Keyword(Keyword::PreOperator),
            RawToken::PostOperator => Self::Keyword(Keyword::PostOperator),
            RawToken::Simulated => Self::Keyword(Keyword::Simulated),
            RawToken::Exec => Self::Keyword(Keyword::Exec),
            RawToken::Latent => Self::Keyword(Keyword::Latent),
            RawToken::Iterator => Self::Keyword(Keyword::Iterator),
            RawToken::Out => Self::Keyword(Keyword::Out),
            RawToken::Skip => Self::Keyword(Keyword::Skip),
            RawToken::Singular => Self::Keyword(Keyword::Singular),
            RawToken::Coerce => Self::Keyword(Keyword::Coerce),
            RawToken::Assert => Self::Keyword(Keyword::Assert),
            RawToken::Ignores => Self::Keyword(Keyword::Ignores),
            RawToken::Within => Self::Keyword(Keyword::Within),
            RawToken::Init => Self::Keyword(Keyword::Init),
            RawToken::Export => Self::Keyword(Keyword::Export),
            RawToken::NoExport => Self::Keyword(Keyword::NoExport),
            RawToken::HideDropdown => Self::Keyword(Keyword::HideDropdown),
            RawToken::Travel => Self::Keyword(Keyword::Travel),
            RawToken::Cache => Self::Keyword(Keyword::Cache),
            RawToken::CacheExempt => Self::Keyword(Keyword::CacheExempt),
            RawToken::Reliable => Self::Keyword(Keyword::Reliable),
            RawToken::Unreliable => Self::Keyword(Keyword::Unreliable),
            RawToken::Replication => Self::Keyword(Keyword::Replication),
            RawToken::NativeReplication => Self::Keyword(Keyword::NativeReplication),
            RawToken::Goto => Self::Keyword(Keyword::Goto),
            RawToken::If => Self::Keyword(Keyword::If),
            RawToken::Else => Self::Keyword(Keyword::Else),
            RawToken::Switch => Self::Keyword(Keyword::Switch),
            RawToken::Case => Self::Keyword(Keyword::Case),
            RawToken::For => Self::Keyword(Keyword::For),
            RawToken::ForEach => Self::Keyword(Keyword::ForEach),
            RawToken::While => Self::Keyword(Keyword::While),
            RawToken::Do => Self::Keyword(Keyword::Do),
            RawToken::Until => Self::Keyword(Keyword::Until),
            RawToken::Break => Self::Keyword(Keyword::Break),
            RawToken::Continue => Self::Keyword(Keyword::Continue),
            RawToken::Return => Self::Keyword(Keyword::Return),
            RawToken::Int => Self::Keyword(Keyword::Int),
            RawToken::Float => Self::Keyword(Keyword::Float),
            RawToken::Bool => Self::Keyword(Keyword::Bool),
            RawToken::Byte => Self::Keyword(Keyword::Byte),
            RawToken::String => Self::Keyword(Keyword::String),
            RawToken::Array => Self::Keyword(Keyword::Array),
            RawToken::Name => Self::Keyword(Keyword::Name),
            RawToken::True => Self::Keyword(Keyword::True),
            RawToken::False => Self::Keyword(Keyword::False),
            RawToken::None => Self::Keyword(Keyword::None),
            RawToken::SelfValue => Self::Keyword(Keyword::SelfValue),
            RawToken::New => Self::Keyword(Keyword::New),
            RawToken::Dot => Self::Keyword(Keyword::Dot),
            RawToken::Cross => Self::Keyword(Keyword::Cross),
            RawToken::ClockwiseFrom => Self::Keyword(Keyword::ClockwiseFrom),
            // Trivial 1-to-1 conversions.
            RawToken::ExecDirective => Self::ExecDirective,
            RawToken::FloatLiteral => Self::FloatLiteral,
            RawToken::IntegerLiteral => Self::IntegerLiteral,
            RawToken::StringLiteral => Self::StringLiteral,
            RawToken::NameLiteral => Self::NameLiteral,
            RawToken::Identifier => Self::Identifier,
            RawToken::Exponentiation => Self::Exponentiation,
            RawToken::Increment => Self::Increment,
            RawToken::Decrement => Self::Decrement,
            RawToken::Not => Self::Not,
            RawToken::BitwiseNot => Self::BitwiseNot,
            RawToken::Multiply => Self::Multiply,
            RawToken::Divide => Self::Divide,
            RawToken::Modulo => Self::Modulo,
            RawToken::Plus => Self::Plus,
            RawToken::Minus => Self::Minus,
            RawToken::ConcatSpace => Self::ConcatSpace,
            RawToken::Concat => Self::Concat,
            RawToken::LeftShift => Self::LeftShift,
            RawToken::LogicalRightShift => Self::LogicalRightShift,
            RawToken::RightShift => Self::RightShift,
            RawToken::Less => Self::Less,
            RawToken::LessEqual => Self::LessEqual,
            RawToken::Greater => Self::Greater,
            RawToken::GreaterEqual => Self::GreaterEqual,
            RawToken::Equal => Self::Equal,
            RawToken::NotEqual => Self::NotEqual,
            RawToken::ApproximatelyEqual => Self::ApproximatelyEqual,
            RawToken::BitwiseAnd => Self::BitwiseAnd,
            RawToken::BitwiseOr => Self::BitwiseOr,
            RawToken::BitwiseXor => Self::BitwiseXor,
            RawToken::LogicalAnd => Self::LogicalAnd,
            RawToken::LogicalXor => Self::LogicalXor,
            RawToken::LogicalOr => Self::LogicalOr,
            RawToken::Assign => Self::Assign,
            RawToken::MultiplyAssign => Self::MultiplyAssign,
            RawToken::DivideAssign => Self::DivideAssign,
            RawToken::ModuloAssign => Self::ModuloAssign,
            RawToken::PlusAssign => Self::PlusAssign,
            RawToken::MinusAssign => Self::MinusAssign,
            RawToken::ConcatAssign => Self::ConcatAssign,
            RawToken::ConcatSpaceAssign => Self::ConcatSpaceAssign,
            RawToken::LeftParenthesis => Self::LeftParenthesis,
            RawToken::RightParenthesis => Self::RightParenthesis,
            RawToken::RightBrace => Self::RightBrace,
            RawToken::LeftBracket => Self::LeftBracket,
            RawToken::RightBracket => Self::RightBracket,
            RawToken::Semicolon => Self::Semicolon,
            RawToken::Comma => Self::Comma,
            RawToken::Period => Self::Period,
            RawToken::Colon => Self::Colon,
            RawToken::Hash => Self::Hash,
            RawToken::Question => Self::Question,
            RawToken::LineComment => Self::LineComment,
            RawToken::BlockComment => Self::BlockComment,
            RawToken::Newline => Self::Newline,
            RawToken::Whitespace => Self::Whitespace,
            RawToken::Error => Self::Error,
        }
    }
}
impl Token {
    /// Reports whether this token is a newline.
    #[must_use]
    pub const fn is_newline(&self) -> bool {
        matches!(self, Self::Newline)
    }

    /// Reports whether this token is trivia whitespace.
    ///
    /// Comments are **not** considered whitespace; only [`Token::Whitespace`]
    /// and [`Token::Newline`] qualify.
    #[must_use]
    pub const fn is_whitespace(&self) -> bool {
        self.is_newline() || matches!(self, Self::Whitespace)
    }

    /// Reports whether this token may span multiple physical lines
    /// (i.e. its lexeme can contain newline characters).
    #[must_use]
    pub const fn can_span_lines(&self) -> bool {
        matches!(self, Self::BlockComment | Self::CppBlock | Self::Error)
    }

    /// Reports whether this token can appear in type position
    /// (either a built-in type keyword or an identifier).
    #[must_use]
    pub fn is_valid_type_name(&self) -> bool {
        match self {
            Self::Keyword(keyword) => keyword.is_valid_type_name(),
            Self::Identifier => true,
            _ => false,
        }
    }

    /// Reports whether this token can be used as an identifier.
    ///
    /// Covers [`Token::Identifier`] plus the keywords that `UnrealScript`
    /// also accepts in identifier position.
    #[must_use]
    pub fn is_valid_identifier_name(&self) -> bool {
        match self {
            Self::Identifier => true,
            Self::Keyword(keyword) => keyword.is_valid_identifier_name(),
            _ => false,
        }
    }

    /// Reports whether this token can be used as a function's modifier.
    ///
    /// Only a fixed subset of keywords qualifies; every non-keyword token
    /// is rejected.
    #[must_use]
    pub const fn is_valid_function_modifier(&self) -> bool {
        match self {
            Self::Keyword(keyword) => matches!(
                keyword,
                Keyword::Final
                    | Keyword::Native
                    | Keyword::Abstract
                    | Keyword::Transient
                    | Keyword::Public
                    | Keyword::Protected
                    | Keyword::Private
                    | Keyword::Static
                    | Keyword::Const
                    | Keyword::Deprecated
                    | Keyword::NoExport
                    | Keyword::Export
                    | Keyword::Simulated
                    | Keyword::Latent
                    | Keyword::Iterator
                    | Keyword::Singular
                    | Keyword::Reliable
                    | Keyword::Unreliable
                    | Keyword::NativeReplication
                    | Keyword::PreOperator
                    | Keyword::Operator
                    | Keyword::PostOperator
                    | Keyword::Config
                    | Keyword::Exec
            ),
            _ => false,
        }
    }
}
/// Reserved words of Fermented `UnrealScript`.
///
/// These are represented in [`Token`] as [`Token::Keyword`]. Some keywords
/// are also accepted in identifier or type position; see
/// [`Keyword::is_valid_identifier_name`] and [`Keyword::is_valid_type_name`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum Keyword {
    // C++ blocks
    CppText,
    CppStruct,
    // Top-level declaration
    Class,
    Struct,
    Enum,
    State,
    Auto,
    Function,
    Event,
    Delegate,
    Var,
    Local,
    // Class modifiers
    Extends,
    DependsOn,
    // Access modifiers
    Private,
    Protected,
    Public,
    Const,
    // Meta data / specifiers
    Static,
    Native,
    Abstract,
    Deprecated,
    SafeReplace,
    ExportStructs,
    Input,
    Final,
    Default,
    DefaultProperties,
    Object,
    Begin,
    End,
    Optional,
    Config,
    PerObjectConfig,
    GlobalConfig,
    CollapseCategories,
    DontCollapseCategories,
    HideCategories,
    ShowCategories,
    Localized,
    Placeable,
    NotPlaceable,
    Instanced,
    EditConst,
    EditConstArray,
    EditInline,
    EditInlineUse,
    EditInlineNew,
    NotEditInlineNew,
    EdFindable,
    EditInlineNotify,
    ParseConfig,
    Automated,
    DynamicRecompile,
    Transient,
    Long,
    Operator,
    PreOperator,
    PostOperator,
    Simulated,
    Exec,
    Latent,
    Iterator,
    Out,
    Skip,
    Singular,
    Coerce,
    Assert,
    Ignores,
    Within,
    Init,
    Export,
    NoExport,
    HideDropdown,
    Travel,
    Cache,
    CacheExempt,
    // Replication
    Reliable,
    Unreliable,
    Replication,
    NativeReplication,
    // Control flow
    Goto,
    If,
    Else,
    Switch,
    Case,
    For,
    ForEach,
    While,
    Do,
    Until,
    Break,
    Continue,
    Return,
    // Built-in types
    Int,
    Float,
    Bool,
    Byte,
    String,
    Array,
    Name,
    // Literals
    True,
    False,
    None,
    // `self` cannot be a Rust identifier, hence the `Value` suffix.
    SelfValue,
    New,
    // Vector math operators
    Dot,
    Cross,
    ClockwiseFrom,
}
impl Keyword {
    /// Returns `true` if this keyword can be used as an identifier.
    ///
    /// The accepted set is an explicit allow-list: built-in type words plus
    /// context-sensitive keywords that the language tolerates in identifier
    /// position. Anything not listed here is rejected.
    #[must_use]
    pub const fn is_valid_identifier_name(self) -> bool {
        matches!(
            self,
            // Built-in type words usable as identifiers
            Self::Name
                | Self::String
                | Self::Byte
                | Self::Int
                | Self::Bool
                | Self::Float
                | Self::Array
                | Self::Delegate
                // Context keywords we've directly checked
                | Self::Class
                | Self::SelfValue
                | Self::Default
                | Self::Static
                | Self::Simulated
                | Self::Native
                | Self::Latent
                | Self::Iterator
                | Self::Singular
                | Self::Reliable
                | Self::Unreliable
                | Self::Transient
                | Self::Const
                | Self::Abstract
                | Self::New
                | Self::Extends
                | Self::Within
                | Self::Config
                | Self::Out
                | Self::Optional
                | Self::Local
                | Self::Var
                | Self::DefaultProperties
                | Self::PerObjectConfig
                | Self::Object
                | Self::Enum
                | Self::End
                | Self::Event
                | Self::Switch
                | Self::Goto
                | Self::Cross
                | Self::CppText
                | Self::CppStruct
                | Self::HideCategories
                | Self::Auto
                | Self::For
                | Self::Skip
                | Self::Placeable
                | Self::NotPlaceable
                | Self::Instanced
                | Self::Function
                | Self::State
                | Self::Init
                | Self::Export
                | Self::NoExport
                | Self::Dot
                | Self::ClockwiseFrom
                | Self::Assert
                | Self::ExportStructs
                | Self::SafeReplace
                | Self::Input
                | Self::Travel
                | Self::Cache
                | Self::CacheExempt
                | Self::Long
                | Self::Continue
        )
    }

    /// Returns `true` if this keyword can appear in type position.
    ///
    /// Covers the built-in value types plus `Object`, `Function`, `State`
    /// and `Delegate`.
    #[must_use]
    pub const fn is_valid_type_name(self) -> bool {
        matches!(
            self,
            Self::Int
                | Self::Float
                | Self::Bool
                | Self::Byte
                | Self::String
                | Self::Array
                | Self::Name
                | Self::Object
                | Self::Function
                | Self::State
                | Self::Delegate
        )
    }
}

View File

@ -1,48 +1,45 @@
//! Cursor utilities for a token stream. //! Cursor utilities for a token stream.
//! //!
//! Provides memoized lookahead over significant tokens and attaches //! Provides memoized lookahead over significant tokens and records trivia in
//! trivia to [`TriviaComponent`]. Significant tokens exclude whitespace and //! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments;
//! comments; see [`crate::parser::TriviaKind`]. //! see [`parser::TriviaKind`].
use crate::lexer::{Token, TokenLocation}; use std::collections::VecDeque;
use crate::parser::trivia::TriviaComponent;
use crate::{
ast::AstSpan,
lexer::{self, Keyword, Token, TokenPosition},
parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder},
};
/// Cursor over a token stream with memoized lookahead and trivia attachment. /// Cursor over a token stream with memoized lookahead and trivia attachment.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub(crate) struct CursorComponent<'src> { pub(crate) struct Cursor<'file, 'src> {
/// Underlying token stream. tokens: lexer::Tokens<'file, 'src>,
tokens: crate::lexer::Tokens<'src>, lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>,
/// Significant-token lookahead buffer. last_consumed_position: Option<TokenPosition>,
lookahead_buffer: std::collections::VecDeque<(TokenLocation, crate::lexer::TokenPiece<'src>)>,
/// Location of the last consumed token.
previous_location: Option<TokenLocation>,
/// Location of the last significant token.
///
/// Used to associate following trivia with the correct token.
last_significant_location: Option<TokenLocation>,
/// Scratch space for [`CursorComponent::buffer_next_significant_token`],
/// used to avoid reallocations.
trivia_buffer: Vec<crate::parser::trivia::TriviaToken<'src>>,
} }
impl<'src> CursorComponent<'src> { impl<'file, 'src> Cursor<'file, 'src> {
/// Create a [`CursorComponent`] over the tokens of `file`. /// Creates a [`Cursor`] over `tokenized_file`.
pub(crate) fn new(tokenized_file: &'src crate::lexer::TokenizedFile<'src>) -> Self { pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self {
Self { Self {
tokens: tokenized_file.tokens(), tokens: tokenized_file.iter(),
lookahead_buffer: std::collections::VecDeque::new(), lookahead_buffer: VecDeque::new(),
previous_location: None, last_consumed_position: None,
last_significant_location: None,
trivia_buffer: Vec::new(),
} }
} }
/// Ensure the lookahead buffer contains at least `lookahead + 1` /// Ensures that the lookahead buffer contains at least `lookahead + 1`
/// significant tokens. /// significant tokens, if available.
/// ///
/// May consume trivia from the underlying stream. /// May consume trivia from the underlying stream without consuming
/// Does not consume significant tokens. /// significant tokens.
fn ensure_min_lookahead(&mut self, lookahead: usize, trivia: &mut TriviaComponent<'src>) { fn ensure_lookahead_available(
&mut self,
lookahead: usize,
trivia: &mut TriviaIndexBuilder<'src>,
) {
while self.lookahead_buffer.len() <= lookahead { while self.lookahead_buffer.len() <= lookahead {
if !self.buffer_next_significant_token(trivia) { if !self.buffer_next_significant_token(trivia) {
break; break;
@ -50,181 +47,320 @@ impl<'src> CursorComponent<'src> {
} }
} }
/// Scan to the next significant token, recording intervening trivia. /// Buffers the next significant token and records any preceding trivia.
/// ///
/// Returns `true` if a significant token was buffered, /// Returns `true` if a significant token was buffered, or `false` if the
/// `false` on end of file. /// stream is exhausted.
fn buffer_next_significant_token(&mut self, trivia: &mut TriviaComponent<'src>) -> bool { fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool {
self.trivia_buffer.clear(); for (token_position, token_data) in self.tokens.by_ref() {
while let Some((token_location, token_piece)) = self.tokens.next() { if let Ok(trivia_kind) = parser::TriviaKind::try_from(token_data.token) {
if let Ok(trivia_kind) = crate::parser::TriviaKind::try_from(token_piece.token) { trivia.record_trivia(parser::TriviaToken {
self.trivia_buffer.push(crate::parser::TriviaToken {
kind: trivia_kind, kind: trivia_kind,
text: token_piece.lexeme, text: token_data.lexeme,
location: token_location, position: token_position,
}); });
} else { } else {
// Attach trivia found after the previous significant token trivia.record_significant_token(token_position);
if !self.trivia_buffer.is_empty() {
trivia.record_between_locations(
self.last_significant_location,
token_location,
&mut self.trivia_buffer,
);
}
self.lookahead_buffer self.lookahead_buffer
.push_back((token_location, token_piece)); .push_back((token_position, token_data));
self.last_significant_location = Some(token_location);
return true; return true;
} }
} }
// Reached end-of-file: attach trailing trivia
if !self.trivia_buffer.is_empty() {
trivia.record_between_locations(
self.last_significant_location,
TokenLocation::EndOfFile,
&mut self.trivia_buffer,
);
}
false false
} }
} }
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { impl<'src, 'arena> Parser<'src, 'arena> {
/// Returns the next token without consuming it. fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> {
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
self.cursor.lookahead_buffer.front()
}
/// Returns the next significant token without consuming it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
/// ///
/// Returns [`None`] if no tokens remain. /// Returns [`None`] if no tokens remain.
#[must_use] #[must_use]
pub(crate) fn peek_token(&mut self) -> Option<Token> { pub(crate) fn peek_token(&mut self) -> Option<Token> {
self.peek_entry().map(|(_, token_piece)| token_piece.token) self.peek_buffered_token()
.map(|(_, token_data)| token_data.token)
} }
/// Returns the next token, its lexeme, and its location /// Returns the next keyword without consuming it.
/// without consuming it. ///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain or if the next token is not
/// a keyword.
#[must_use]
pub(crate) fn peek_keyword(&mut self) -> Option<Keyword> {
match self.peek_token() {
Some(Token::Keyword(keyword)) => Some(keyword),
_ => None,
}
}
/// Returns the position of the next significant token without consuming it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
/// ///
/// Returns [`None`] if no tokens remain. /// Returns [`None`] if no tokens remain.
#[must_use] #[must_use]
pub(crate) fn peek_token_lexeme_and_location( pub(crate) fn peek_position(&mut self) -> Option<TokenPosition> {
&mut self, self.peek_buffered_token()
) -> Option<(Token, &'src str, TokenLocation)> { .map(|(token_position, _)| *token_position)
self.peek_entry().map(|(token_location, token_piece)| {
(token_piece.token, token_piece.lexeme, *token_location)
})
} }
/// Returns the next token and its lexeme without consuming it. /// Returns the next significant token and its lexeme without consuming it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
/// ///
/// Returns [`None`] if no tokens remain. /// Returns [`None`] if no tokens remain.
#[must_use] #[must_use]
pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> { pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
self.peek_entry() self.peek_buffered_token()
.map(|(_, token_piece)| (token_piece.token, token_piece.lexeme)) .map(|(_, token_data)| (token_data.token, token_data.lexeme))
} }
/// Returns the next token and its location without consuming it. /// Returns the next significant token and its position without consuming
/// it.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
/// ///
/// Returns [`None`] if no tokens remain. /// Returns [`None`] if no tokens remain.
#[must_use] #[must_use]
pub(crate) fn peek_token_and_location(&mut self) -> Option<(Token, TokenLocation)> { pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> {
self.peek_entry() self.peek_buffered_token()
.map(|(token_location, token_piece)| (token_piece.token, *token_location)) .map(|(token_position, token_data)| (token_data.token, *token_position))
} }
/// Returns the location of the next token, or [`TokenLocation::EndOfFile`] /// Returns the next keyword and its position without consuming it.
/// if none remain.
#[must_use]
pub(crate) fn peek_location(&mut self) -> TokenLocation {
self.peek_entry()
.map(|(token_location, _)| *token_location)
.unwrap_or(TokenLocation::EndOfFile)
}
/// Returns the location of the last token that was actually consumed
/// by [`crate::parser::Parser::advance`].
/// ///
/// Returns [`None`] if no tokens have been consumed yet. /// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if next token isn't keyword or no tokens remain.
#[must_use] #[must_use]
pub(crate) fn last_consumed_location(&self) -> Option<TokenLocation> { pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> {
self.cursor.previous_location let Some((Token::Keyword(keyword), keyword_position)) = self.peek_token_and_position()
else {
return None;
};
Some((keyword, keyword_position))
} }
/// Returns the most recent location the parser is "at". /// Returns the next significant token, its lexeme, and its position
/// without consuming them.
/// ///
/// If at least one token has been consumed, this is the location of the /// May buffer additional tokens and record skipped trivia, but does not
/// last consumed token. Otherwise it falls back to the location of the /// consume any significant token.
/// first significant token in the stream (or [`TokenLocation::EndOfFile`] ///
/// if the stream is empty). /// Returns [`None`] if no tokens remain.
#[must_use] #[must_use]
pub(crate) fn last_visited_location(&mut self) -> TokenLocation { pub(crate) fn peek_token_lexeme_and_position(
// Only has to `unwrap` before *any* characters were consumed &mut self,
self.last_consumed_location() ) -> Option<(Token, &'src str, TokenPosition)> {
.unwrap_or_else(|| self.peek_location()) self.peek_buffered_token()
.map(|(token_position, token_data)| {
(token_data.token, token_data.lexeme, *token_position)
})
} }
/// Peeks the token at `lookahead` (`0` is the next token) /// Returns the next significant token at `lookahead` without consuming it.
/// without consuming.
/// ///
/// Returns `None` if the stream ends before that position. /// `lookahead` counts significant tokens, with `0` referring to the next
/// significant token.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if no tokens remain.
#[must_use] #[must_use]
pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> { pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
self.cursor self.cursor
.ensure_min_lookahead(lookahead, &mut self.trivia); .ensure_lookahead_available(lookahead, &mut self.trivia);
self.cursor self.cursor
.lookahead_buffer .lookahead_buffer
.get(lookahead) .get(lookahead)
.map(|(_, token_piece)| token_piece.token) .map(|(_, token_data)| token_data.token)
}
/// Returns the keyword at `lookahead` without consuming it.
///
/// `lookahead` counts significant tokens, with `0` referring to the next
/// significant token.
///
/// May buffer additional tokens and record skipped trivia, but does not
/// consume any significant token.
///
/// Returns [`None`] if the token at that position is not a keyword or if
/// the stream ends before that position.
#[must_use]
pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option<Keyword> {
match self.peek_token_at(lookahead) {
Some(Token::Keyword(keyword)) => Some(keyword),
_ => None,
}
}
/// Returns the position of the next significant token without consuming it.
///
/// Generates an error with `error_kind` if no tokens remain.
pub(crate) fn require_position(
&mut self,
error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, TokenPosition> {
self.peek_position()
.ok_or_else(|| self.make_error_here(error_kind))
}
/// Returns the next significant token and its position without consuming
/// it.
///
/// Generates an error with `error_kind` if no tokens remain.
pub(crate) fn require_token_and_position(
&mut self,
error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, (Token, TokenPosition)> {
self.peek_token_and_position()
.ok_or_else(|| self.make_error_here(error_kind))
}
/// Returns the next significant token, its lexeme, and its position
/// without consuming them.
///
/// Generates an error with `error_kind` if no tokens remain.
pub(crate) fn require_token_lexeme_and_position(
&mut self,
error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> {
self.peek_token_lexeme_and_position()
.ok_or_else(|| self.make_error_here(error_kind))
} }
/// Advances by one significant token. /// Advances by one significant token.
/// ///
/// Trivia is internally handled and recorded. /// Records any skipped trivia and returns the consumed token position.
/// Does nothing at the end-of-file. /// Returns [`None`] if no significant tokens remain.
pub(crate) fn advance(&mut self) { pub(crate) fn advance(&mut self) -> Option<TokenPosition> {
self.cursor.ensure_min_lookahead(0, &mut self.trivia); self.cursor.ensure_lookahead_available(0, &mut self.trivia);
if let Some((location, _)) = self.cursor.lookahead_buffer.pop_front() { if let Some((token_position, _)) = self.cursor.lookahead_buffer.pop_front() {
self.cursor.previous_location = Some(location); self.cursor.last_consumed_position = Some(token_position);
Some(token_position)
} else {
None
} }
} }
/// If the next token equals `token`, consumes it and returns `true`. /// If the next significant token equals `token`, consumes it and
/// returns `true`.
/// ///
/// Otherwise leaves the cursor unchanged and returns `false`. /// Otherwise leaves the cursor unchanged and returns `false`.
/// Trivia is recorded automatically. #[must_use]
pub(crate) fn eat(&mut self, token: Token) -> bool { pub(crate) fn eat(&mut self, token: Token) -> bool {
let correct_token = self.peek_token() == Some(token); if self.peek_token() == Some(token) {
if correct_token {
self.advance(); self.advance();
true
} else {
false
} }
correct_token
} }
/// Centralized peek used by public peekers. /// If the next significant token corresponds to the given keyword,
fn peek_entry(&mut self) -> Option<&(TokenLocation, crate::lexer::TokenPiece<'src>)> { /// consumes it and returns `true`.
self.cursor.ensure_min_lookahead(0, &mut self.trivia);
self.cursor.lookahead_buffer.front()
}
/// Expects `expected` at the current position.
/// ///
/// On match consumes the token and returns its [`TokenLocation`]. /// Otherwise leaves the cursor unchanged and returns `false`.
/// Otherwise returns a [`crate::parser::ParseError`] of #[must_use]
/// the given [`crate::parser::ParseErrorKind`] that carries the current pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool {
/// span for diagnostics. self.eat(Token::Keyword(keyword))
}
/// Expects `expected` token as the next significant one.
///
/// On match consumes the token and returns its [`TokenPosition`].
/// Otherwise returns an error of `error_kind` anchored at
/// the current token, or at the last consumed token if the stream is
/// exhausted. That error also gets set a blame span that contains exactly
/// that anchor point.
pub(crate) fn expect( pub(crate) fn expect(
&mut self, &mut self,
expected: Token, expected: Token,
error_kind: crate::parser::ParseErrorKind, error_kind: parser::ParseErrorKind,
) -> crate::parser::ParseResult<'src, 'arena, TokenLocation> { ) -> ParseResult<'src, 'arena, TokenPosition> {
let token_position = self.peek_location(); // Anchors EOF diagnostics at the last consumed token
// `Token` only includes type information, so comparison is valid // when no current token exists.
let anchor = self
.peek_position()
.unwrap_or_else(|| self.last_consumed_position_or_start());
// `Token` equality is enough here because lexeme and position
// are stored separately.
if self.peek_token() == Some(expected) { if self.peek_token() == Some(expected) {
self.advance(); self.advance();
Ok(token_position) Ok(anchor)
} else { } else {
Err(crate::parser::ParseError { Err(self
kind: error_kind, .make_error_at(error_kind, anchor)
source_span: crate::ast::AstSpan::new(token_position), .blame(AstSpan::new(anchor)))
}) }
}
/// Expects `expected` keyword as the next significant token.
///
/// On match consumes the keyword and returns its [`TokenPosition`].
/// Otherwise returns an error of `error_kind` anchored at the current
/// token, or at the last consumed token if the stream is exhausted.
pub(crate) fn expect_keyword(
&mut self,
expected: Keyword,
error_kind: parser::ParseErrorKind,
) -> ParseResult<'src, 'arena, TokenPosition> {
self.expect(Token::Keyword(expected), error_kind)
}
/// Returns position of the last significant token that was actually
/// consumed by [`parser::Parser::advance`].
///
/// Returns [`None`] if no tokens have been consumed yet.
#[must_use]
pub(crate) const fn last_consumed_position(&self) -> Option<TokenPosition> {
self.cursor.last_consumed_position
}
/// Returns the position of the last significant token consumed by
/// [`parser::Parser::advance`], or the start of the stream if no token has
/// been consumed yet.
///
/// Useful when diagnostics need a stable anchor even at the beginning of
/// input.
#[must_use]
pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition {
self.cursor
.last_consumed_position
.unwrap_or(TokenPosition(0))
}
/// Ensures that parsing has advanced past `old_position`.
///
/// This is intended as a safeguard against infinite-loop bugs while
/// recovering from invalid input. In debug builds it asserts that progress
/// was made; in release builds it consumes one significant token when
/// the parser stalls.
#[track_caller]
pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) {
if let Some(peeked_position) = self.peek_position() {
debug_assert!(
peeked_position > old_position,
"parser made no forward progress"
);
if peeked_position <= old_position {
self.advance();
}
} }
} }
} }

View File

@ -1,6 +1,6 @@
//! Submodule with parsing related errors. //! Submodule with parsing related errors.
use crate::ast::AstSpan; use crate::{ast::AstSpan, lexer::TokenPosition};
/// Internal parse error kinds. /// Internal parse error kinds.
/// ///
@ -14,13 +14,89 @@ use crate::ast::AstSpan;
/// `UnexpectedToken`, `MultipleDefaults`, etc.). /// `UnexpectedToken`, `MultipleDefaults`, etc.).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum ParseErrorKind { pub enum ParseErrorKind {
// ================== New errors that are 100% used! ==================
// headline: empty parenthesized expression
// primary label on ): expected an expression before this \)'`
// secondary label on (: parenthesized expression starts here
// Remove the parentheses or put an expression inside them.
ParenthesizedExpressionEmpty {
left_parenthesis_position: TokenPosition,
},
// headline: missing type argument in \class<...>``
// primary label on > or insertion site: expected a type name here
// secondary label on < or on class: type argument list starts here
// help: Write a type name, for example \class<Pawn>`.`
ClassTypeMissingTypeArgument {
left_angle_bracket_position: TokenPosition,
},
// headline: missing closing \>` in `class<...>``
// primary label on offending following token or EOF: expected \>` before this token` or at EOF: expected \>` here`
// secondary label on <: this \<` starts the type argument`
// help: Add \>` to close the class type expression.`
ClassTypeMissingClosingAngleBracket {
left_angle_bracket_position: TokenPosition,
},
// headline: missing closing \)'`
// primary label on the point where ) was expected: expected \)' here` or, if you have a real token there, expected \)' before this token`
// secondary label on the opening (: this \(` starts the parenthesized expression`
// help: Add \)' to close the expression.`
ParenthesizedExpressionMissingClosingParenthesis {
left_parenthesis_position: TokenPosition,
},
// headline: expected expression
// primary label: this token cannot start an expression
// optional help: Expressions can start with literals, identifiers, \(`, `{`, or expression keywords.`
ExpressionExpected,
// headline: invalid type argument in \class<...>``
// primary label on the bad token inside the angle brackets: expected a qualified type name here
// secondary label on class or <: while parsing this class type expression
// note: Only a type name is accepted between \<` and `>` here.`
ClassTypeInvalidTypeArgument {
left_angle_bracket_position: TokenPosition,
},
// headline: too many arguments in \new(...)``
// primary label on the fourth argument, or on the comma before it if that is easier: unexpected extra argument
// secondary label on the opening (: this argument list accepts at most three arguments
// note: The three slots are \outer`, `name`, and `flags`.`
// help: Remove the extra argument.
NewTooManyArguments {
left_parenthesis_position: TokenPosition,
},
// headline: missing closing \)' in `new(...)``
// primary label: expected \)' here`
// secondary label on the opening (: this argument list starts here
// help: Add \)' to close the argument list.`
NewMissingClosingParenthesis {
left_parenthesis_position: TokenPosition,
},
// missing class specifier in \new` expression`
// Primary label on the first token where a class specifier should have started: expected a class specifier here
// Secondary label on new: \new` expression starts here` If there was an argument list, an additional secondary on ( is also reasonable: optional \new(...)` arguments end here`
// Help: Add the class or expression to instantiate after \new` or `new(...)`.`
NewMissingClassSpecifier {
new_keyword_position: TokenPosition,
},
// ================== Old errors to be thrown away! ==================
/// Expression inside `(...)` could not be parsed and no closing `)` /// Expression inside `(...)` could not be parsed and no closing `)`
/// was found. /// was found.
ExpressionMissingClosingParenthesis, FunctionCallMissingClosingParenthesis,
/// A `do` block was not followed by a matching `until`. /// A `do` block was not followed by a matching `until`.
DoMissingUntil, DoMissingUntil,
/// Found an unexpected token while parsing an expression. /// Found an unexpected token while parsing an expression.
ExpressionUnexpectedToken, ExpressionUnexpectedToken,
DeclEmptyVariableDeclarations,
DeclNoSeparatorBetweenVariableDeclarations,
DeclExpectedRightBracketAfterArraySize,
DeclExpectedCommaAfterVariableDeclarator,
TypeSpecExpectedType,
TypeSpecInvalidNamedTypeName,
TypeSpecArrayMissingOpeningAngle,
TypeSpecArrayMissingInnerType,
TypeSpecArrayMissingClosingAngle,
TypeSpecClassMissingInnerType,
TypeSpecClassMissingClosingAngle,
/// A `for` loop is missing its opening `(`. /// A `for` loop is missing its opening `(`.
ForMissingOpeningParenthesis, ForMissingOpeningParenthesis,
/// The first `;` in `for (init; cond; step)` is missing. /// The first `;` in `for (init; cond; step)` is missing.
@ -33,6 +109,7 @@ pub enum ParseErrorKind {
BlockMissingSemicolonAfterExpression, BlockMissingSemicolonAfterExpression,
/// A statement inside a block is not terminated with `;`. /// A statement inside a block is not terminated with `;`.
BlockMissingSemicolonAfterStatement, BlockMissingSemicolonAfterStatement,
BlockMissingClosingBrace,
/// `switch` has no body (missing matching braces). /// `switch` has no body (missing matching braces).
SwitchMissingBody, SwitchMissingBody,
/// The first top-level item in a `switch` body is not a `case`. /// The first top-level item in a `switch` body is not a `case`.
@ -43,6 +120,7 @@ pub enum ParseErrorKind {
SwitchDuplicateDefault, SwitchDuplicateDefault,
/// Found `case` arms after a `default` branch. /// Found `case` arms after a `default` branch.
SwitchCasesAfterDefault, SwitchCasesAfterDefault,
SwitchMissingClosingBrace,
/// A `goto` was not followed by a label. /// A `goto` was not followed by a label.
GotoMissingLabel, GotoMissingLabel,
/// Unexpected end of input while parsing. /// Unexpected end of input while parsing.
@ -70,6 +148,189 @@ pub enum ParseErrorKind {
DeclMissingIdentifier, DeclMissingIdentifier,
/// Invalid variable name identifier in non-`local` variable definition. /// Invalid variable name identifier in non-`local` variable definition.
DeclBadVariableIdentifier, DeclBadVariableIdentifier,
/// Found an unexpected token while parsing a declaration literal.
///
/// Expected one of: integer, float, string, `true`, `false`, `none`
/// or an identifier.
DeclarationLiteralUnexpectedToken,
/// A class name was expected, but the current token is not an identifier.
///
/// Emitted when parsing `class Foo` and the token after `class` is not an
/// identifier (so its string value cannot be extracted).
ClassNameNotIdentifier,
/// A parent class name after `extends` was expected, but the token is not
/// an identifier.
///
/// Emitted when parsing `class Foo extends Bar` and the token after
/// `extends` is not an identifier.
ClassParentNameNotIdentifier,
/// A class declaration was not terminated with `;`.
///
/// Emitted when the parser reaches the end of a class definition but
/// does not encounter the required semicolon.
ClassMissingSemicolon,
/// An identifier was expected inside optional parentheses, but the token
/// is not an identifier.
///
/// Emitted by helpers that parse either `(<Ident>)` or bare `<Ident>`.
ParenthesisedIdentifierNameNotIdentifier,
/// A `(` was seen before an identifier, but the matching `)` was not found.
///
/// Emitted when parsing a parenthesised identifier like `(Foo)`.
ParenthesisedIdentifierMissingClosingParenthesis,
/// `HideCategories` is missing the opening `(` before the category list.
///
/// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`.
HideCategoriesMissingOpeningParenthesis,
/// `HideCategories` is missing the closing `)` after the category list.
HideCategoriesMissingClosingParenthesis,
/// `HideCategories` is missing the opening `(` before the category list.
///
/// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`.
ShowCategoriesMissingOpeningParenthesis,
/// `HideCategories` is missing the closing `)` after the category list.
ShowCategoriesMissingClosingParenthesis,
/// `Within` must be followed by a class or package name identifier.
///
/// Example: `Within(MyOuterClass)`.
WithinNameNotIdentifier,
/// `operator` modifier is missing the opening `(` before
/// the precedence rank.
///
/// Expected syntax: `operator(<integer>)`.
OperatorMissingOpeningParenthesis,
/// `operator(<...>)` must contain an integer literal precedence rank.
///
/// Emitted when the token inside parentheses is not an integer literal.
OperatorPrecedenceNotIntegerLiteral,
/// `operator(<integer>` is missing the closing `)`.
OperatorMissingClosingParenthesis,
ParamInvalidTypeName,
ParamMissingIdentifier,
FunctionReturnTypeNotTypeName,
FunctionNameNotIdentifier,
FunctionParamsMissingOpeningParenthesis,
FunctionParamsMissingClosingParenthesis,
ClassUnexpectedItem,
EnumMissingLeftBrace,
EnumBadVariant,
StructFieldMissingName,
StructFieldMissingSemicolon,
StructMissingRightBrace,
// Named enum/struct typedefs
EnumMissingKeyword, // class member: expected `enum`
EnumExpectedNameOrBrace, // after `enum`, expected identifier
EnumNoClosingBrace,
EnumEmptyVariants,
EnumNoSeparatorBetweenVariants,
EnumMissingLBrace,
StructMissingKeyword, // class member: expected `struct`
StructExpectedNameOrBrace, // after `struct`, expected identifier
StructExpectedExtendsOrBrace,
StructMissingLeftBrace,
StructExpectedBaseName,
StructBodyUnexpectedItem,
CppDirectiveMissingCppBlock,
// var(...) field decls
VarMissingKeyword, // class member: expected `var`
VarSpecsMissingOpeningParenthesis, // after `var`, expected '('
VarSpecNotIdentifier, // inside var(...), expected identifier
VarSpecsMissingClosingParenthesis, // var(...) missing ')'
// Generic decl end
DeclMissingSemicolon, // class-level declaration missing `;`
// --- Replication ---
ReplicationMissingReliability,
ReplicationIfMissingOpeningParenthesis,
ReplicationIfMissingClosingParenthesis,
ReplicationMemberNotIdentifier,
ReplicationMemberMissingClosingParenthesis,
ReplicationRuleMissingSemicolon,
ReplicationMissingKeyword,
ReplicationMissingLBrace,
ReplicationMissingRBrace,
// --- DefaultProperties ---
DefaultPropPathExpectedIdentifier,
DefaultPropIndexNotIntegerLiteral,
DefaultPropIndexMissingClosingParenthesis,
DefaultPropAssignMissingEq,
DefaultPropsMissingKeyword,
DefaultPropsMissingLBrace,
DefaultPropsMissingRBrace,
// --- Begin/End Object headers ---
ObjectBeginMissingKeyword,
ObjectMissingKeyword,
ObjectHeaderKeyNotIdentifier,
ObjectHeaderMissingEq,
// --- State / ignores ---
IgnoresItemNotIdentifier,
IgnoresMissingSemicolon,
StateMissingKeyword,
StateNameNotIdentifier,
StateParentNameNotIdentifier,
StateMissingLBrace,
StateMissingRBrace,
ClassMissingKeyword,
TypeMissingLT,
TypeMissingGT,
StateParensMissingRParen,
BadTypeInClassTypeDeclaration,
IdentifierExpected,
// --- Generic list diagnostics (comma-separated, closed by `)`) ---
/// Saw `)` immediately after `(`, or closed the list without any items.
/// Use when a construct requires at least one item: e.g. `HideCategories(...)`.
ListEmpty,
/// Parser was positioned where an item was required but found neither an
/// item nor a terminator. Typical triggers:
/// - Leading comma: `(, Foo)`
/// - Double comma: `(Foo,, Bar)`
/// - Garbage in place of an item: `(@@, Foo)`
///
/// Recovery: skip to next comma or `)`.
ListMissingIdentifierBeforeSeparator,
/// Parser was positioned where an item was required but found neither an
/// item nor a terminator. Typical triggers:
/// - Leading comma: `(, Foo)`
/// - Double comma: `(Foo,, Bar)`
/// - Garbage in place of an item: `(@@, Foo)`
///
/// Recovery: skip to next comma or `)`.
ListInvalidIdentifier,
/// Two items without a comma (or some token after an item where a comma
/// was required). Typical triggers:
/// - Adjacent identifiers: `(Foo Bar)`
/// - Token after an item where only `,` or `)` are valid.
///
/// Recovery: behave as if a comma were present; continue with the next item.
ListMissingSeparator,
/// Comma directly before `)`: `(Foo, )`.
/// Treat as a soft error or warning, depending on your policy.
ListTrailingSeparator,
FunctionArgumentMissingComma,
// Expression was required, but none started
MissingExpression,
MissingBranchBody,
CallableExpectedHeader,
CallableExpectedKind,
CallableOperatorInvalidPrecedence,
CallableMissingBodyOrSemicolon,
CallableNameNotIdentifier,
CallablePrefixOperatorInvalidSymbol,
CallableInfixOperatorInvalidSymbol,
CallablePostfixOperatorInvalidSymbol,
CallableParamsMissingOpeningParenthesis,
CallableParamsMissingClosingParenthesis,
NativeModifierIdNotIntegerLiteral,
} }
/// Enumerates all specific kinds of parsing errors that the parser can emit. /// Enumerates all specific kinds of parsing errors that the parser can emit.
@ -78,18 +339,32 @@ pub enum ParseErrorKind {
pub struct ParseError { pub struct ParseError {
/// The specific kind of parse error that occurred. /// The specific kind of parse error that occurred.
pub kind: ParseErrorKind, pub kind: ParseErrorKind,
pub anchor: TokenPosition,
/// Where the user should look first.
pub blame_span: AstSpan,
/// The source span in which the error was detected. /// The source span in which the error was detected.
pub source_span: AstSpan, pub covered_span: AstSpan,
pub related_span: Option<AstSpan>,
} }
pub type ParseResult<'src, 'arena, T> = Result<T, ParseError>; pub type ParseResult<'src, 'arena, T> = Result<T, ParseError>;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { impl crate::parser::Parser<'_, '_> {
#[must_use] pub(crate) fn make_error_here(&self, error_kind: ParseErrorKind) -> ParseError {
pub(crate) fn make_error_here(&mut self, error_kind: ParseErrorKind) -> ParseError { self.make_error_at(error_kind, self.last_consumed_position_or_start())
}
pub(crate) fn make_error_at(
&self,
error_kind: ParseErrorKind,
position: TokenPosition,
) -> ParseError {
ParseError { ParseError {
kind: error_kind, kind: error_kind,
source_span: AstSpan::new(self.peek_location()), anchor: position,
blame_span: AstSpan::new(position),
covered_span: AstSpan::new(position),
related_span: None,
} }
} }
} }

View File

@ -1,60 +0,0 @@
use crate::ast::Expression;
use crate::lexer::Token;
use crate::parser::ParseErrorKind;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses a block `{ ... }` after `{`.
///
/// Consumes tokens until the matching `}` and returns
/// an [`Expression::Block`] spanning from the opening `{` to
/// the closing `}`.
/// Returns a best-effort block on premature end-of-file.
#[must_use]
pub(crate) fn parse_block_cont(
&mut self,
block_start_location: crate::lexer::TokenLocation,
) -> crate::ast::ExpressionRef<'src, 'arena> {
let mut statements = self.arena.vec();
// `tail` holds a trailing expression that has not (yet) been terminated
// with `;`; it only stays a tail if the block ends right after it.
let mut tail = None;
loop {
// End-of-file before the matching `}`: report and return whatever
// was collected so far as a best-effort block.
let Some((token, token_location)) = self.peek_token_and_location() else {
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
return self.arena.alloc(
Expression::Block { statements, tail },
crate::ast::AstSpan {
from: block_start_location,
to: self.peek_location(),
},
);
};
if let Token::RightBrace = token {
self.advance(); // '}'
let block_span = crate::ast::AstSpan {
from: block_start_location,
to: token_location,
};
return self
.arena
.alloc(Expression::Block { statements, tail }, block_span);
}
// We know that at this point:
// 1. There is still a token and it is not end-of-file;
// 2. It isn't end of the block.
// So having a tail statement there is a problem!
if let Some(tail_expression) = tail {
self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
// Demote the un-terminated tail into an ordinary expression
// statement so parsing can continue.
let tail_span = *tail_expression.span();
let node = self.arena.alloc(
crate::ast::Statement::Expression(tail_expression),
tail_span,
);
statements.push(node);
}
tail = self.parse_block_item(&mut statements);
// Ensure forward progress under errors to avoid infinite loops.
if self.peek_location() <= token_location {
self.advance();
}
}
}
}

View File

@ -0,0 +1,959 @@
// rottlib/src/parser/grammar/class.rs
#![allow(clippy::all, clippy::pedantic, clippy::nursery)]
use crate::ast::{
AstSpan, BlockBody, ClassConstDecl, ClassConstDeclRef, ClassDeclaration, ClassDefinition,
ClassMember, ClassModifier, ClassModifierRef, ClassVarDecl, ClassVarDeclRef,
DeclarationLiteral, DeclarationLiteralRef, ExecDirective, ExecDirectiveRef, ExpressionRef,
IdentifierToken, Reliability, ReplicationBlock, ReplicationBlockRef, ReplicationRule,
ReplicationRuleRef, StateDecl, StateDeclRef, StateModifier, VariableDeclarator,
VariableDeclaratorRef,
};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Guarantees forward progress while looping during error recovery.
///
/// Returns `true` when the cursor has already moved past `before`.
/// Otherwise force-consumes one token and returns whether a token was
/// actually consumed — `false` only at end of input, which tells the
/// caller to stop its loop.
#[inline]
pub fn ensure_progress_or_break(&mut self, before: TokenPosition) -> bool {
    if let Some(current_position) = self.peek_position() {
        if current_position > before {
            return true;
        }
    }
    // Stalled: swallow one token so recovery loops cannot spin forever.
    self.advance().is_some()
}
/// Parses one exec-directive token into an [`ExecDirective`] node.
///
/// Expects the current token to be [`Token::ExecDirective`] (asserted in
/// debug builds); fails with `UnexpectedEndOfFile` when no tokens remain.
fn parse_exec_directive(&mut self) -> ParseResult<'src, 'arena, ExecDirectiveRef<'arena>> {
let (token, lexeme, start_position) =
self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?;
debug_assert!(matches!(token, Token::ExecDirective));
// Drop any trailing line-terminator characters from the lexeme before
// storing the directive text.
let trimmed = lexeme.trim_end_matches(['\r', '\n']);
self.advance();
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
// The text is copied into the arena so the node does not borrow from
// the source buffer.
Ok(self.arena.alloc_node(
ExecDirective {
text: self.arena.string(trimmed),
span,
},
span,
))
}
/// Parses either a bare identifier or a parenthesised one:
/// `Foo` or `(Foo)`.
///
/// A missing `)` after an opening `(` is reported and resynchronised
/// past rather than failing the whole production.
fn parse_parenthesised_identifier(&mut self) -> ParseResult<'src, 'arena, IdentifierToken> {
let has_opening_parenthesis = self.eat(Token::LeftParenthesis);
let identifier =
self.parse_identifier(ParseErrorKind::ParenthesisedIdentifierNameNotIdentifier)?;
if has_opening_parenthesis {
// Recoverable: report, sync on `)`, and keep the parsed identifier.
self.expect(
Token::RightParenthesis,
ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.report_error(self);
}
Ok(identifier)
}
/// Checks — without consuming anything — whether the upcoming tokens
/// form a state declaration: zero or more `auto` / `simulated` keywords
/// followed by the `state` keyword.
#[inline]
fn lookahead_state_after_mods(&mut self) -> bool {
    let mut offset = 0;
    // Skip over any run of state modifiers.
    while matches!(
        self.peek_keyword_at(offset),
        Some(Keyword::Auto | Keyword::Simulated)
    ) {
        offset += 1;
    }
    // The run must be immediately followed by `state`.
    matches!(self.peek_keyword_at(offset), Some(Keyword::State))
}
/// Parses an optional static-array length suffix: `[ <expression> ]`.
///
/// Returns `Ok(None)` (without consuming anything) when the next token
/// is not `[`. Otherwise parses the length expression, requires the
/// closing `]`, and resynchronises on the closing bracket when it is
/// missing.
pub fn parse_array_len_expr(
    &mut self,
) -> ParseResult<'src, 'arena, Option<ExpressionRef<'src, 'arena>>> {
    if !self.eat(Token::LeftBracket) {
        return Ok(None);
    }
    // Capture the position of the just-consumed `[` NOW: taking
    // `last_consumed_position_or_start()` after `parse_expression` (as the
    // previous version did) would anchor the diagnostic at the last token
    // of the length expression instead of at the opening bracket.
    let opening_bracket_position = self.last_consumed_position_or_start();
    let expression = self.parse_expression();
    // NOTE(review): this reuses the parenthesis-oriented error kind for a
    // `[` ... `]` pair; a bracket-specific kind may fit better — confirm.
    self.expect(
        Token::RightBracket,
        ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
            left_parenthesis_position: opening_bracket_position,
        },
    )
    .sync_error_at(self, SyncLevel::CloseBracket)?;
    Ok(Some(expression))
}
/// Attempts to parse a single class-declaration modifier at the cursor.
///
/// Returns `Ok(Some(_))` when a modifier (including any argument list)
/// was consumed, `Ok(None)` when the current token does not start a
/// known modifier (cursor left untouched), or an error on end of input
/// or a malformed modifier argument list.
pub fn parse_class_declaration_modifier(
&mut self,
) -> ParseResult<'src, 'arena, Option<ClassModifierRef<'arena>>> {
use ClassModifier::{
Abstract, CacheExempt, CollapseCategories, Config, Const, DependsOn, Deprecated,
DontCollapseCategories, DynamicRecompile, EditConst, EditInline, EditInlineNew, Export,
ExportStructs, Final, GlobalConfig, HideCategories, HideDropdown, Instanced, Localized,
Native, NativeReplication, NoExport, NotEditInlineNew, NotPlaceable, ParseConfig,
PerObjectConfig, Placeable, Private, Protected, Public, SafeReplace, ShowCategories,
Static, Transient, Within,
};
let Some((token, modifier_position)) = self.peek_token_and_position() else {
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
};
// Plain keyword modifiers are consumed once after the match; arms that
// parse an argument list advance inside the match and set this flag so
// the keyword is not consumed twice.
let mut consumed_inside_match = false;
let mut span = AstSpan::new(modifier_position);
let modifier = match token {
Token::Keyword(Keyword::Final) => Final,
Token::Keyword(Keyword::Native) => Native,
Token::Keyword(Keyword::Abstract) => Abstract,
Token::Keyword(Keyword::Transient) => Transient,
Token::Keyword(Keyword::Public) => Public,
Token::Keyword(Keyword::Protected) => Protected,
Token::Keyword(Keyword::Private) => Private,
Token::Keyword(Keyword::Static) => Static,
Token::Keyword(Keyword::Const) => Const,
Token::Keyword(Keyword::Deprecated) => Deprecated,
Token::Keyword(Keyword::NoExport) => NoExport,
Token::Keyword(Keyword::Export) => Export,
Token::Keyword(Keyword::Config) => {
self.advance();
consumed_inside_match = true;
// `config` optionally takes a parenthesised name: `config(Foo)`.
let value = if self.peek_token() == Some(Token::LeftParenthesis) {
Some(self.parse_parenthesised_identifier()?)
} else {
None
};
Config(value)
}
Token::Keyword(Keyword::Localized) => Localized,
Token::Keyword(Keyword::Placeable) => Placeable,
Token::Keyword(Keyword::NotPlaceable) => NotPlaceable,
Token::Keyword(Keyword::Instanced) => Instanced,
Token::Keyword(Keyword::EditConst) => EditConst,
Token::Keyword(Keyword::EditInline) => EditInline,
Token::Keyword(Keyword::EditInlineNew) => EditInlineNew,
Token::Keyword(Keyword::NotEditInlineNew) => NotEditInlineNew,
Token::Keyword(Keyword::CollapseCategories) => CollapseCategories,
Token::Keyword(Keyword::DontCollapseCategories) => DontCollapseCategories,
Token::Keyword(Keyword::GlobalConfig) => GlobalConfig,
Token::Keyword(Keyword::PerObjectConfig) => PerObjectConfig,
Token::Keyword(Keyword::DynamicRecompile) => DynamicRecompile,
Token::Keyword(Keyword::CacheExempt) => CacheExempt,
Token::Keyword(Keyword::HideDropdown) => HideDropdown,
Token::Keyword(Keyword::ParseConfig) => ParseConfig,
Token::Keyword(Keyword::NativeReplication) => NativeReplication,
Token::Keyword(Keyword::ExportStructs) => ExportStructs,
Token::Keyword(Keyword::SafeReplace) => SafeReplace,
Token::Keyword(Keyword::HideCategories) => {
// `hidecategories(A, B, ...)` — mandatory parenthesised list.
self.advance();
consumed_inside_match = true;
self.expect(
Token::LeftParenthesis,
ParseErrorKind::HideCategoriesMissingOpeningParenthesis,
)?;
let categories = self.parse_identifier_list();
self.expect(
Token::RightParenthesis,
ParseErrorKind::HideCategoriesMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
HideCategories(categories)
}
Token::Keyword(Keyword::ShowCategories) => {
// `showcategories(A, B, ...)` — mandatory parenthesised list.
self.advance();
consumed_inside_match = true;
self.expect(
Token::LeftParenthesis,
ParseErrorKind::ShowCategoriesMissingOpeningParenthesis,
)?;
let categories = self.parse_identifier_list();
self.expect(
Token::RightParenthesis,
ParseErrorKind::ShowCategoriesMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
ShowCategories(categories)
}
Token::Keyword(Keyword::Within) => {
self.advance();
consumed_inside_match = true;
Within(self.parse_identifier(ParseErrorKind::WithinNameNotIdentifier)?)
}
Token::Keyword(Keyword::DependsOn) => {
self.advance();
consumed_inside_match = true;
DependsOn(self.parse_parenthesised_identifier()?)
}
// Not a modifier keyword: leave the cursor alone for the caller.
_ => return Ok(None),
};
if !consumed_inside_match {
self.advance();
}
// Stretch the span to cover the argument list (if any).
span.extend_to(self.last_consumed_position_or_start());
Ok(Some(self.arena.alloc_node(modifier, span)))
}
/// Parses the remainder of a class header: the class name, an optional
/// `extends <QualifiedName>` clause, a run of class modifiers, and the
/// terminating `;`.
///
/// The `_cont` suffix suggests the `class` keyword itself has already
/// been consumed by the caller — TODO confirm against call sites.
pub(crate) fn parse_class_header_cont(
&mut self,
) -> ParseResult<'src, 'arena, ClassDeclaration<'arena>>
where
'src: 'arena,
{
let class_name = self.parse_identifier(ParseErrorKind::ClassNameNotIdentifier)?;
let parent_class_name = if self.eat_keyword(Keyword::Extends) {
let qualified_parent =
self.parse_qualified_identifier(ParseErrorKind::ClassParentNameNotIdentifier)?;
Some(qualified_parent)
} else {
None
};
let mut modifiers = Vec::new();
// Collect modifiers until the first non-modifier token; a malformed
// modifier is reported and ends the list instead of aborting.
loop {
match self.parse_class_declaration_modifier() {
Ok(Some(next_modifier)) => modifiers.push(next_modifier),
Ok(None) => break,
Err(error) => {
self.report_error(error);
break;
}
}
}
self.expect(Token::Semicolon, ParseErrorKind::ClassMissingSemicolon)?;
// Only the head segment of a qualified parent name is retained.
Ok(ClassDeclaration {
name: class_name,
parent: parent_class_name.map(|identifier| identifier.head()),
modifiers,
})
}
/// Parses a class-level variable declaration:
/// `var [(<editor specs>)] <modifiers> <type> <declarators> ;`.
fn parse_class_var_decl(&mut self) -> ParseResult<'src, 'arena, ClassVarDeclRef<'src, 'arena>> {
let start_position = self.expect(
Token::Keyword(Keyword::Var),
ParseErrorKind::VarMissingKeyword,
)?;
let paren_specs = self.parse_var_editor_specifier_list();
let modifiers = self.parse_var_declaration_modifiers();
let type_spec = self.parse_type_specifier()?;
let declarators = self.parse_class_var_declarators();
self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?;
// Span runs from `var` through the terminating semicolon.
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
Ok(self.arena.alloc_node(
ClassVarDecl {
paren_specs,
modifiers,
type_spec,
declarators,
span,
},
span,
))
}
/// Parses one replication rule:
/// `(reliable|unreliable) [if (<expr>)] <member>[()] [, <member>[()] ...] ;`.
///
/// A missing `)` after a member's optional `()` is reported and
/// resynchronised past without failing the rule.
fn parse_replication_rule(
&mut self,
) -> ParseResult<'src, 'arena, ReplicationRuleRef<'src, 'arena>> {
let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?;
// A rule must begin with its reliability keyword.
let reliability = match self.peek_token() {
Some(Token::Keyword(Keyword::Reliable)) => {
self.advance();
Reliability::Reliable
}
Some(Token::Keyword(Keyword::Unreliable)) => {
self.advance();
Reliability::Unreliable
}
_ => return Err(self.make_error_here(ParseErrorKind::ReplicationMissingReliability)),
};
// Optional `if (<expression>)` replication condition.
let condition = if self.eat_keyword(Keyword::If) {
self.expect(
Token::LeftParenthesis,
ParseErrorKind::ReplicationIfMissingOpeningParenthesis,
)?;
let expression = self.parse_expression();
self.expect(
Token::RightParenthesis,
ParseErrorKind::ReplicationIfMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
Some(expression)
} else {
None
};
let mut members = self.arena.vec();
// Comma-separated member list; at least one member is required.
loop {
let identifier =
self.parse_identifier(ParseErrorKind::ReplicationMemberNotIdentifier)?;
members.push(identifier);
// Each member may carry an empty `()` suffix.
if self.eat(Token::LeftParenthesis) {
self.expect(
Token::RightParenthesis,
ParseErrorKind::ReplicationMemberMissingClosingParenthesis,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.report_error(self);
}
if !self.eat(Token::Comma) {
break;
}
}
self.expect(
Token::Semicolon,
ParseErrorKind::ReplicationRuleMissingSemicolon,
)?;
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
Ok(self.arena.alloc_node(
ReplicationRule {
reliability,
condition,
members,
span,
},
span,
))
}
/// Parses a `replication { <rule>* }` block.
///
/// Individual rule failures are reported and recovered from (sync to
/// statement boundary, optionally eating a `;`) so one bad rule does
/// not discard the rest of the block.
fn parse_replication_block(
&mut self,
) -> ParseResult<'src, 'arena, ReplicationBlockRef<'src, 'arena>> {
let start_position = self.expect(
Token::Keyword(Keyword::Replication),
ParseErrorKind::ReplicationMissingKeyword,
)?;
self.expect(Token::LeftBrace, ParseErrorKind::ReplicationMissingLBrace)?;
let mut rules = self.arena.vec();
while !matches!(self.peek_token(), Some(Token::RightBrace)) {
// Remember where this iteration started so stalled recovery can be
// detected below.
let loop_start = self
.peek_position()
.unwrap_or_else(|| self.last_consumed_position_or_start());
if self.peek_token().is_none() {
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
}
match self.parse_replication_rule() {
Ok(rule) => rules.push(rule),
Err(error) => {
self.report_error(error);
self.recover_until(SyncLevel::Statement);
let _ = self.eat(Token::Semicolon);
if !self.ensure_progress_or_break(loop_start) {
break;
}
continue;
}
}
if !self.ensure_progress_or_break(loop_start) {
break;
}
}
self.expect(Token::RightBrace, ParseErrorKind::ReplicationMissingRBrace)?;
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
Ok(self
.arena
.alloc_node(ReplicationBlock { rules, span }, span))
}
/// Parses an optional `ignores A, B, ...;` clause inside a state body.
///
/// Returns `Ok(None)` (without consuming anything) when the `ignores`
/// keyword is absent; otherwise returns the comma-separated identifier
/// list, which must contain at least one entry and end with `;`.
fn parse_ignores_clause(
    &mut self,
) -> ParseResult<'src, 'arena, Option<crate::arena::ArenaVec<'arena, IdentifierToken>>> {
    if !self.eat_keyword(Keyword::Ignores) {
        return Ok(None);
    }
    let mut ignored_names = self.arena.vec();
    // Do-while shape: one identifier is mandatory, then each comma
    // commits us to another one.
    let mut expecting_item = true;
    while expecting_item {
        ignored_names.push(self.parse_identifier(ParseErrorKind::IgnoresItemNotIdentifier)?);
        expecting_item = self.eat(Token::Comma);
    }
    self.expect(Token::Semicolon, ParseErrorKind::IgnoresMissingSemicolon)?;
    Ok(Some(ignored_names))
}
fn parse_state_decl(&mut self) -> ParseResult<'src, 'arena, StateDeclRef<'src, 'arena>> {
let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?;
let mut modifiers = self.arena.vec();
loop {
match self.peek_keyword() {
Some(Keyword::Auto) => {
self.advance();
modifiers.push(StateModifier::Auto);
}
Some(Keyword::Simulated) => {
self.advance();
modifiers.push(StateModifier::Simulated);
}
_ => break,
}
}
self.expect(
Token::Keyword(Keyword::State),
ParseErrorKind::StateMissingKeyword,
)?;
loop {
match self.peek_keyword() {
Some(Keyword::Auto) => {
self.advance();
modifiers.push(StateModifier::Auto);
}
Some(Keyword::Simulated) => {
self.advance();
modifiers.push(StateModifier::Simulated);
}
_ => break,
}
}
if self.eat(Token::LeftParenthesis) {
self.expect(
Token::RightParenthesis,
ParseErrorKind::StateParensMissingRParen,
)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.report_error(self);
}
let name = self.parse_identifier(ParseErrorKind::StateNameNotIdentifier)?;
let parent = if self.eat_keyword(Keyword::Extends) {
Some(self.parse_identifier(ParseErrorKind::StateParentNameNotIdentifier)?)
} else {
None
};
let opening_brace_position =
self.expect(Token::LeftBrace, ParseErrorKind::StateMissingLBrace)?;
let ignores = self.parse_ignores_clause()?;
let BlockBody {
statements: body,
span: inner_span,
} = self.parse_braced_block_statements_tail(opening_brace_position);
let span = AstSpan::range(start_position, inner_span.token_to);
Ok(self.arena.alloc_node(
StateDecl {
name,
parent,
modifiers,
ignores,
body,
span,
},
span,
))
}
    /// Parses the members of a class definition after the `class` keyword has
    /// been consumed (the header itself comes from `parse_class_header_cont`).
    ///
    /// Recognized members: states, callables, `const` declarations, named
    /// `enum`/`struct` type definitions, `var` declarations, replication
    /// blocks, exec directives, and skipped `cpptext`/`cppstruct` blocks.
    /// Parsing stops at `defaultproperties`, a `class` keyword, or end of
    /// input; unexpected tokens are reported and skipped until the next
    /// member-starting keyword.
    pub(crate) fn parse_class_definition_cont(
        &mut self,
    ) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> {
        let header = self.parse_class_header_cont()?;
        let mut members = self.arena.vec();
        loop {
            let Some((token, member_start)) = self.peek_token_and_position() else {
                break;
            };
            match token {
                // `defaultproperties` is handled by the caller.
                Token::Keyword(Keyword::DefaultProperties) => break,
                // Failed members fall back to a placeholder node via
                // `unwrap_or_fallback` so later members can still be parsed.
                _ if self.lookahead_state_after_mods() => {
                    let state = self
                        .parse_state_decl()
                        .widen_error_span_from(member_start)
                        .unwrap_or_fallback(self);
                    let span = *state.span();
                    members.push(self.arena.alloc_node(ClassMember::State(state), span));
                }
                _ if self.is_callable_header_ahead() => {
                    let callable = self.parse_callable_definition();
                    let span = *callable.span();
                    members.push(self.arena.alloc_node(ClassMember::Function(callable), span));
                }
                Token::Keyword(Keyword::Const) => {
                    let constant = self
                        .parse_class_const_decl()
                        .widen_error_span_from(member_start)
                        .unwrap_or_fallback(self);
                    let span = *constant.span();
                    members.push(self.arena.alloc_node(ClassMember::Const(constant), span));
                }
                // NOTE(review): `enum` immediately followed by `{` is
                // deliberately excluded from this arm — TODO confirm which
                // path is intended to handle anonymous enums here.
                Token::Keyword(Keyword::Enum)
                    if !matches!(self.peek_token_at(1), Some(Token::LeftBrace)) =>
                {
                    self.advance();
                    let enum_definition = self.parse_enum_definition_tail(member_start);
                    let span = *enum_definition.span();
                    members.push(
                        self.arena
                            .alloc_node(ClassMember::TypeDefEnum(enum_definition), span),
                    );
                    // Optional trailing semicolon after the definition.
                    let _ = self.eat(Token::Semicolon);
                }
                Token::Keyword(Keyword::Struct) => {
                    self.advance();
                    let struct_definition = self.parse_struct_definition_tail(member_start);
                    let span = *struct_definition.span();
                    members.push(
                        self.arena
                            .alloc_node(ClassMember::TypeDefStruct(struct_definition), span),
                    );
                    // Optional trailing semicolon after the definition.
                    let _ = self.eat(Token::Semicolon);
                }
                Token::Keyword(Keyword::Var) => {
                    let variable_declaration = self
                        .parse_class_var_decl()
                        .widen_error_span_from(member_start)
                        .unwrap_or_fallback(self);
                    let span = *variable_declaration.span();
                    members.push(
                        self.arena
                            .alloc_node(ClassMember::Var(variable_declaration), span),
                    );
                }
                Token::Keyword(Keyword::Replication) => {
                    let replication = self
                        .parse_replication_block()
                        .widen_error_span_from(member_start)
                        .unwrap_or_fallback(self);
                    let span = *replication.span();
                    members.push(
                        self.arena
                            .alloc_node(ClassMember::Replication(replication), span),
                    );
                }
                Token::ExecDirective => {
                    let directive = self
                        .parse_exec_directive()
                        .widen_error_span_from(member_start)
                        .unwrap_or_fallback(self);
                    let span = *directive.span();
                    members.push(self.arena.alloc_node(ClassMember::Exec(directive), span));
                }
                // `cpptext`/`cppstruct` blocks are skipped, not represented
                // in the AST.
                Token::Keyword(Keyword::CppText | Keyword::CppStruct) => {
                    self.advance();
                    if !self.eat(Token::CppBlock) {
                        self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock);
                    }
                }
                Token::Keyword(Keyword::Class) => break,
                // Stray semicolons between members are silently consumed.
                Token::Semicolon => {
                    self.advance();
                }
                _ => {
                    // Unknown token: report once, then skip forward until a
                    // token that can plausibly start a class member.
                    self.report_error_here(ParseErrorKind::ClassUnexpectedItem);
                    while let Some(next_token) = self.peek_token() {
                        match next_token {
                            Token::Keyword(
                                Keyword::Function
                                | Keyword::Event
                                | Keyword::Enum
                                | Keyword::Struct
                                | Keyword::Var
                                | Keyword::Replication
                                | Keyword::State
                                | Keyword::Class
                                | Keyword::DefaultProperties,
                            ) => break,
                            _ => {
                                self.advance();
                            }
                        }
                    }
                }
            }
            // Guard against member arms that fail to consume any token.
            if !self.ensure_progress_or_break(member_start) {
                break;
            }
        }
        Ok(ClassDefinition { header, members })
    }
pub fn parse_source_file(
&mut self,
) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> {
loop {
match self.peek_token() {
Some(Token::Semicolon) => {
self.advance();
}
Some(Token::ExecDirective) => {
if let Err(error) = self.parse_exec_directive() {
self.report_error(error);
}
}
//Some(Token::Keyword(Keyword::Class)) | None => break,
_ => break,
}
}
self.expect(
Token::Keyword(Keyword::Class),
ParseErrorKind::ClassMissingKeyword,
)?;
let class_definition = self.parse_class_definition_cont()?;
if matches!(
self.peek_token(),
Some(Token::Keyword(Keyword::DefaultProperties))
) {
return Ok(class_definition);
}
loop {
match self.peek_token() {
Some(Token::Semicolon) => {
self.advance();
}
Some(_) => {
self.report_error_here(ParseErrorKind::ClassUnexpectedItem);
while self.peek_token().is_some() {
self.advance();
}
break;
}
None => break,
}
}
Ok(class_definition)
}
fn decode_signed_integer_literal(&self, s: &str) -> ParseResult<'src, 'arena, i128> {
let (negative, body) = if let Some(rest) = s.strip_prefix('-') {
(true, rest)
} else if let Some(rest) = s.strip_prefix('+') {
(false, rest)
} else {
(false, s)
};
let magnitude: u128 = self.decode_unsigned_integer_magnitude(body)?;
if negative {
const MIN_MAGNITUDE: u128 = 1u128 << 127;
if magnitude == MIN_MAGNITUDE {
Ok(i128::MIN)
} else {
let magnitude_as_i128 = i128::try_from(magnitude)
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))?;
Ok(-magnitude_as_i128)
}
} else {
i128::try_from(magnitude)
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
}
}
fn decode_unsigned_integer_magnitude(&self, body: &str) -> ParseResult<'src, 'arena, u128> {
use ParseErrorKind::InvalidNumericLiteral;
if body.is_empty() {
return Err(self.make_error_here(InvalidNumericLiteral));
}
let (base, digits) =
if let Some(rest) = body.strip_prefix("0x").or_else(|| body.strip_prefix("0X")) {
(16u128, rest)
} else if let Some(rest) = body.strip_prefix("0b").or_else(|| body.strip_prefix("0B")) {
(2u128, rest)
} else if let Some(rest) = body.strip_prefix("0o").or_else(|| body.strip_prefix("0O")) {
(8u128, rest)
} else {
(10u128, body)
};
if digits.is_empty() {
return Err(self.make_error_here(InvalidNumericLiteral));
}
let mut accumulator: u128 = 0;
for character in digits.chars() {
if character == '_' {
continue;
}
let digit_value = match character {
'0'..='9' => u128::from(character as u32 - '0' as u32),
'a'..='f' => u128::from(10 + (character as u32 - 'a' as u32)),
'A'..='F' => u128::from(10 + (character as u32 - 'A' as u32)),
_ => return Err(self.make_error_here(InvalidNumericLiteral)),
};
if digit_value >= base {
return Err(self.make_error_here(InvalidNumericLiteral));
}
accumulator = accumulator
.checked_mul(base)
.and_then(|value| value.checked_add(digit_value))
.ok_or_else(|| self.make_error_here(InvalidNumericLiteral))?;
}
Ok(accumulator)
}
fn parse_declaration_literal_class(
&mut self,
) -> ParseResult<'src, 'arena, DeclarationLiteralRef<'src, 'arena>> {
let (token, lexeme, token_position) =
self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?;
let literal = match token {
Token::Plus | Token::Minus => {
let is_negative = matches!(token, Token::Minus);
self.advance();
let (next_token, next_lexeme, _) =
self.require_token_lexeme_and_position(ParseErrorKind::InvalidNumericLiteral)?;
match next_token {
Token::IntegerLiteral => {
let value = if is_negative {
self.decode_signed_integer_literal(&format!("-{next_lexeme}"))?
} else {
self.decode_signed_integer_literal(next_lexeme)?
};
self.advance();
DeclarationLiteral::Integer(value)
}
Token::FloatLiteral => {
let mut signed_lexeme = String::with_capacity(1 + next_lexeme.len());
signed_lexeme.push(if is_negative { '-' } else { '+' });
signed_lexeme.push_str(next_lexeme);
let value = self.decode_float_literal(&signed_lexeme)?;
self.advance();
DeclarationLiteral::Float(value)
}
_ => {
return Err(
self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken)
);
}
}
}
Token::IntegerLiteral => {
let value = self.decode_signed_integer_literal(lexeme)?;
self.advance();
DeclarationLiteral::Integer(value)
}
Token::FloatLiteral => {
let value = self.decode_float_literal(lexeme)?;
self.advance();
DeclarationLiteral::Float(value)
}
Token::StringLiteral => {
let value = self.unescape_string_literal(lexeme);
self.advance();
DeclarationLiteral::String(value)
}
Token::Keyword(Keyword::True) => {
self.advance();
DeclarationLiteral::Bool(true)
}
Token::Keyword(Keyword::False) => {
self.advance();
DeclarationLiteral::Bool(false)
}
Token::Keyword(Keyword::None) => {
self.advance();
DeclarationLiteral::None
}
Token::NameLiteral => {
let inner = &lexeme[1..lexeme.len() - 1];
let value = self.arena.string(inner);
self.advance();
DeclarationLiteral::String(value)
}
Token::Keyword(Keyword::Class) => {
self.advance();
let (next_token, next_lexeme, _) = self.require_token_lexeme_and_position(
ParseErrorKind::DeclarationLiteralUnexpectedToken,
)?;
if !matches!(next_token, Token::NameLiteral) {
return Err(
self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken)
);
}
let inner = &next_lexeme[1..next_lexeme.len() - 1];
let quoted_name = self.arena.string(inner);
self.advance();
DeclarationLiteral::TaggedName {
tag: IdentifierToken(token_position),
quoted: quoted_name,
}
}
_ if token.is_valid_identifier_name() => {
self.advance();
DeclarationLiteral::Identifier(lexeme)
}
_ => return Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken)),
};
Ok(DeclarationLiteralRef {
literal,
position: token_position,
})
}
    /// Parses a `const Name = <literal>;` class member, starting at the
    /// `const` keyword.
    ///
    /// # Errors
    ///
    /// Propagates failures from the keyword, identifier, `=`, literal, and
    /// `;` steps without recovery; the caller handles fallback.
    // NOTE(review): `ClassUnexpectedItem` (missing `const`) and
    // `TypeSpecInvalidNamedTypeName` (missing `=`) look like error kinds
    // borrowed from other productions — confirm whether dedicated
    // const-specific kinds were intended.
    fn parse_class_const_decl(
        &mut self,
    ) -> ParseResult<'src, 'arena, ClassConstDeclRef<'src, 'arena>> {
        let start_position = self.expect(
            Token::Keyword(Keyword::Const),
            ParseErrorKind::ClassUnexpectedItem,
        )?;
        let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?;
        self.expect(Token::Assign, ParseErrorKind::TypeSpecInvalidNamedTypeName)?;
        let value = self.parse_declaration_literal_class()?;
        self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?;
        // The span runs from `const` through the consumed semicolon.
        let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
        Ok(self
            .arena
            .alloc_node(ClassConstDecl { name, value, span }, span))
    }
    /// Parses the comma-separated declarator list of a class `var`
    /// declaration: `Name`, `Name[Len]`, `Name, Other`, with a trailing
    /// comma before `;` tolerated.
    ///
    /// This production never parses initializers (`initializer` is always
    /// `None`). Returns the successfully parsed declarators; on an invalid
    /// first declarator it reports once, recovers to the end of the
    /// statement, and returns an empty list.
    fn parse_class_var_declarators(
        &mut self,
    ) -> crate::arena::ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> {
        let mut declarators = self.arena.vec();
        loop {
            match self.peek_token_and_position() {
                Some((next_token, declarator_start)) if next_token.is_valid_identifier_name() => {
                    // The guard above makes a parse failure unlikely; fall
                    // back to a token anchored at the declarator start.
                    let identifier = self
                        .parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)
                        .unwrap_or(IdentifierToken(declarator_start));
                    // Optional `[<len>]` static-array suffix; on error,
                    // recover to the closing bracket and drop the size.
                    let array_size = match self.parse_array_len_expr() {
                        Ok(value) => value,
                        Err(error) => {
                            self.report_error(error);
                            self.recover_until(SyncLevel::CloseBracket);
                            let _ = self.eat(Token::RightBracket);
                            None
                        }
                    };
                    let span = AstSpan::range(identifier.0, self.last_consumed_position_or_start());
                    declarators.push(self.arena.alloc_node(
                        VariableDeclarator {
                            name: identifier,
                            initializer: None,
                            array_size,
                        },
                        span,
                    ));
                    if self.eat(Token::Comma) {
                        // A comma directly followed by `;` ends the list
                        // (tolerated trailing comma).
                        if self.peek_token() == Some(Token::Semicolon) {
                            break;
                        }
                        continue;
                    }
                    break;
                }
                Some((_, _)) if declarators.is_empty() => {
                    // No valid identifier at all: report once, skip to a
                    // statement boundary, and return an empty list.
                    self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
                    self.recover_until(SyncLevel::Statement);
                    let _ = self.eat(Token::Semicolon);
                    break;
                }
                _ => break,
            }
        }
        declarators
    }
pub(crate) fn parse_identifier_list(
&mut self,
) -> crate::arena::ArenaVec<'arena, IdentifierToken> {
let list_start = self.last_consumed_position_or_start();
let mut identifiers = self.arena.vec();
while let Some((token, _lexeme, identifier_position)) =
self.peek_token_lexeme_and_position()
{
match token {
Token::RightParenthesis => break,
Token::Comma => {
self.advance();
self.report_error_here(ParseErrorKind::ListMissingIdentifierBeforeSeparator);
}
_ if token.is_valid_identifier_name() => {
self.advance();
identifiers.push(IdentifierToken(identifier_position));
if !self.eat(Token::Comma)
&& let Some(next_token) = self.peek_token()
&& next_token != Token::RightParenthesis
{
self.report_error_here(ParseErrorKind::ListMissingSeparator);
}
}
_ => {
self.make_error_here(ParseErrorKind::ListInvalidIdentifier)
.sync_error_until(self, SyncLevel::ListSeparator)
.report_error(self);
}
}
}
if identifiers.is_empty() {
let list_end = self.last_consumed_position_or_start();
self.report_error(crate::parser::ParseError {
kind: ParseErrorKind::ListEmpty,
anchor: list_start,
blame_span: AstSpan::range(list_start, list_end),
covered_span: AstSpan::range(list_start, list_end),
related_span: None,
});
}
identifiers
}
}

View File

@ -1,180 +0,0 @@
use crate::ast::{AstSpan, Expression, ExpressionRef};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses an `if` block, assuming that `if` token was consumed.
    ///
    /// Produces an [`Expression::If`] spanning from the `if` keyword to
    /// the end of the last arm (`else` body if present,
    /// otherwise the `if` body).
    #[must_use]
    pub(crate) fn parse_if_cont(
        &mut self,
        if_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_expression();
        let body = self.parse_expression();
        let (else_body, if_end_location) = if let Some(Token::Else) = self.peek_token() {
            self.advance(); // else
            let else_body = self.parse_expression();
            // Capture end before moving `else_body` to build the full `if` span
            let body_end = else_body.span().to;
            (Some(else_body), body_end)
        } else {
            (None, body.span().to)
        };
        let span = AstSpan {
            from: if_start_location,
            to: if_end_location,
        };
        self.arena.alloc(
            Expression::If {
                condition,
                body,
                else_body,
            },
            span,
        )
    }
    /// Parses a `while` loop, assuming that `while` token was consumed.
    ///
    /// Produces an [`Expression::While`] spanning from the `while` keyword
    /// to the end of the body.
    #[must_use]
    pub(crate) fn parse_while_cont(
        &mut self,
        while_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_expression();
        let body = self.parse_expression();
        let span = AstSpan {
            from: while_start_location,
            to: body.span().to,
        };
        self.arena
            .alloc(Expression::While { condition, body }, span)
    }
    /// Parses a `do ... until ...` loop after `do`, assuming that `do` token
    /// was consumed.
    ///
    /// On a missing `until`, returns an error
    /// [`ParseErrorKind::DoMissingUntil`].
    /// On success, produces an [`Expression::DoUntil`] spanning from `do`
    /// to the end of the condition.
    #[must_use]
    pub(crate) fn parse_do_until_cont(
        &mut self,
        do_start_location: TokenLocation,
    ) -> crate::parser::ParseExpressionResult<'src, 'arena> {
        let body = self.parse_expression();
        // Widen the error span to cover the whole `do` construct, not just
        // the token where `until` was expected.
        self.expect(Token::Until, ParseErrorKind::DoMissingUntil)
            .widen_error_span_from(do_start_location)?;
        let condition = self.parse_expression();
        let span = AstSpan {
            from: do_start_location,
            to: condition.span().to,
        };
        Ok(self
            .arena
            .alloc(Expression::DoUntil { condition, body }, span))
    }
    /// Parses a `foreach` loop, assuming that `foreach` token was consumed.
    ///
    /// Produces an [`Expression::ForEach`] spanning from `foreach`
    /// to the end of the body.
    #[must_use]
    pub(crate) fn parse_foreach_cont(
        &mut self,
        foreach_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let iterator = self.parse_expression();
        let body = self.parse_expression();
        let span = AstSpan {
            from: foreach_start_location,
            to: body.span().to,
        };
        self.arena
            .alloc(Expression::ForEach { iterator, body }, span)
    }
    /// Parses a `for` loop after `for`, assuming that `for` token was consumed.
    ///
    /// Grammar: `for (init?; condition?; step?) body`.
    /// Any of `init`, `condition`, or `step` may be omitted.
    /// Emits specific `ParseErrorKind` values for missing
    /// delimiters/separators.
    /// On success returns an [`Expression::For`] spanning from `for` to
    /// the end of the body.
    #[must_use]
    pub(crate) fn parse_for_cont(
        &mut self,
        for_start_location: TokenLocation,
    ) -> crate::parser::ParseResult<'src, 'arena, ExpressionRef<'src, 'arena>> {
        self.expect(
            Token::LeftParenthesis,
            ParseErrorKind::ForMissingOpeningParenthesis,
        )
        .widen_error_span_from(for_start_location)?;
        // Init clause: an immediate `;` means it was omitted.
        let init = if let Some(Token::Semicolon) = self.peek_token() {
            self.advance();
            None
        } else {
            let init = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingInitializationSemicolon,
            )?;
            Some(init)
        };
        // Condition clause: an immediate `;` means it was omitted.
        let condition = if let Some(Token::Semicolon) = self.peek_token() {
            self.advance();
            None
        } else {
            let condition = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingConditionSemicolon,
            )?;
            Some(condition)
        };
        // Step clause: an immediate `)` means it was omitted.
        let step = if let Some(Token::RightParenthesis) = self.peek_token() {
            self.advance();
            None
        } else {
            let step = self.parse_expression();
            self.expect(
                Token::RightParenthesis,
                ParseErrorKind::ForMissingClosingParenthesis,
            )
            .widen_error_span_from(for_start_location)
            .sync_error_until(self, crate::parser::SyncLevel::CloseParenthesis)?;
            Some(step)
        };
        let body = self.parse_expression();
        let span = AstSpan {
            from: for_start_location,
            to: body.span().to,
        };
        Ok(self.arena.alloc(
            Expression::For {
                init,
                condition,
                step,
                body,
            },
            span,
        ))
    }
}

View File

@ -0,0 +1,138 @@
//! Parsing of enum definitions for Fermented `UnrealScript`.
use std::ops::ControlFlow;
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, EnumDefRef, EnumDefinition, IdentifierToken};
use crate::lexer::Token;
use crate::lexer::TokenPosition;
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// Tracks what the enum-variant list parser expects next, so that separator
/// errors can be distinguished from variant errors.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum EnumParseState {
    /// A variant identifier (or the closing brace) should come next.
    ExpectingVariant,
    /// A `,` separator (or the closing brace) should come next.
    ExpectingSeparator,
}
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses an `enum` definition after the `enum` keyword has been consumed.
    ///
    /// Returns a reference to the allocated enum definition containing its name
    /// and variants.
    pub(crate) fn parse_enum_definition_tail(
        &mut self,
        enum_keyword_position: TokenPosition,
    ) -> EnumDefRef<'src, 'arena> {
        // Missing names and braces are reported but do not abort parsing;
        // `unwrap_or_fallback` supplies a placeholder name.
        let name = self
            .parse_identifier(ParseErrorKind::EnumExpectedNameOrBrace)
            .unwrap_or_fallback(self);
        self.expect(Token::LeftBrace, ParseErrorKind::EnumMissingLeftBrace)
            .report_error(self);
        let variants = self.parse_enum_variants();
        self.expect(Token::RightBrace, ParseErrorKind::EnumNoClosingBrace)
            .report_error(self);
        // The span covers `enum` through the last consumed token (normally
        // the closing brace).
        let span = AstSpan::range(
            enum_keyword_position,
            self.last_consumed_position_or_start(),
        );
        self.arena
            .alloc_node(EnumDefinition { name, variants }, span)
    }
    /// Parses the list of enum variants inside braces, handling commas and
    /// errors.
    ///
    /// Returns a vector of successfully parsed variant identifiers.
    fn parse_enum_variants(&mut self) -> ArenaVec<'arena, IdentifierToken> {
        use EnumParseState::{ExpectingSeparator, ExpectingVariant};
        let mut variants = self.arena.vec();
        // Two-state machine: alternate between expecting a variant and
        // expecting a `,` separator, with dedicated recovery for each.
        let mut parser_state = ExpectingVariant;
        while let Some((next_token, next_token_position)) = self.peek_token_and_position() {
            let should_break = match (parser_state, next_token) {
                // The closing brace always ends the list, whatever state
                // we are in.
                (_, Token::RightBrace) => break,
                (ExpectingVariant, Token::Comma) => self
                    .recover_from_empty_enum_variant(next_token_position)
                    .is_break(),
                (ExpectingVariant, _) => {
                    parser_state = ExpectingSeparator;
                    self.parse_and_push_enum_variant(&mut variants).is_break()
                }
                (ExpectingSeparator, Token::Comma) => {
                    self.advance(); // `,`
                    parser_state = ExpectingVariant;
                    false
                }
                (ExpectingSeparator, _) => self
                    .parse_enum_variant_after_missing_separator(next_token_position, &mut variants)
                    .is_break(),
            };
            if should_break {
                break;
            }
            // Guard against iterations that consume nothing.
            self.ensure_forward_progress(next_token_position);
        }
        variants
    }
    /// Recovers from one or more commas appearing where a variant is expected.
    ///
    /// Stops parsing if only a closing brace or end-of-file remains.
    fn recover_from_empty_enum_variant(
        &mut self,
        error_start_position: TokenPosition,
    ) -> ControlFlow<()> {
        // Consume the whole run of commas and report it as a single error.
        while self.peek_token() == Some(Token::Comma) {
            self.advance();
        }
        self.make_error_here(ParseErrorKind::EnumEmptyVariants)
            .widen_error_span_from(error_start_position)
            .report_error(self);
        if matches!(self.peek_token(), Some(Token::RightBrace) | None) {
            ControlFlow::Break(())
        } else {
            ControlFlow::Continue(())
        }
    }
    /// Parses one enum variant and appends it to `variants`.
    ///
    /// Stops parsing if recovery does not produce a valid identifier.
    fn parse_and_push_enum_variant(
        &mut self,
        variants: &mut ArenaVec<'arena, IdentifierToken>,
    ) -> ControlFlow<()> {
        self.parse_identifier(ParseErrorKind::EnumBadVariant)
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
            .map_or(ControlFlow::Break(()), |variant| {
                variants.push(variant);
                ControlFlow::Continue(())
            })
    }
    /// Parses a variant after a missing separator and reports the missing-comma
    /// diagnostic if recovery succeeds.
    fn parse_enum_variant_after_missing_separator(
        &mut self,
        error_start_position: TokenPosition,
        variants: &mut ArenaVec<'arena, IdentifierToken>,
    ) -> ControlFlow<()> {
        let Some(variant) = self
            .parse_identifier(ParseErrorKind::EnumBadVariant)
            .widen_error_span_from(error_start_position)
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
        else {
            // If we don't even get a good identifier - error is different
            return ControlFlow::Break(());
        };
        self.make_error_here(ParseErrorKind::EnumNoSeparatorBetweenVariants)
            .widen_error_span_from(error_start_position)
            .report_error(self);
        variants.push(variant);
        ControlFlow::Continue(())
    }
}

View File

@ -0,0 +1,11 @@
//! Declaration parsing for Fermented `UnrealScript`.
//!
//! Implements recursive-descent parsing for declaration-related grammar:
//! type specifiers, enum and struct definitions, `var(...)` prefixes,
//! and variable declarators.
mod enum_definition; // `enum` definitions and their variant lists.
mod struct_definition; // `struct` definitions, fields, and C++ block skipping.
mod type_specifier; // Type-specifier parsing (variable types).
mod var_specifiers; // `var(...)` editor specifiers and declaration-modifiers.
mod variable_declarators; // Comma-separated declarator lists (variable lists).

View File

@ -0,0 +1,210 @@
//! Parsing of struct definitions for Fermented `UnrealScript`.
//!
//! ## C++ block handling
//!
//! The Fermented `UnrealScript` parser must support parsing several legacy
//! source files that contain `cpptext` or `cppstruct`. Our compiler does not
//! compile with C++ code and therefore does not need these blocks in
//! the resulting AST. We treat them the same as trivia and skip them.
//!
//! However, some related tokens are context-sensitive, so handling these
//! blocks in the general trivia-skipping path would complicate the separation
//! between the lexer and the parser.
//!
//! The resulting files will not be compiled, but they can still be used to
//! extract type information.
use crate::arena::ArenaVec;
use crate::ast::{
AstSpan, IdentifierToken, QualifiedIdentifierRef, StructDefRef, StructDefinition, StructField,
StructFieldRef, StructModifier, StructModifierKind, TypeSpecifierRef, VarEditorSpecifierRef,
VarModifier,
};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// Intermediate result of parsing everything in a struct field before its
/// declarator list.
#[derive(Debug)]
struct ParsedStructFieldPrefix<'src, 'arena> {
    /// `var(...)` editor specifiers, when present.
    editor_specifiers: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
    /// Declaration modifiers (e.g. `transient`, `config`) before the type.
    declaration_modifiers: ArenaVec<'arena, VarModifier>,
    /// The parsed type of the field.
    type_specifier: TypeSpecifierRef<'src, 'arena>,
}
/// Outcome of parsing a single item inside a struct body.
#[derive(Debug)]
enum StructBodyItemParseOutcome<'src, 'arena> {
    /// A field was parsed successfully.
    Field(StructFieldRef<'src, 'arena>),
    /// The item was skipped (unsupported or recovered-from); keep parsing.
    Skip,
    /// Parsing of the struct body should stop at this level.
    Stop,
}
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a `struct` definition after the `struct` keyword has been
    /// consumed.
    pub(crate) fn parse_struct_definition_tail(
        &mut self,
        struct_keyword_position: TokenPosition,
    ) -> StructDefRef<'src, 'arena> {
        let modifiers = self.parse_struct_declaration_modifiers();
        let (name, base_type_name) = self.parse_struct_name_base_and_open_brace();
        let mut fields = self.arena.vec();
        while let Some((next_token, next_position)) = self.peek_token_and_position()
            && next_token != Token::RightBrace
        {
            match self.parse_or_skip_struct_body_item() {
                StructBodyItemParseOutcome::Field(new_field) => fields.push(new_field),
                StructBodyItemParseOutcome::Skip => (),
                StructBodyItemParseOutcome::Stop => break,
            }
            // Guard against body items that consume nothing.
            self.ensure_forward_progress(next_position);
        }
        self.expect(Token::RightBrace, ParseErrorKind::StructMissingRightBrace)
            .widen_error_span_from(struct_keyword_position)
            .report_error(self);
        // The span covers `struct` through the last consumed token.
        let span = AstSpan::range(
            struct_keyword_position,
            self.last_consumed_position_or_start(),
        );
        self.arena.alloc_node(
            StructDefinition {
                name,
                base_type_name,
                modifiers,
                fields,
            },
            span,
        )
    }
    /// Parses one item in a struct body or skips an unsupported one.
    ///
    /// Returns [`StructBodyItemParseOutcome::Field`] for a successfully parsed
    /// field, [`StructBodyItemParseOutcome::Skip`] when recovery allows parsing
    /// to continue, and [`StructBodyItemParseOutcome::Stop`] when parsing
    /// should stop at this level.
    fn parse_or_skip_struct_body_item(&mut self) -> StructBodyItemParseOutcome<'src, 'arena> {
        let Some((token, token_position)) = self.peek_token_and_position() else {
            // This is the end of the file;
            // it will be handled by a higher-level parser.
            return StructBodyItemParseOutcome::Stop;
        };
        match token {
            // `cpptext`/`cppstruct` blocks are skipped, not represented in
            // the AST (see the module docs above).
            Token::Keyword(Keyword::CppText | Keyword::CppStruct) => {
                self.advance();
                if !self.eat(Token::CppBlock) {
                    self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock);
                    self.recover_until(SyncLevel::Statement);
                }
                StructBodyItemParseOutcome::Skip
            }
            Token::Keyword(Keyword::Var) => {
                self.advance();
                self.parse_struct_field_tail(token_position)
            }
            _ => {
                self.report_error_here(ParseErrorKind::StructBodyUnexpectedItem);
                self.recover_until(SyncLevel::BlockBoundary);
                StructBodyItemParseOutcome::Skip
            }
        }
    }
    /// Parses a struct field after the `var` keyword has been consumed.
    ///
    /// Returns [`StructBodyItemParseOutcome::Skip`] if the field cannot be
    /// parsed far enough to produce a usable AST node after recovery.
    fn parse_struct_field_tail(
        &mut self,
        var_keyword_position: TokenPosition,
    ) -> StructBodyItemParseOutcome<'src, 'arena> {
        let Some(field_prefix) = self.parse_struct_field_prefix() else {
            return StructBodyItemParseOutcome::Skip;
        };
        let declarators = self.parse_variable_declarators();
        if !self.eat(Token::Semicolon) {
            self.report_error_here(ParseErrorKind::StructFieldMissingSemicolon);
            self.recover_until(SyncLevel::BlockBoundary);
            let _ = self.eat(Token::Semicolon);
        }
        // A field with no declarators has nothing to represent in the AST.
        if declarators.is_empty() {
            return StructBodyItemParseOutcome::Skip;
        }
        let span = AstSpan::range(var_keyword_position, self.last_consumed_position_or_start());
        StructBodyItemParseOutcome::Field(self.arena.alloc_node(
            StructField {
                type_specifier: field_prefix.type_specifier,
                declaration_modifiers: field_prefix.declaration_modifiers,
                editor_specifiers: field_prefix.editor_specifiers,
                declarators,
            },
            span,
        ))
    }
    /// Parses the `var(...)` editor specifiers, declaration modifiers, and
    /// type of a struct field.
    ///
    /// Returns `None` (after reporting and recovering to a block boundary)
    /// when the type specifier cannot be parsed.
    fn parse_struct_field_prefix(&mut self) -> Option<ParsedStructFieldPrefix<'src, 'arena>> {
        let editor_specifiers = self.parse_var_editor_specifier_list();
        let declaration_modifiers = self.parse_var_declaration_modifiers();
        let type_specification = self
            .parse_type_specifier()
            .sync_error_until(self, SyncLevel::BlockBoundary)
            .ok_or_report(self)?;
        Some(ParsedStructFieldPrefix {
            editor_specifiers,
            declaration_modifiers,
            type_specifier: type_specification,
        })
    }
    /// Parses the struct name, optional base type, and opening brace.
    ///
    /// Accepts anonymous structs that begin immediately with `{`.
    fn parse_struct_name_base_and_open_brace(
        &mut self,
    ) -> (
        Option<IdentifierToken>,
        Option<QualifiedIdentifierRef<'arena>>,
    ) {
        if self.eat(Token::LeftBrace) {
            return (None, None);
        }
        let name = self
            .parse_identifier(ParseErrorKind::StructExpectedNameOrBrace)
            .ok_or_report(self);
        let base_type_name =
            if let Some((Token::Keyword(Keyword::Extends), extends_keyword_position)) =
                self.peek_token_and_position()
            {
                self.advance();
                self.parse_qualified_identifier(ParseErrorKind::StructExpectedBaseName)
                    .widen_error_span_from(extends_keyword_position)
                    .ok_or_report(self)
            } else {
                None
            };
        self.expect(Token::LeftBrace, ParseErrorKind::StructMissingLeftBrace)
            .report_error(self);
        (name, base_type_name)
    }
    /// Parses a consecutive run of struct declaration modifiers (`native`,
    /// `transient`, ...), stopping at the first non-modifier token, which is
    /// left unconsumed for the caller.
    fn parse_struct_declaration_modifiers(&mut self) -> ArenaVec<'arena, StructModifier> {
        let mut modifiers = self.arena.vec();
        while let Some((next_keyword, next_keyword_position)) = self.peek_keyword_and_position() {
            let next_modifier_kind = match next_keyword {
                Keyword::Native => StructModifierKind::Native,
                Keyword::Init => StructModifierKind::Init,
                Keyword::Export => StructModifierKind::Export,
                Keyword::NoExport => StructModifierKind::NoExport,
                Keyword::Transient => StructModifierKind::Transient,
                Keyword::Deprecated => StructModifierKind::Deprecated,
                Keyword::Long => StructModifierKind::Long,
                _ => break,
            };
            modifiers.push(StructModifier {
                kind: next_modifier_kind,
                position: next_keyword_position,
            });
            self.advance();
        }
        modifiers
    }
}

View File

@ -0,0 +1,116 @@
//! Parsing of type specifiers for Fermented `UnrealScript`.
use crate::ast::{AstSpan, TypeSpecifier, TypeSpecifierRef};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseResult, Parser};
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a type specifier used in variable declarations.
///
/// Accepts named types, `class<...>` types, `array<...>` types, and inline
/// `enum` and `struct` definitions.
///
/// Returns an error if the next tokens do not form a valid type specifier.
pub(crate) fn parse_type_specifier(
&mut self,
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
let (starting_token, starting_token_position) =
self.require_token_and_position(ParseErrorKind::TypeSpecExpectedType)?;
match starting_token {
Token::Keyword(Keyword::Enum) => {
self.advance();
Ok(self.parse_inline_enum_tail(starting_token_position))
}
Token::Keyword(Keyword::Struct) => {
self.advance();
Ok(self.parse_inline_struct_tail(starting_token_position))
}
Token::Keyword(Keyword::Array) => {
self.advance();
self.parse_array_type_specification_tail(starting_token_position)
}
Token::Keyword(Keyword::Class) => {
self.advance();
self.parse_class_type_specification_tail(starting_token_position)
}
_ if starting_token.is_valid_type_name() => {
let type_name =
self.parse_qualified_identifier(ParseErrorKind::TypeSpecInvalidNamedTypeName)?;
let full_span = *type_name.span();
Ok(self
.arena
.alloc_node(TypeSpecifier::Named(type_name), full_span))
}
_ => Err(self.make_error_here(ParseErrorKind::TypeSpecExpectedType)),
}
}
fn parse_inline_enum_tail(
&mut self,
starting_token_position: TokenPosition,
) -> TypeSpecifierRef<'src, 'arena> {
let enum_definition = self.parse_enum_definition_tail(starting_token_position);
let enum_span = AstSpan::range(starting_token_position, enum_definition.span().token_to);
self.arena
.alloc_node(TypeSpecifier::InlineEnum(enum_definition), enum_span)
}
fn parse_inline_struct_tail(
&mut self,
starting_token_position: TokenPosition,
) -> TypeSpecifierRef<'src, 'arena> {
let struct_definition = self.parse_struct_definition_tail(starting_token_position);
let struct_span =
AstSpan::range(starting_token_position, struct_definition.span().token_to);
self.arena
.alloc_node(TypeSpecifier::InlineStruct(struct_definition), struct_span)
}
/// Parses an `array<...>` type specification after the `array` keyword has
/// been consumed.
///
/// Expects `<`, an optional run of variable-declaration modifiers, the
/// element type, and a closing `>`. Failure of any of these steps aborts
/// parsing with the corresponding [`ParseErrorKind`].
fn parse_array_type_specification_tail(
    &mut self,
    starting_token_position: TokenPosition,
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
    self.expect(Token::Less, ParseErrorKind::TypeSpecArrayMissingOpeningAngle)?;
    // Modifiers may precede the element type inside the angle brackets.
    let element_modifiers = self.parse_var_declaration_modifiers();
    let element_type = self.parse_type_specifier()?;
    let closing_position =
        self.expect(Token::Greater, ParseErrorKind::TypeSpecArrayMissingClosingAngle)?;
    let specifier = TypeSpecifier::Array {
        element_type,
        element_modifiers,
    };
    let span = AstSpan::range(starting_token_position, closing_position);
    Ok(self.arena.alloc_node(specifier, span))
}
/// Parses a `class` or `class<Qualified.Name>` type specification after the
/// `class` keyword has been consumed.
///
/// When no `<` follows, the specifier carries no inner type name and spans
/// only the `class` keyword itself.
fn parse_class_type_specification_tail(
    &mut self,
    starting_token_position: TokenPosition,
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
    let mut inner_type_name = None;
    let mut end_position = starting_token_position;
    if self.eat(Token::Less) {
        inner_type_name = Some(
            self.parse_qualified_identifier(ParseErrorKind::TypeSpecClassMissingInnerType)?,
        );
        end_position = self.expect(
            Token::Greater,
            ParseErrorKind::TypeSpecClassMissingClosingAngle,
        )?;
    }
    let span = AstSpan::range(starting_token_position, end_position);
    Ok(self
        .arena
        .alloc_node(TypeSpecifier::Class(inner_type_name), span))
}
}

View File

@ -0,0 +1,89 @@
//! Parsing of declaration specifiers used in `var(...) ...` syntax for
//! Fermented `UnrealScript`.
use crate::arena::ArenaVec;
use crate::ast::{VarEditorSpecifier, VarEditorSpecifierRef, VarModifier};
use crate::lexer::Token;
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a consecutive run of variable declaration modifiers.
    ///
    /// Used for declarations such as `var transient config editconst int X;`.
    ///
    /// Tokens are consumed for as long as they convert into a
    /// [`VarModifier`]; the first token that does not is left unconsumed for
    /// the caller. The result is in source order and may be empty.
    #[must_use]
    pub(crate) fn parse_var_declaration_modifiers(&mut self) -> ArenaVec<'arena, VarModifier> {
        let mut collected = self.arena.vec();
        loop {
            let Some(token_and_position) = self.peek_token_and_position() else {
                break;
            };
            match VarModifier::try_from(token_and_position) {
                Ok(modifier) => {
                    self.advance();
                    collected.push(modifier);
                }
                Err(_) => break,
            }
        }
        collected
    }

    /// Parses the optional parenthesized editor specifier list in `var(...)`.
    ///
    /// Assumes that `var` has already been consumed.
    ///
    /// Returns `None` when the current token is not `(`. Once `(` is present,
    /// returns `Some(...)` with the (possibly empty) list of specifiers, each
    /// either a string literal or an identifier.
    ///
    /// Recovery is intentionally minimal because these specifier lists are
    /// not important enough to justify aggressive repair.
    #[must_use]
    pub(crate) fn parse_var_editor_specifier_list(
        &mut self,
    ) -> Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>> {
        if !self.eat(Token::LeftParenthesis) {
            return None;
        }
        let mut specifiers = self.arena.vec();
        loop {
            let Some((token, lexeme, position)) = self.peek_token_lexeme_and_position() else {
                break;
            };
            if token == Token::RightParenthesis {
                break;
            }
            if token == Token::StringLiteral {
                self.advance();
                let decoded_value = self.unescape_string_literal(lexeme);
                specifiers.push(
                    self.arena
                        .alloc_node_at(VarEditorSpecifier::String(decoded_value), position),
                );
            } else if let Some(identifier) = Self::identifier_token_from_token(token, position) {
                self.advance();
                specifiers.push(
                    self.arena
                        .alloc_node_at(VarEditorSpecifier::Identifier(identifier), position),
                );
            } else {
                self.make_error_here(ParseErrorKind::VarSpecNotIdentifier)
                    .sync_error_until(self, SyncLevel::ListSeparator)
                    .report_error(self);
            }
            // Detailed recovery is not worthwhile here; stop once the list
            // structure becomes unclear.
            if !self.eat(Token::Comma) {
                break;
            }
            self.ensure_forward_progress(position);
        }
        self.expect(
            Token::RightParenthesis,
            ParseErrorKind::VarSpecsMissingClosingParenthesis,
        )
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .report_error(self);
        Some(specifiers)
    }
}

View File

@ -0,0 +1,172 @@
//! Parsing of comma-separated variable declarator lists for
//! Fermented `UnrealScript`.
//!
//! Extends original `UnrealScript` by allowing array-size expressions and
//! declarator initializers.
#![allow(clippy::option_if_let_else)]
use std::ops::ControlFlow;
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, OptionalExpression, VariableDeclarator, VariableDeclaratorRef};
use crate::lexer::{Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel};
/// Two-phase state used by [`Parser::parse_variable_declarators`] to
/// alternate between consuming a declarator and consuming the `,` that
/// separates declarators.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum VariableDeclaratorParseState {
    /// The next meaningful token should begin a variable declarator.
    ExpectingDeclarator,
    /// A declarator was just parsed; a `,` or terminating `;` should follow.
    ExpectingSeparator,
}
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a comma-separated list of variable declarators.
    ///
    /// Accepts optional array-size expressions and `=` initializers.
    #[must_use]
    pub(crate) fn parse_variable_declarators(
        &mut self,
    ) -> ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> {
        use VariableDeclaratorParseState::{ExpectingDeclarator, ExpectingSeparator};
        let mut declarators = self.arena.vec();
        let mut parser_state = ExpectingDeclarator;
        // Two-state loop: alternate between expecting a declarator and
        // expecting a `,` separator, recovering from stray commas and
        // missing separators along the way. The terminating `;` (or EOF)
        // is left for the caller.
        while let Some((next_token, next_token_position)) = self.peek_token_and_position() {
            match (parser_state, next_token) {
                (ExpectingDeclarator, Token::Semicolon) => {
                    self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations);
                    return declarators;
                }
                (ExpectingDeclarator, Token::Comma) => {
                    if self
                        .recover_empty_variable_declarator(next_token_position)
                        .is_break()
                    {
                        return declarators;
                    }
                }
                (ExpectingDeclarator, _) => {
                    if self
                        .parse_variable_declarator_into(&mut declarators)
                        .is_break()
                    {
                        // Breaking means we've failed to parse declarator
                        self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations);
                        break;
                    }
                    parser_state = ExpectingSeparator;
                }
                (ExpectingSeparator, Token::Comma) => {
                    self.advance();
                    parser_state = ExpectingDeclarator;
                }
                (ExpectingSeparator, Token::Semicolon) => break,
                (ExpectingSeparator, _) => {
                    if self
                        .recover_missing_variable_declarator_separator(
                            next_token_position,
                            &mut declarators,
                        )
                        .is_break()
                    {
                        break;
                    }
                }
            }
            self.ensure_forward_progress(next_token_position);
        }
        // In case of reaching EOF here, it does not matter if we emit
        // an additional diagnostic.
        // The caller is expected to report the more relevant enclosing error.
        declarators
    }

    /// Recovers from a `,` seen where a declarator was expected.
    ///
    /// Skips the whole run of consecutive commas, then reports
    /// [`ParseErrorKind::DeclEmptyVariableDeclarations`] over the skipped
    /// span (starting at `error_start_position`).
    ///
    /// Returns [`ControlFlow::Break`] when the list is effectively over
    /// (next token is `;` or EOF), [`ControlFlow::Continue`] otherwise.
    fn recover_empty_variable_declarator(
        &mut self,
        error_start_position: TokenPosition,
    ) -> ControlFlow<()> {
        while self.peek_token() == Some(Token::Comma) {
            self.advance();
        }
        self.make_error_here(ParseErrorKind::DeclEmptyVariableDeclarations)
            .widen_error_span_from(error_start_position)
            .report_error(self);
        if matches!(self.peek_token(), Some(Token::Semicolon) | None) {
            ControlFlow::Break(())
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Parses one declarator and appends it to `declarators`.
    ///
    /// On failure, synchronizes to the next statement boundary, reports the
    /// error, and returns [`ControlFlow::Break`]; nothing is appended in
    /// that case.
    fn parse_variable_declarator_into(
        &mut self,
        declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    ) -> ControlFlow<()> {
        if let Some(parsed_declarator) = self
            .parse_variable_declarator()
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
        {
            declarators.push(parsed_declarator);
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(())
        }
    }

    /// Recovers when a declarator appears where a `,` separator was
    /// expected.
    ///
    /// First tries to parse the declarator itself; only if that succeeds is
    /// [`ParseErrorKind::DeclNoSeparatorBetweenVariableDeclarations`]
    /// reported (widened from `error_start_position`) and the declarator
    /// kept. Returns [`ControlFlow::Break`] when the declarator itself
    /// fails to parse.
    fn recover_missing_variable_declarator_separator(
        &mut self,
        error_start_position: TokenPosition,
        declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    ) -> ControlFlow<()> {
        if let Some(parsed_declarator) = self
            .parse_variable_declarator()
            .widen_error_span_from(error_start_position)
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
        {
            self.make_error_here(ParseErrorKind::DeclNoSeparatorBetweenVariableDeclarations)
                .widen_error_span_from(error_start_position)
                .report_error(self);
            declarators.push(parsed_declarator);
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(())
        }
    }

    /// Parses a single declarator: `name`, optional `[array_size]`, and an
    /// optional `= initializer`.
    ///
    /// The resulting span runs from the name token to the last token
    /// consumed by the optional parts.
    fn parse_variable_declarator(
        &mut self,
    ) -> ParseResult<'src, 'arena, VariableDeclaratorRef<'src, 'arena>> {
        let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?;
        let array_size = self.parse_optional_array_size();
        let initializer = self.parse_optional_variable_initializer();
        let span = AstSpan::range(name.0, self.last_consumed_position_or_start());
        Ok(self.arena.alloc_node(
            VariableDeclarator {
                name,
                initializer,
                array_size,
            },
            span,
        ))
    }

    /// Parses an optional `[expression]` array-size suffix.
    ///
    /// Returns `None` when the current token is not `[`. A missing `]` is
    /// reported and synchronized past, but the size expression is kept.
    fn parse_optional_array_size(&mut self) -> OptionalExpression<'src, 'arena> {
        if !self.eat(Token::LeftBracket) {
            return None;
        }
        let array_size_expression = self.parse_expression();
        self.expect(
            Token::RightBracket,
            ParseErrorKind::DeclExpectedRightBracketAfterArraySize,
        )
        .sync_error_at(self, SyncLevel::CloseBracket)
        .report_error(self);
        Some(array_size_expression)
    }

    /// Parses an optional `= expression` initializer; `None` when the
    /// current token is not `=`.
    fn parse_optional_variable_initializer(&mut self) -> OptionalExpression<'src, 'arena> {
        self.eat(Token::Assign).then(|| self.parse_expression())
    }
}

View File

@ -0,0 +1,109 @@
//! Block-body parsing for Fermented `UnrealScript`.
//!
//! Provides shared routines for parsing `{ ... }`-delimited bodies used in
//! function, loop, state, and similar constructs after the opening `{`
//! has been consumed.
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, BlockBody, Expression, ExpressionRef, Statement, StatementRef};
use crate::lexer::{Token, TokenPosition};
use crate::parser::{ParseErrorKind, Parser};
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a `{ ... }` block after the opening `{` has been consumed.
    ///
    /// Consumes tokens until the matching `}` and returns an
    /// [`Expression::Block`] whose span covers the entire block, from
    /// `opening_brace_position` to the closing `}`.
    ///
    /// On premature end-of-file, returns a best-effort block.
    #[must_use]
    pub(crate) fn parse_block_tail(
        &mut self,
        opening_brace_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let body = self.parse_braced_block_statements_tail(opening_brace_position);
        self.arena
            .alloc_node(Expression::Block(body.statements), body.span)
    }

    /// Parses a `{ ... }` block after the opening `{` has been consumed.
    ///
    /// Consumes tokens until the matching `}` and returns the contained
    /// statements together with a span covering the whole block, from
    /// `opening_brace_position` to the closing `}`.
    ///
    /// On premature end-of-file, reports a missing-brace error and returns a
    /// best-effort statement list and span.
    #[must_use]
    pub(crate) fn parse_braced_block_statements_tail(
        &mut self,
        opening_brace_position: TokenPosition,
    ) -> BlockBody<'src, 'arena> {
        let mut statements = self.arena.vec();
        loop {
            let Some((token, token_position)) = self.peek_token_and_position() else {
                // Input ran out before the matching `}`.
                self.report_error_here(ParseErrorKind::BlockMissingClosingBrace);
                let recovered_span = AstSpan::range(
                    opening_brace_position,
                    self.last_consumed_position_or_start(),
                );
                return BlockBody {
                    statements,
                    span: recovered_span,
                };
            };
            if token == Token::RightBrace {
                self.advance(); // consume '}'
                return BlockBody {
                    statements,
                    span: AstSpan::range(opening_brace_position, token_position),
                };
            }
            self.parse_next_block_item_into(&mut statements);
            self.ensure_forward_progress(token_position);
        }
    }

    /// Parses one statement inside a `{ ... }` block and appends it to
    /// `statements`.
    ///
    /// Never consumes the closing `}` and is only meant to be called while
    /// parsing inside a block. Always appends at least one statement, even
    /// in the presence of syntax errors.
    pub(crate) fn parse_next_block_item_into(
        &mut self,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
    ) {
        // Fall back to wrapping a bare expression as a statement when no
        // statement form applies.
        let mut statement = match self.parse_statement() {
            Some(parsed_statement) => parsed_statement,
            None => {
                let expression = self.parse_expression();
                let expression_span = *expression.span();
                self.arena
                    .alloc_node(Statement::Expression(expression), expression_span)
            }
        };
        if statement_needs_semicolon(&statement) {
            // Fold a trailing `;` into the statement's span when present.
            if let Some((Token::Semicolon, semicolon_position)) = self.peek_token_and_position() {
                statement.span_mut().extend_to(semicolon_position);
                self.advance(); // ';'
            }
        }
        statements.push(statement);
    }
}
fn statement_needs_semicolon(statement: &Statement) -> bool {
use Statement::{Empty, Error, Expression, Function, Label, LocalVariableDeclaration};
match statement {
Empty | Label(_) | Error | Function(_) => false,
Expression(expression) => expression_needs_semicolon(expression),
LocalVariableDeclaration { .. } => true,
}
}
const fn expression_needs_semicolon(expression: &Expression) -> bool {
use Expression::{Block, DoUntil, Error, For, ForEach, If, Switch, While};
matches!(
expression,
Block { .. }
| If { .. }
| While { .. }
| DoUntil { .. }
| ForEach { .. }
| For { .. }
| Switch { .. }
| Error
)
}

View File

@ -0,0 +1,446 @@
//! Control expression parsing for Fermented `UnrealScript`.
//!
//! ## Condition parsing and legacy compatibility
//!
//! Fermented `UnrealScript` allows omitting parentheses `(...)` around the
//! condition expression of `if`/`while`/etc. For compatibility with older
//! `UnrealScript` code, we also apply a special rule:
//!
//! If a condition starts with `(`, we parse the condition as exactly the
//! matching parenthesized subexpression and stop at its corresponding `)`.
//! In other words, `( ... )` must cover the whole condition; trailing tokens
//! like `* c == d` are not allowed to continue the condition.
//!
//! This prevents the parser from accidentally consuming the following
//! statement/body as part of the condition in older code such as:
//!
//! ```unrealscript
//! if ( AIController(Controller) != None ) Cross = vect(0,0,0);
//! ```
//!
//! Trade-off: you cannot write `if (a + b) * c == d`;
//! write `if ((a + b) * c == d)` or `if d == (a + b) * c` instead.
//!
//! ## Disambiguation of `for` as loop vs expression
//!
//! Unlike other control-flow keywords, `for` is disambiguated from functions
//! or variables with the same name. This is done syntactically in
//! [`Parser::is_for_loop_header_ahead`]: a `for` token followed by
//! a `(` whose contents contain a top-level `;` is unambiguously a loop header.
//!
//! This rule is lightweight, local, and robust, and mirrors the fixed grammar
//! `for (init; condition; step)` without requiring name resolution.
//!
//! ### Why this is not done for `if` / `while` / `do`
//!
//! There is no similarly reliable way to discriminate `if`, `while`, or related
//! keywords at this stage of parsing: their parenthesized forms are
//! indistinguishable from single argument function calls.
//!
//! Supporting these keywords as identifiers would complicate parsing
//! disproportionately and we always treat them as openers for conditional and
//! cycle expressions. This matches common `UnrealScript` usage and intentionally
//! drops support for moronic design choices where such names were reused
//! as variables or functions (like what the author did by declaring
//! a `For` function in Acedia).
//!
//! ### But what about `switch`?
//!
//! `switch` is handled separately because, in existing `UnrealScript` code,
//! it may appear either as a keyword-led construct or as an identifier.
//!
//! Its disambiguation rule is simpler than for `for`: if the next token is
//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains
//! available as an identifier.
//!
//! This rule is local and purely syntactic, matching the behavior expected by
//! the existing codebase we support. The actual parsing of `switch` expressions
//! lives in a separate module because the construct itself is more involved
//! than the control-flow forms handled here.
use crate::ast::{AstSpan, BranchBody, Expression, ExpressionRef};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a control-flow condition.
    ///
    /// If the next token is `(`, attempts to consume one parenthesized
    /// subexpression and returns it wrapped as [`Expression::Parentheses`].
    /// Otherwise consumes a general expression.
    fn parse_condition(&mut self) -> ExpressionRef<'src, 'arena> {
        // Legacy-compatibility rule (see module docs): a condition starting
        // with `(` is parsed as exactly one parenthesized subexpression and
        // stops at its matching `)`.
        if let Some((Token::LeftParenthesis, left_parenthesis_position)) =
            self.peek_token_and_position()
        {
            self.advance(); // '('
            let condition_expression = self.parse_expression();
            // A missing `)` is reported with a span widened back to `(`;
            // `unwrap_or_fallback` still yields a usable end position so a
            // node is always produced.
            let right_parenthesis_position = self
                .expect(
                    Token::RightParenthesis,
                    ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
                        left_parenthesis_position,
                    },
                )
                .widen_error_span_from(left_parenthesis_position)
                .sync_error_at(self, SyncLevel::CloseParenthesis)
                .unwrap_or_fallback(self);
            self.arena.alloc_node_between(
                Expression::Parentheses(condition_expression),
                left_parenthesis_position,
                right_parenthesis_position,
            )
        } else {
            self.parse_expression()
        }
    }

    /// Parses a branch body for a control-flow construct.
    ///
    /// Normalizes the following source forms into a [`BranchBody`]:
    ///
    /// - empty body with semicolon: `if (cond);`
    /// - empty body before a closing `}`: `if (cond) }`
    /// - non-empty block body: `if (cond) { ... }`
    /// - non-empty single-expression body: `if (cond) expr;`
    ///
    /// For non-block bodies, this method consumes a trailing `;` when present
    /// and records its position in the returned [`BranchBody`].
    fn parse_branch_body(&mut self) -> BranchBody<'src, 'arena> {
        // EOF where a body was required: report and return an empty body
        // anchored at the error's end position.
        let Some((first_token, first_token_position)) = self.peek_token_and_position() else {
            let error = self.make_error_here(ParseErrorKind::MissingBranchBody);
            self.report_error(error);
            return BranchBody {
                expression: None,
                semicolon_position: None,
                end_anchor_token_position: error.covered_span.token_to,
            };
        };
        // `if (is_condition);`
        if first_token == Token::Semicolon {
            self.advance(); // ';'
            return BranchBody {
                expression: None,
                semicolon_position: Some(first_token_position),
                end_anchor_token_position: first_token_position,
            };
        }
        // `{ ... if (is_condition) }`
        if first_token == Token::RightBrace {
            return BranchBody {
                expression: None,
                semicolon_position: None,
                // `unwrap` actually triggering is effectively impossible,
                // because by the time a branch body is parsed, some prior token
                // (e.g. `if`, `)`, etc.) has already been consumed,
                // so the parser should have a last-consumed position
                end_anchor_token_position: self
                    .last_consumed_position()
                    .unwrap_or(first_token_position),
            };
        }
        // Non-empty body: either a block or a single expression.
        let branch_expression = self.parse_expression();
        let end_anchor_token_position = branch_expression.span().token_to;
        // A block body in `if {...}` or `if {...};` owns its own terminator;
        // a following `;` does not belong to the branch body.
        if let Expression::Block(_) = *branch_expression {
            return BranchBody {
                expression: Some(branch_expression),
                semicolon_position: None,
                end_anchor_token_position,
            };
        }
        // For single-expression bodies, consume a trailing semicolon if present
        let trailing_semicolon_position = if self.eat(Token::Semicolon) {
            self.last_consumed_position()
        } else {
            None
        };
        BranchBody {
            expression: Some(branch_expression),
            semicolon_position: trailing_semicolon_position,
            end_anchor_token_position: trailing_semicolon_position
                .unwrap_or(end_anchor_token_position),
        }
    }

    /// Parses an `if` expression after the `if` keyword.
    ///
    /// The resulting [`Expression::If`] spans from `if_keyword_position` to the
    /// end of the `if` body, or to the end of the `else` body if one is
    /// present.
    #[must_use]
    pub(crate) fn parse_if_tail(
        &mut self,
        if_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_condition();
        let body = self.parse_branch_body();
        // An optional `else` branch extends the overall `if` span to the end
        // of the `else` body.
        let (else_body, if_end_position) = if self.peek_keyword() == Some(Keyword::Else) {
            self.advance(); // 'else'
            let else_body = self.parse_branch_body();
            let else_body_end = else_body.end_anchor_token_position;
            (Some(else_body), else_body_end)
        } else {
            (None, body.end_anchor_token_position)
        };
        let span = AstSpan::range(if_keyword_position, if_end_position);
        self.arena.alloc_node(
            Expression::If {
                condition,
                body,
                else_body,
            },
            span,
        )
    }

    /// Parses a `while` expression after the `while` keyword.
    ///
    /// The resulting [`Expression::While`] spans from `while_keyword_position`
    /// to the end of its body.
    #[must_use]
    pub(crate) fn parse_while_tail(
        &mut self,
        while_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_condition();
        let body = self.parse_branch_body();
        let span = AstSpan::range(while_keyword_position, body.end_anchor_token_position);
        self.arena
            .alloc_node(Expression::While { condition, body }, span)
    }

    /// Parses a `do ... until ...` expression after the `do` keyword.
    ///
    /// The resulting [`Expression::DoUntil`] spans from `do_keyword_position`
    /// to the end of the condition.
    #[must_use]
    pub(crate) fn parse_do_until_tail(
        &mut self,
        do_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let body = self.parse_branch_body();
        // When `until` is missing, report the error and substitute an error
        // placeholder anchored at the end of the body, so the caller still
        // receives a well-formed node.
        let condition = if self
            .expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil)
            .widen_error_span_from(do_keyword_position)
            .report_error(self)
        {
            crate::arena::ArenaNode::new_in(
                Expression::Error,
                AstSpan::new(body.end_anchor_token_position),
                self.arena,
            )
        } else {
            self.parse_condition()
        };
        let span = AstSpan::range(do_keyword_position, condition.span().token_to);
        self.arena
            .alloc_node(Expression::DoUntil { condition, body }, span)
    }

    /// Parses a `foreach` expression after the `foreach` keyword.
    ///
    /// The iterator part is consumed as a regular expression, followed by a
    /// branch body.
    ///
    /// The resulting [`Expression::ForEach`] spans from
    /// `foreach_keyword_position` to the end of the body.
    #[must_use]
    pub(crate) fn parse_foreach_tail(
        &mut self,
        foreach_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        // UnrealScript `foreach` iterator expressions are simple enough that
        // they do not need the special parenthesized-condition handling used by
        // `parse_condition()`.
        let iterated_expression = self.parse_expression();
        let body = self.parse_branch_body();
        let span = AstSpan::range(foreach_keyword_position, body.end_anchor_token_position);
        self.arena.alloc_node(
            Expression::ForEach {
                iterated_expression,
                body,
            },
            span,
        )
    }

    /// Returns whether the upcoming tokens have the syntactic shape of a
    /// `for (...)` header.
    ///
    /// More precisely, this returns `true` iff the next token is `(` and a
    /// top-level `;` appears before the matching `)` is closed or input ends.
    ///
    /// This is used only for loop-vs-identifier disambiguation.
    pub(crate) fn is_for_loop_header_ahead(&mut self) -> bool {
        if self.peek_token() != Some(Token::LeftParenthesis) {
            return false;
        }
        // Pure lookahead: scan forward without consuming anything, tracking
        // parenthesis nesting relative to the opening `(`.
        let mut nesting_depth: usize = 1;
        let mut lookahead_token_offset: usize = 1;
        while let Some(next_token) = self.peek_token_at(lookahead_token_offset) {
            match next_token {
                Token::LeftParenthesis => nesting_depth += 1,
                Token::RightParenthesis => {
                    if nesting_depth <= 1 {
                        // End of the immediate `for (...)` group without a
                        // top-level `;`: not a loop header.
                        return false;
                    }
                    nesting_depth -= 1;
                }
                Token::Semicolon if nesting_depth == 1 => return true,
                _ => (),
            }
            lookahead_token_offset += 1;
        }
        false
    }

    /// Parses a `for` expression after the `for` keyword.
    ///
    /// This method expects the standard header shape
    /// `for (initialization; condition; step)` and then parses a branch body.
    ///
    /// Each header component may be omitted. The resulting [`Expression::For`]
    /// spans from `for_keyword_position` to the end of the body.
    #[must_use]
    pub(crate) fn parse_for_tail(
        &mut self,
        for_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        // This path is expected to be entered only after
        // `is_for_loop_header_ahead()`, so the opening `(` and at least one
        // top-level `;` should already be structurally guaranteed.
        self.expect(
            Token::LeftParenthesis,
            ParseErrorKind::ForMissingOpeningParenthesis,
        )
        .widen_error_span_from(for_keyword_position)
        .report_error(self);
        // Header components: each may be empty, signalled by its terminator
        // (`;` for the first two, `)` for the step) appearing immediately.
        let initialization = if self.peek_token() == Some(Token::Semicolon) {
            self.advance();
            None
        } else {
            let init = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingInitializationSemicolon,
            )
            .report_error(self);
            Some(init)
        };
        let condition = if self.peek_token() == Some(Token::Semicolon) {
            self.advance();
            None
        } else {
            let condition = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingConditionSemicolon,
            )
            .report_error(self);
            Some(condition)
        };
        let step = if self.peek_token() == Some(Token::RightParenthesis) {
            self.advance();
            None
        } else {
            let step = self.parse_expression();
            self.expect(
                Token::RightParenthesis,
                ParseErrorKind::ForMissingClosingParenthesis,
            )
            .widen_error_span_from(for_keyword_position)
            .sync_error_at(self, SyncLevel::CloseParenthesis)
            .report_error(self);
            Some(step)
        };
        let body = self.parse_branch_body();
        let span = AstSpan::range(for_keyword_position, body.end_anchor_token_position);
        self.arena.alloc_node(
            Expression::For {
                initialization,
                condition,
                step,
                body,
            },
            span,
        )
    }

    /// Parses the continuation of a `return` expression after its keyword.
    ///
    /// If the next token is not `;`, consumes a return value expression.
    /// The terminating `;` is not consumed here.
    #[must_use]
    pub(crate) fn parse_return_tail(
        &mut self,
        return_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let (value, span) = if self.peek_token() == Some(Token::Semicolon) {
            // Bare `return;`: span covers only the keyword.
            (None, AstSpan::new(return_keyword_position))
        } else {
            let returned_value = self.parse_expression();
            let span = AstSpan::range(return_keyword_position, returned_value.span().token_to);
            (Some(returned_value), span)
        };
        self.arena.alloc_node(Expression::Return(value), span)
    }

    /// Parses the continuation of a `break` expression after its keyword.
    ///
    /// If the next token is not `;`, consumes a break value expression.
    /// The terminating `;` is not consumed here.
    #[must_use]
    pub(crate) fn parse_break_tail(
        &mut self,
        break_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let (value, span) = if self.peek_token() == Some(Token::Semicolon) {
            // Bare `break;`: span covers only the keyword.
            (None, AstSpan::new(break_keyword_position))
        } else {
            let returned_value = self.parse_expression();
            let span = AstSpan::range(break_keyword_position, returned_value.span().token_to);
            (Some(returned_value), span)
        };
        self.arena.alloc_node(Expression::Break(value), span)
    }

    /// Parses the continuation of a `goto` expression after its keyword.
    ///
    /// Accepts either a name literal or an identifier as the target label.
    #[must_use]
    pub(crate) fn parse_goto_tail(
        &mut self,
        goto_keyword_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        if let Some((label_token, label_position)) = self.peek_token_and_position()
            && (label_token == Token::NameLiteral || label_token == Token::Identifier)
        {
            self.advance();
            return self.arena.alloc_node_between(
                Expression::Goto(label_position),
                goto_keyword_position,
                label_position,
            );
        }
        // No usable label: report (widened over the `goto` keyword),
        // synchronize to a statement boundary, and yield an error node.
        self.make_error_here(ParseErrorKind::GotoMissingLabel)
            .widen_error_span_from(goto_keyword_position)
            .sync_error_until(self, SyncLevel::Statement)
            .report_error(self);
        crate::arena::ArenaNode::new_in(
            Expression::Error,
            AstSpan::new(goto_keyword_position),
            self.arena,
        )
    }
}

View File

@ -0,0 +1,76 @@
//! Identifier parsing for Fermented `UnrealScript`.
//!
//! Provides shared routines for parsing both regular and qualified identifiers,
//! e.g. `KFChar.ZombieClot`.
use crate::arena::{self, ArenaVec};
use crate::ast::{AstSpan, IdentifierToken, QualifiedIdentifier, QualifiedIdentifierRef};
use crate::lexer::{self, Token};
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt};
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a single identifier token.
    ///
    /// Fails with `invalid_identifier_error_kind` when input is exhausted or
    /// when the next token cannot serve as an identifier; in the latter case
    /// the offending token is left unconsumed.
    pub(crate) fn parse_identifier(
        &mut self,
        invalid_identifier_error_kind: ParseErrorKind,
    ) -> ParseResult<'src, 'arena, IdentifierToken> {
        let (token, token_position) =
            self.require_token_and_position(invalid_identifier_error_kind)?;
        match Self::identifier_token_from_token(token, token_position) {
            Some(identifier) => {
                self.advance();
                Ok(identifier)
            }
            None => Err(self.make_error_here(invalid_identifier_error_kind)),
        }
    }

    /// Wraps `token` at `token_position` into an [`IdentifierToken`] when it
    /// is usable as an identifier name; returns `None` otherwise.
    ///
    /// Pure validation helper: never consumes parser input.
    pub(crate) fn identifier_token_from_token(
        token: Token,
        token_position: lexer::TokenPosition,
    ) -> Option<IdentifierToken> {
        if token.is_valid_identifier_name() {
            Some(IdentifierToken(token_position))
        } else {
            None
        }
    }

    /// Parses a qualified (dot-separated) identifier path,
    /// e.g. `KFChar.ZombieClot`.
    ///
    /// Every segment must be a valid identifier; segments are separated by
    /// `.` tokens. Errors use `invalid_identifier_error_kind` and are widened
    /// back to the first segment.
    pub(crate) fn parse_qualified_identifier(
        &mut self,
        invalid_identifier_error_kind: ParseErrorKind,
    ) -> ParseResult<'src, 'arena, QualifiedIdentifierRef<'arena>> {
        let head = self.parse_identifier(invalid_identifier_error_kind)?;
        let span_start = head.0;
        let mut span_end = span_start;
        // `tail` stays `None` for a single-segment path; the vector is only
        // allocated once a second segment actually appears.
        let mut tail = None;
        while self.eat(Token::Period) {
            let segment = self
                .parse_identifier(invalid_identifier_error_kind)
                .widen_error_span_from(head.0)?;
            span_end = segment.0;
            tail.get_or_insert_with(|| ArenaVec::new_in(self.arena))
                .push(segment);
        }
        Ok(arena::ArenaNode::new_in(
            QualifiedIdentifier { head, tail },
            AstSpan::range(span_start, span_end),
            self.arena,
        ))
    }
}

View File

@ -0,0 +1,123 @@
//! Literal decoding for Fermented `UnrealScript`.
//!
//! This module defines the semantic rules for interpreting literal tokens
//! produced by the lexer. It is responsible only for *decoding* the textual
//! representation of literals into their internal values.
//!
//! The rules implemented here intentionally mirror the quirks of
//! Unreal Engine 2's `UnrealScript`.
use crate::parser::{ParseErrorKind, ParseResult};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Decodes an integer literal string into [`u128`].
///
/// Accepted syntax:
/// - Optional case-insensitive base prefix: `0b` (binary), `0o` (octal),
///   `0x` (hexadecimal). Without one, the literal is decimal.
/// - Digits must match the chosen base (`0-1` / `0-7` / `0-9A-F`).
/// - `_` separators among the digits are ignored (e.g. `1_000`, `0xDE_AD`).
/// - No leading sign; the value is a non-negative magnitude that must fit
///   within [`u128`].
///
/// Examples: `42`, `0b1010_0011`, `0o755`, `0xDEAD_BEEF`.
///
/// On failure, returns [`ParseErrorKind::InvalidNumericLiteral`] at
/// the parser's current cursor position.
pub(crate) fn decode_integer_literal(&self, literal: &str) -> ParseResult<'src, 'arena, u128> {
    let (radix, digits) = if let Some(rest) =
        literal.strip_prefix("0b").or_else(|| literal.strip_prefix("0B"))
    {
        (2, rest)
    } else if let Some(rest) = literal.strip_prefix("0o").or_else(|| literal.strip_prefix("0O")) {
        (8, rest)
    } else if let Some(rest) = literal.strip_prefix("0x").or_else(|| literal.strip_prefix("0X")) {
        (16, rest)
    } else {
        (10, literal)
    };
    // Separators carry no meaning; drop them before conversion.
    let cleaned_digits: String = digits.chars().filter(|&character| character != '_').collect();
    u128::from_str_radix(&cleaned_digits, radix)
        .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
}
/// Decodes a float literal as `f64`, following the permissive and only
/// partially documented behavior of `UnrealScript`.
///
/// Unreal Engine 2 does not define a precise and consistent set of rules
/// for float literals and the original compiler contains several quirks.
/// We therefore normalize the text with a small set of UnrealScript-specific
/// rules and then hand the result to Rust's `f64` parser:
///
/// - Only decimal floats and special literals (e.g. `NaN`, `inf`) are
///   supported (no hex or binary formats).
/// - A single trailing `f` or `F`, if present, is removed before parsing.
/// - The text is scanned for periods (`.`); if a second period is found,
///   everything from that second `.` onward is discarded.
///
///   Examples:
///   * `1.2.3e4` becomes `1.2`
///   * `1.2e3.4` becomes `1.2e3`
///
/// - After truncation, the remaining text must follow Rust's `f64` syntax:
///   digits, at most one decimal point, and an optional exponent part
///   (for example `e3` or `E-2`). Underscores, spaces, and other
///   unsupported characters cause a parse error.
///
/// On failure, this function returns
/// [`ParseErrorKind::InvalidNumericLiteral`] at the current parser
/// position.
pub(crate) fn decode_float_literal(&self, literal: &str) -> ParseResult<'src, 'arena, f64> {
    // Drop a single trailing `f`/`F` type suffix, if any (ASCII, so byte
    // slicing below is safe).
    let without_suffix = match literal.as_bytes().last() {
        Some(b'f' | b'F') => &literal[..literal.len() - 1],
        _ => literal,
    };
    // Truncate from the second '.' onward, matching UnrealScript behavior.
    let mut period_indices = without_suffix.match_indices('.');
    period_indices.next(); // the first '.' is legitimate
    let truncated = match period_indices.next() {
        Some((second_period_index, _)) => &without_suffix[..second_period_index],
        None => without_suffix,
    };
    truncated
        .parse::<f64>()
        .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
}
/// Unescapes a tokenized string literal into an arena string.
///
/// Recognized escapes: `\n`, `\t`, `\"`, `\\`. Any other escape simply
/// drops the backslash and keeps the following character unchanged, which
/// matches `UnrealScript` behavior. If `raw_string` ends with a trailing
/// `\` (which a well-formed token never does), that backslash is ignored.
///
/// `raw_string` is expected to be the token text without its surrounding
/// quotes.
pub(crate) fn unescape_string_literal(
    &self,
    raw_string: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut unescaped = String::with_capacity(raw_string.len());
    let mut remaining = raw_string.chars();
    while let Some(current_character) = remaining.next() {
        if current_character != '\\' {
            unescaped.push(current_character);
            continue;
        }
        // The lexer contract guarantees string tokens never end with a
        // lone backslash; if that invariant is ever broken, the dangling
        // backslash is simply dropped here.
        let Some(escaped_character) = remaining.next() else {
            break;
        };
        let replacement = match escaped_character {
            'n' => '\n',
            't' => '\t',
            // Everything else (including `"` and `\`, which map to
            // themselves) keeps the escaped character as-is -- this is
            // exactly UnrealScript's behavior.
            other => other,
        };
        unescaped.push(replacement);
    }
    self.arena.string(&unescaped)
}
}

View File

@ -0,0 +1,32 @@
//! Expression parsing for Fermented `UnrealScript`.
//!
//! This module group implements the language's expression parser around a
//! Pratt-style core. It is split into small submodules by role: precedence,
//! identifiers, literals, selectors, block bodies, keyword-led/control-flow
//! forms, primary-expression dispatch, and the Pratt driver itself.
//!
//! The parser is designed to keep building a best-effort AST on malformed
//! input. Syntax problems are reported through diagnostics, while committed
//! parsers recover locally and return fallback nodes or partial structures when
//! necessary.
//!
//! ## Expression layering
//!
//! The parser distinguishes several layers of expression parsing:
//!
//! - **primaries**: forms that can be parsed directly from the current token,
//! without an already parsed left-hand side;
//! - **selectors**: suffix continuations such as member access, indexing, and
//! calls, which require a left-hand side;
//! - **prefix / postfix / infix operators**: handled by the Pratt parser using
//! precedence ranks.
mod block; // `{ ... }` block-body parsing and block/expression item handling.
mod control_flow; // `if`, `while`, `do`, `foreach`, `for`, `return`, etc.
mod identifier; // Identifier and qualified-name parsing helpers.
mod literals; // Literal decoding and literal-specific parsing utilities.
mod pratt; // Top-level Pratt driver.
mod precedence; // Operator precedence ranks and Pratt binding rules.
mod primary; // Primary-expression parsing and keyword-vs-identifier dispatch.
mod selectors; // Suffix continuations: member access, indexing, and calls.
mod switch; // `switch (...) { ... }` parsing and arm/body recovery.

View File

@ -0,0 +1,194 @@
//! Core of the expression parser for Fermented `UnrealScript`.
//!
//! This module implements a Pratt-style parser for the language's expression
//! grammar, supporting:
//!
//! * Primary expressions (see [`crate::parser::primary`] for details on what
//! we consider to be a primary expression);
//! * Prefix operators;
//! * Postfix operators;
//! * Infix operators with hard-coded precedence and associativity.
//!
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
//! operators bind. Infix parsing uses the pair of binding powers returned by
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
//! The parser infrastructure supports both left- and right-associative
//! operators, but Fermented `UnrealScript` currently defines only
//! left-associative ones.
//!
//! ## Postfix operator vs "selectors"
//!
//! Everywhere here we distinguish *selectors* like field accessor `.`,
//! function call `()` or array indices `[]` from other *postfix operators*
//! as they:
//!
//! 1. Have significantly different semantic meaning;
//! 2. Are not considered operators from `UnrealScript`'s viewpoint
//! (e.g. cannot be overloaded).
//!
//! ## See also
//!
//! - [`parser::Parser::parse_expression`] - main entry point
//! - [`PrecedenceRank`] - operator binding strengths
//! - [`super::precedence`] - operator precedence definitions
use crate::ast::{self, Expression, ExpressionRef};
use crate::parser::{self, Parser, ResultRecoveryExt};
pub use super::precedence::PrecedenceRank;
/// Reports whether postfix operators such as `++` and `--` must not be
/// parsed after `expression`.
///
/// Only postfix *operators* are blocked by this check. Selectors -- field
/// access `.x`, indexing `[i]`, and calls `(args)` -- remain allowed after
/// any expression form.
fn forbids_postfix_operators(expression: &ExpressionRef<'_, '_>) -> bool {
    match &**expression {
        Expression::If { .. }
        | Expression::While { .. }
        | Expression::DoUntil { .. }
        | Expression::For { .. }
        | Expression::ForEach { .. }
        | Expression::Switch { .. }
        | Expression::Block { .. } => true,
        _ => false,
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses one expression.
    ///
    /// Never fails outright: syntax problems are reported through the
    /// parser's diagnostics and a best-effort node is always returned.
    #[must_use]
    pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
        self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
    }

    /// Parses an expression while only consuming operators whose binding
    /// power is at least `min_precedence_rank` (as tight or tighter).
    fn parse_expression_with_min_precedence_rank(
        &mut self,
        min_precedence_rank: PrecedenceRank,
    ) -> ExpressionRef<'src, 'arena> {
        let prefix_or_primary = self
            .parse_prefix_or_primary()
            .sync_error_until(self, parser::SyncLevel::Expression)
            .unwrap_or_fallback(self);
        let with_selectors = self
            .parse_selectors_into(prefix_or_primary)
            .unwrap_or_fallback(self);
        // Postfix operators are suppressed after control-flow and block
        // forms; selectors (handled above) are always allowed. This avoids
        // ambiguities such as:
        //
        // ```unrealscript
        // if test() { do_it(); }
        // ++ counter;
        // ```
        //
        // UnrealScript itself never treated these constructs as
        // expressions, so this was not a problem there -- and neither `++`
        // nor `--` (the only default postfix operators) makes sense after
        // such forms anyway.
        let with_postfix = if forbids_postfix_operators(&with_selectors) {
            with_selectors
        } else {
            self.parse_postfix_into(with_selectors)
        };
        self.parse_infix_into(with_postfix, min_precedence_rank)
    }

    /// Parses a prefix or primary expression (the Pratt parser's "nud",
    /// i.e. null denotation).
    fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> {
        let (token, token_lexeme, token_position) =
            self.require_token_lexeme_and_position(parser::ParseErrorKind::MissingExpression)?;
        self.advance();
        match ast::PrefixOperator::try_from(token) {
            Ok(operator) => {
                // UnrealScript's prefix and postfix operators bind tighter
                // than every infix operator, so the operand can safely be
                // parsed at the tightest precedence.
                let operand =
                    self.parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST);
                Ok(Expression::new_prefix(
                    self.arena,
                    token_position,
                    operator,
                    operand,
                ))
            }
            Err(_) => self.parse_primary_from_current_token(token, token_lexeme, token_position),
        }
    }

    /// Consumes every postfix operator that follows, wrapping `expression`
    /// one level deeper for each.
    fn parse_postfix_into(
        &mut self,
        mut expression: ExpressionRef<'src, 'arena>,
    ) -> ExpressionRef<'src, 'arena> {
        loop {
            let Some((operator, operator_position)) = self.peek_postfix_with_position() else {
                return expression;
            };
            self.advance();
            expression =
                Expression::new_postfix(self.arena, expression, operator, operator_position);
        }
    }

    /// Consumes infix operators binding at least as tight as
    /// `min_precedence_rank`, folding them into a tree on top of
    /// `expression`.
    ///
    /// Associativity is encoded by
    /// [`super::precedence::infix_precedence_ranks`]; the loop stops as
    /// soon as the next operator binds more loosely than
    /// `min_precedence_rank`.
    fn parse_infix_into(
        &mut self,
        mut expression: ExpressionRef<'src, 'arena>,
        min_precedence_rank: PrecedenceRank,
    ) -> ExpressionRef<'src, 'arena> {
        loop {
            let Some((operator, right_precedence_rank)) =
                self.peek_infix_with_min_precedence_rank(min_precedence_rank)
            else {
                return expression;
            };
            self.advance();
            let right_hand_side =
                self.parse_expression_with_min_precedence_rank(right_precedence_rank);
            expression =
                Expression::new_binary(self.arena, expression, operator, right_hand_side);
        }
    }

    /// Peeks at the next token and, when it is a postfix operator, returns
    /// the operator together with its position.
    ///
    /// Keeps the postfix loop above free of double peeks and unwraps.
    fn peek_postfix_with_position(
        &mut self,
    ) -> Option<(ast::PostfixOperator, crate::lexer::TokenPosition)> {
        let (token, token_position) = self.peek_token_and_position()?;
        ast::PostfixOperator::try_from(token)
            .ok()
            .map(|operator| (operator, token_position))
    }

    /// When the next token is an infix operator whose left binding power is
    /// at least `min_precedence_rank`, returns the operator and the minimum
    /// rank to use for its right-hand side (i.e. the operator's right
    /// binding power).
    ///
    /// Otherwise returns [`None`].
    fn peek_infix_with_min_precedence_rank(
        &mut self,
        min_precedence_rank: PrecedenceRank,
    ) -> Option<(ast::InfixOperator, PrecedenceRank)> {
        let next_token = self.peek_token()?;
        let (left_precedence_rank, operator, right_precedence_rank) =
            super::precedence::infix_precedence_ranks(next_token)?;
        if left_precedence_rank.is_looser_than(min_precedence_rank) {
            None
        } else {
            Some((operator, right_precedence_rank))
        }
    }
}

View File

@ -0,0 +1,93 @@
//! Precedence tables for Fermented `UnrealScript` operators.
//!
//! These values don't follow the usual *binding power* convention for
//! a Pratt parser, where tighter binding corresponds to a larger number.\
//! Here, the smaller the number, the tighter the binding power.\
//! For this reason, we use the term *precedence rank* instead.
//!
//! ## Operators sorted by precedence (lowest number = tighter binding)
//!
//! ### Infix operators
//!
//! All infix operators in `UnrealScript` are
//! [left-associative](https://wiki.beyondunreal.com/Operators).
//!
//! 12: `**`
//! 16: `*`, `/`, `Cross`, `Dot`
//! 18: `%`
//! 20: `+`, `-`
//! 22: `<<`, `>>`, `>>>`
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
//! 26: `!=`
//! 28: `&`, `^`, `|`
//! 30: `&&`, `^^`
//! 32: `||`
//! 34: `*=`, `/=`, `+=`, `-=`
//! 40: `$`, `*`, `@`
//! 44: `$=`, `*=`, `@=`
//! 45: `-=`
//!
//! Some operators, such as `*`, appear twice with different precedence
//! ranks because they were defined with different values for different types
//! in separate script source files (as in the Killing Floor sources).\
//! However, `UnrealScript` uses only the first definition it encounters in
//! `Object.uc`, which corresponds to the lower value.
//!
//! ### Prefix operators
//!
//! `!`, `~`, `+`, `-`, `++`, `--`.
//!
//! ### Postfix operators
//!
//! `++`, `--`.
use crate::ast::{InfixOperator, infix_operator_info};
use crate::lexer::Token;
/// Compact precedence rank used by the Pratt parser.
///
/// Smaller values bind tighter and larger values bind looser. This
/// inverted scale mirrors the way the original `UnrealScript` tables
/// record precedence.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct PrecedenceRank(u8);
impl PrecedenceRank {
    /// The tightest possible precedence rank.
    ///
    /// On this inverted scale (smaller number = tighter binding) that is
    /// zero.
    pub const TIGHTEST: Self = Self(0);

    /// The loosest possible precedence rank.
    ///
    /// On this inverted scale (smaller number = tighter binding) that is
    /// the maximum [`u8`] value.
    pub const LOOSEST: Self = Self(u8::MAX);

    /// Returns `true` when `self` binds more loosely than `other`.
    pub const fn is_looser_than(self, other: Self) -> bool {
        other.0 < self.0
    }
}
/// Looks up the infix operator for `token` together with its binding
/// ranks, returned as `(left_precedence_rank, operator,
/// right_precedence_rank)`.
///
/// Returns [`None`] if and only if `token` does not name an infix
/// operator.
pub fn infix_precedence_ranks(
    token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
    let info = infix_operator_info(token)?;
    // NOTE(review): the operator's own (left) rank is read from a field
    // named `right_precedence_rank` -- confirm that naming against the
    // `infix_operator_info` table.
    let left_rank = PrecedenceRank(info.right_precedence_rank);
    // Every UnrealScript infix operator is left-associative. On this
    // "smaller is tighter" scale that is encoded by giving the right-hand
    // side a rank one tighter than the operator's own.
    //
    // All table ranks are even, so the decrement never crosses into a
    // neighboring operator group.
    let right_rank = PrecedenceRank(info.right_precedence_rank - 1);
    Some((left_rank, info.operator, right_rank))
}

View File

@ -0,0 +1,463 @@
//! Parser for primary expressions in Fermented `UnrealScript`.
//!
//! This module implements parsing of primary expressions via
//! [`Parser::parse_primary_from_current_token`] and its helper
//! [`Parser::parse_keyword_primary`].
//!
//! ## What is a "primary expression" here?
//!
//! In this module, "primary" is used somewhat more broadly than in a
//! textbook grammar, but it still has one essential property:
//!
//! A primary expression is an expression form that can be parsed
//! directly from the current token, without requiring an already
//! parsed left-hand side.
//!
//! This includes ordinary primaries such as literals, identifiers, and
//! parenthesized expressions, as well as keyword-led forms such as
//! `if`, `while`, `for`, `foreach`, `switch`, `return`, `break`,
//! `continue`, `new`, and `class<...>`.
//!
//! By contrast, selectors, postfix operators, and infix operators are
//! not primaries. They cannot stand on their own here: they are parsed
//! only as continuations of an already parsed expression.
//!
//! So "primary" here does not mean "smallest atomic expression".
//! It means "an expression form that does not need a left-hand side
//! in order to be parsed".
use super::selectors::ParsedCallArgumentSlot;
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses a primary expression starting from the provided token.
///
/// The provided token is assumed to be the already consumed first token of
/// the primary expression.
///
/// This includes literals, identifiers, grouped expressions, block
/// expressions, and certain keyword-led forms.
///
/// It does not parse selectors, postfix operators, or infix operators;
/// those are handled afterwards as continuations of the parsed primary.
///
/// # Errors
///
/// Returns [`ParseErrorKind::ExpressionExpected`] if the provided
/// token cannot begin any valid primary expression in this position.
/// The literal arms may instead surface decoding errors such as
/// [`ParseErrorKind::InvalidNumericLiteral`].
pub(crate) fn parse_primary_from_current_token(
&mut self,
token: Token,
token_lexeme: &'src str,
token_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
Ok(match token {
Token::IntegerLiteral => {
let value = self.decode_integer_literal(token_lexeme)?;
self.arena
.alloc_node_at(Expression::Integer(value), token_position)
}
Token::FloatLiteral => {
let value = self.decode_float_literal(token_lexeme)?;
self.arena
.alloc_node_at(Expression::Float(value), token_position)
}
Token::StringLiteral => {
let value = self.unescape_string_literal(token_lexeme);
self.arena
.alloc_node_at(Expression::String(value), token_position)
}
// A bare name literal carries no tag; tagged forms such as
// `Texture'Foo.Bar'` are produced by `parse_identifier_like_primary`.
Token::NameLiteral => self.arena.alloc_node_at(
Expression::NameLiteral {
tag: None,
name: token_lexeme,
},
token_position,
),
Token::LeftParenthesis => self.parse_parenthesized_expression_tail(token_position),
Token::LeftBrace => self.parse_block_tail(token_position),
Token::Keyword(keyword) => match self.parse_keyword_primary(keyword, token_position) {
Some(keyword_expression) => keyword_expression,
None => return self.parse_identifier_like_primary(token, token_position),
},
_ => return self.parse_identifier_like_primary(token, token_position),
})
}
/// Parses a keyword-led primary expression.
///
/// Returns `None` if the keyword should instead be interpreted as an
/// identifier in this position.
fn parse_keyword_primary(
&mut self,
keyword: Keyword,
token_position: TokenPosition,
) -> OptionalExpression<'src, 'arena> {
Some(match keyword {
Keyword::True => self
.arena
.alloc_node_at(Expression::Bool(true), token_position),
Keyword::False => self
.arena
.alloc_node_at(Expression::Bool(false), token_position),
Keyword::None => self.arena.alloc_node_at(Expression::None, token_position),
Keyword::If => self.parse_if_tail(token_position),
Keyword::While => self.parse_while_tail(token_position),
Keyword::Do => self.parse_do_until_tail(token_position),
Keyword::ForEach => self.parse_foreach_tail(token_position),
Keyword::Return => self.parse_return_tail(token_position),
Keyword::Break => self.parse_break_tail(token_position),
Keyword::Continue => self
.arena
.alloc_node_at(Expression::Continue, token_position),
Keyword::New => self.parse_new_expression_tail(token_position),
// These keywords remain valid identifiers unless the following
// tokens commit to the keyword-led form.
Keyword::For if self.is_for_loop_header_ahead() => self.parse_for_tail(token_position),
// A `goto` followed by `(` is left as an identifier so the
// parentheses can later be parsed as an ordinary call selector.
Keyword::Goto if !matches!(self.peek_token(), Some(Token::LeftParenthesis)) => {
self.parse_goto_tail(token_position)
}
// `switch` is only treated as keyword-led when followed by `(`
// to match the syntax accepted by the existing codebase.
Keyword::Switch if matches!(self.peek_token(), Some(Token::LeftParenthesis)) => {
self.parse_switch_tail(token_position)
}
Keyword::Class => {
if let Some((Token::Less, left_angle_bracket_position)) =
self.peek_token_and_position()
{
self.advance(); // '<'
self.parse_class_type_tail(token_position, left_angle_bracket_position)
} else {
return None;
}
}
_ => return None,
})
}
/// Attempts to parse the already-consumed token as an identifier or tagged
/// name literal.
///
/// # Errors
///
/// Returns [`ParseErrorKind::ExpressionExpected`] if the token
/// cannot be used as an identifier in this position.
fn parse_identifier_like_primary(
&mut self,
primary_token: Token,
primary_token_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
let identifier_token =
Parser::identifier_token_from_token(primary_token, primary_token_position).ok_or_else(
|| self.make_error_at(ParseErrorKind::ExpressionExpected, primary_token_position),
)?;
// A token that is valid as an identifier may still start a tagged-name
// literal such as `Texture'Foo.Bar'`.
let expression = if let Some((Token::NameLiteral, lexeme, name_position)) =
self.peek_token_lexeme_and_position()
{
self.advance();
self.arena.alloc_node_between(
Expression::NameLiteral {
tag: Some(identifier_token),
name: lexeme,
},
primary_token_position,
name_position,
)
} else {
self.arena.alloc_node_at(
Expression::Identifier(identifier_token),
primary_token_position,
)
};
Ok(expression)
}
/// Parses a parenthesized expression.
///
/// Assumes the opening `(` has already been consumed.
/// Reports and recovers from a missing closing `)`.
fn parse_parenthesized_expression_tail(
&mut self,
left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
// Special case for an empty expression
if let Some((Token::RightParenthesis, right_parenthesis_position)) =
self.peek_token_and_position()
{
self.make_error_here(ParseErrorKind::ParenthesizedExpressionEmpty {
left_parenthesis_position,
})
.widen_error_span_from(left_parenthesis_position)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.blame_token(right_parenthesis_position)
.report_error(self);
// The resulting node's span covers `(` through `)`.
return self.arena.alloc_node_between(
Expression::Error,
left_parenthesis_position,
right_parenthesis_position,
);
}
// Continue parsing normally
let inner_expression = if self.next_token_definitely_cannot_start_expression() {
let error = self
.make_error_here(ParseErrorKind::ExpressionExpected)
.widen_error_span_from(left_parenthesis_position)
.sync_error_at(self, SyncLevel::Expression)
.related_token(left_parenthesis_position);
let error_span = error.covered_span;
self.report_error(error);
return crate::arena::ArenaNode::new_in(
crate::ast::Expression::Error,
error_span,
self.arena,
);
} else {
self.parse_expression()
};
let right_parenthesis_position = self
.expect(
Token::RightParenthesis,
ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
left_parenthesis_position,
},
)
.widen_error_span_from(left_parenthesis_position)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.unwrap_or_fallback(self);
self.arena.alloc_node_between(
Expression::Parentheses(inner_expression),
left_parenthesis_position,
right_parenthesis_position,
)
}
/// Parses a class type expression of the form `class<...>`.
///
/// Assumes the `class` keyword and following '<' token have already been
/// consumed. Reports and recovers from malformed type syntax locally.
fn parse_class_type_tail(
&mut self,
class_keyword_position: TokenPosition,
left_angle_bracket_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
// Special case for an empty argument
if let Some((Token::Greater, right_angle_bracket_position)) = self.peek_token_and_position()
{
self.make_error_here(ParseErrorKind::ClassTypeMissingTypeArgument {
left_angle_bracket_position,
})
.widen_error_span_from(left_angle_bracket_position)
.sync_error_at(self, SyncLevel::CloseAngleBracket)
.blame_token(right_angle_bracket_position)
.report_error(self);
return self.arena.alloc_node_between(
Expression::Error,
class_keyword_position,
right_angle_bracket_position,
);
}
// Qualified identifiers do not have a meaningful fallback option
let class_type = match self
.parse_qualified_identifier(ParseErrorKind::ClassTypeInvalidTypeArgument {
left_angle_bracket_position,
})
.widen_error_span_from(class_keyword_position)
.sync_error_at(self, SyncLevel::CloseAngleBracket)
{
Ok(class_type) => class_type,
Err(error) => {
self.report_error(error);
// End the error node at the last consumed token, falling back
// to the `class` keyword itself when nothing was consumed.
return self.arena.alloc_node_between(
Expression::Error,
class_keyword_position,
self.last_consumed_position()
.unwrap_or(class_keyword_position),
);
}
};
let right_angle_bracket_position = self
.expect(
Token::Greater,
ParseErrorKind::ClassTypeMissingClosingAngleBracket {
left_angle_bracket_position,
},
)
.widen_error_span_from(class_keyword_position)
.sync_error_at(self, SyncLevel::CloseAngleBracket)
.unwrap_or_fallback(self);
self.arena.alloc_node_between(
Expression::ClassType(class_type),
class_keyword_position,
right_angle_bracket_position,
)
}
/// Parses a `new` expression with an optional parenthesized argument list.
///
/// Assumes the `new` keyword has already been consumed.
/// The parenthesized argument list is optional.
fn parse_new_expression_tail(
&mut self,
new_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
// In a `new` expression the optional `(outer, name, flags)` argument
// list comes before the class specifier expression.
let (outer_argument, name_argument, flags_argument) =
if let Some((Token::LeftParenthesis, left_parenthesis_position)) =
self.peek_token_and_position()
{
self.advance();
self.parse_new_argument_list_tail(left_parenthesis_position)
} else {
(None, None, None)
};
// The class specifier is often a literal class reference, but any
// expression is accepted here.
let class_specifier = if self.next_token_definitely_cannot_start_expression() {
let error = self
.make_error_here(ParseErrorKind::NewMissingClassSpecifier {
new_keyword_position,
})
.widen_error_span_from(new_keyword_position)
.sync_error_at(self, SyncLevel::Expression);
let error_span = error.covered_span;
self.report_error(error);
crate::arena::ArenaNode::new_in(crate::ast::Expression::Error, error_span, self.arena)
} else {
self.parse_expression()
};
let class_specifier_end_position = class_specifier.span().token_to;
self.arena.alloc_node_between(
Expression::New {
outer_argument,
name_argument,
flags_argument,
class_specifier,
},
new_keyword_position,
class_specifier_end_position,
)
}
/// Parses the optional parenthesized arguments of a `new` expression.
///
/// Assumes the opening `(` has already been consumed.
/// Returns the `outer`, `name`, and `flags` argument slots, each of which
/// may be omitted. Reports and recovers from a missing closing `)`.
fn parse_new_argument_list_tail(
&mut self,
left_parenthesis_position: TokenPosition,
) -> (
OptionalExpression<'src, 'arena>,
OptionalExpression<'src, 'arena>,
OptionalExpression<'src, 'arena>,
) {
let mut outer_argument = None;
let mut name_argument = None;
let mut flags_argument = None;
// Fill at most the three supported slots in order; anything left over
// is reported below as `NewTooManyArguments`.
for slot in [&mut outer_argument, &mut name_argument, &mut flags_argument] {
match self.parse_call_argument_slot(left_parenthesis_position) {
ParsedCallArgumentSlot::Argument(argument) => *slot = argument,
ParsedCallArgumentSlot::NoMoreArguments => break,
}
}
if let Some((next_token, next_token_position)) = self.peek_token_and_position()
&& next_token != Token::RightParenthesis
{
self.make_error_here(ParseErrorKind::NewTooManyArguments {
left_parenthesis_position,
})
.widen_error_span_from(left_parenthesis_position)
.sync_error_until(self, SyncLevel::CloseParenthesis)
.blame_token(next_token_position)
.extend_blame_to_covered_end()
.report_error(self);
}
self.expect(
Token::RightParenthesis,
ParseErrorKind::NewMissingClosingParenthesis {
left_parenthesis_position,
},
)
.widen_error_span_from(left_parenthesis_position)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.report_error(self);
(outer_argument, name_argument, flags_argument)
}
/// Returns `true` iff the next token is definitely not a valid start of an
/// expression.
///
/// This is intentionally conservative:
/// - `true` means parsing an expression here is pointless;
/// - `false` means "might be valid", so the normal expression parser should
/// decide and potentially emit a more specific error.
#[must_use]
pub(crate) fn next_token_definitely_cannot_start_expression(&mut self) -> bool {
matches!(
self.peek_token(),
None
// Closing delimiters / separators
| Some(Token::RightParenthesis)
| Some(Token::RightBrace)
| Some(Token::RightBracket)
| Some(Token::Semicolon)
| Some(Token::Comma)
| Some(Token::Colon)
| Some(Token::Question)
// Tokens that only continue a previous expression
| Some(Token::Period)
// Infix / postfix / assignment operators
| Some(Token::Exponentiation)
| Some(Token::Multiply)
| Some(Token::Divide)
| Some(Token::Modulo)
| Some(Token::ConcatSpace)
| Some(Token::Concat)
| Some(Token::LeftShift)
| Some(Token::LogicalRightShift)
| Some(Token::RightShift)
| Some(Token::Less)
| Some(Token::LessEqual)
| Some(Token::Greater)
| Some(Token::GreaterEqual)
| Some(Token::Equal)
| Some(Token::NotEqual)
| Some(Token::ApproximatelyEqual)
| Some(Token::BitwiseAnd)
| Some(Token::BitwiseOr)
| Some(Token::BitwiseXor)
| Some(Token::LogicalAnd)
| Some(Token::LogicalXor)
| Some(Token::LogicalOr)
| Some(Token::Assign)
| Some(Token::MultiplyAssign)
| Some(Token::DivideAssign)
| Some(Token::ModuloAssign)
| Some(Token::PlusAssign)
| Some(Token::MinusAssign)
| Some(Token::ConcatAssign)
| Some(Token::ConcatSpaceAssign)
// Non-expression trivia / technical tokens
| Some(Token::ExecDirective)
| Some(Token::CppBlock)
| Some(Token::Hash)
| Some(Token::LineComment)
| Some(Token::BlockComment)
| Some(Token::Newline)
| Some(Token::Whitespace)
| Some(Token::Error)
)
}
}

View File

@ -0,0 +1,197 @@
//! Parser for expression selectors in Fermented `UnrealScript`.
//!
//! Selectors are suffix forms that extend an already parsed expression,
//! such as member access, indexing, and calls.
//!
//! Unlike primaries, selectors cannot be parsed on their own from the
//! current token. They always require a left-hand side expression.
use crate::arena::ArenaVec;
use crate::ast::AstSpan;
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
/// Represents the result of parsing one call argument slot.
///
/// This distinguishes between the end of the argument list and a parsed
/// argument slot, including an omitted one.
#[must_use]
#[derive(Debug, PartialEq)]
pub enum ParsedCallArgumentSlot<'src, 'arena> {
/// Indicates that the argument list has ended; callers stop their
/// argument loop when they see this variant.
NoMoreArguments,
/// The parsed argument for this slot.
///
/// `None` represents an omitted argument between commas (a slot that was
/// introduced by a comma but left empty).
Argument(OptionalExpression<'src, 'arena>),
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses every contiguous postfix selector following `left_hand_side`.
///
/// Returns the expression wrapped by all selectors that were found.
pub(crate) fn parse_selectors_into(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
    let mut expression = left_hand_side;
    loop {
        // The peeked position is only needed to widen diagnostic spans.
        let Some((next_token, next_position)) = self.peek_token_and_position() else {
            break;
        };
        expression = match next_token {
            Token::Period => self.parse_selector_member_access_into(expression)?,
            Token::LeftBracket => {
                self.parse_selector_index_into(expression, next_position)?
            }
            Token::LeftParenthesis => {
                self.parse_selector_call_into(expression, next_position)
            }
            _ => break,
        };
    }
    Ok(expression)
}
/// Parses a single member access selector (`.name`) after `target`.
///
/// The leading `.` must be the next token; returns the resulting member
/// access expression.
fn parse_selector_member_access_into(
    &mut self,
    target: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
    self.advance(); // consume `.`
    let name = self.parse_identifier(ParseErrorKind::ExpressionUnexpectedToken)?;
    // The member access spans from the start of the target through the
    // position carried by the parsed identifier.
    let span = AstSpan::range(target.span().token_from, name.0);
    Ok(self
        .arena
        .alloc_node(Expression::Member { target, name }, span))
}
/// Parses an index selector (`[expression]`) after `target`.
///
/// The leading `[` must be the next token; returns the resulting indexing
/// expression.
fn parse_selector_index_into(
    &mut self,
    target: ExpressionRef<'src, 'arena>,
    left_bracket_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    self.advance(); // consume `[`
    let index = self.parse_expression();
    let right_bracket_position = self
        .expect(
            Token::RightBracket,
            ParseErrorKind::ExpressionUnexpectedToken,
        )
        .widen_error_span_from(left_bracket_position)
        .sync_error_at(self, SyncLevel::CloseBracket)?;
    let selector_start = target.span().token_from;
    Ok(self.arena.alloc_node_between(
        Expression::Index { target, index },
        selector_start,
        right_bracket_position,
    ))
}
/// Parses a call selector (`(arguments)`) after `callee`.
///
/// The leading `(` must be the next token; returns the resulting call
/// expression.
fn parse_selector_call_into(
    &mut self,
    callee: ExpressionRef<'src, 'arena>,
    left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    self.advance(); // consume `(`
    let arguments = self.parse_call_argument_list(left_parenthesis_position);
    let right_parenthesis_position = self
        .expect(
            Token::RightParenthesis,
            ParseErrorKind::FunctionCallMissingClosingParenthesis,
        )
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .unwrap_or_fallback(self);
    let selector_start = callee.span().token_from;
    self.arena.alloc_node_between(
        Expression::Call { callee, arguments },
        selector_start,
        right_parenthesis_position,
    )
}
/// Parses one call argument slot after an already consumed `(`.
///
/// In `UnrealScript`, every comma introduces a follow-up argument slot, so a
/// trailing comma immediately before `)` denotes an omitted final argument.
///
/// Returns [`ParsedCallArgumentSlot::NoMoreArguments`] when the argument
/// list ends, and `Argument(None)` for an omitted argument slot.
pub(crate) fn parse_call_argument_slot(
&mut self,
left_parenthesis_position: TokenPosition,
) -> ParsedCallArgumentSlot<'src, 'arena> {
match self.peek_token() {
Some(Token::RightParenthesis) => return ParsedCallArgumentSlot::NoMoreArguments,
Some(Token::Comma) => {
self.advance();
if self.at_call_argument_boundary() {
return ParsedCallArgumentSlot::Argument(None);
}
}
_ => (),
}
let argument = self.parse_expression();
if !self.at_call_argument_boundary() {
self.make_error_here(ParseErrorKind::FunctionArgumentMissingComma)
.widen_error_span_from(left_parenthesis_position)
.report_error(self);
}
ParsedCallArgumentSlot::Argument(Some(argument))
}
/// Parses a call argument list after an already-consumed `(`.
///
/// Returns all parsed argument slots, preserving omitted arguments
/// as `None`.
fn parse_call_argument_list(
&mut self,
left_parenthesis_position: TokenPosition,
) -> ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>> {
let mut argument_list = ArenaVec::new_in(self.arena);
while let ParsedCallArgumentSlot::Argument(argument) =
self.parse_call_argument_slot(left_parenthesis_position)
{
argument_list.push(argument);
}
argument_list
}
/// Returns whether the current lookahead token ends the current call
/// argument slot.
///
/// This is true for `,`, which starts the next slot, and for `)`, which
/// ends the argument list.
fn at_call_argument_boundary(&mut self) -> bool {
matches!(
self.peek_token(),
Some(Token::Comma | Token::RightParenthesis)
)
}
}

View File

@ -0,0 +1,203 @@
//! Switch parsing for Fermented `UnrealScript`.
//!
//! Provides routines for parsing `switch (...) { ... }` expressions.
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, ExpressionRef, StatementRef};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses a `switch` expression after the `switch` keyword has been
    /// consumed.
    ///
    /// Returns an [`crate::ast::Expression::Switch`] whose span covers the
    /// entire construct, from `switch_start_position` to the closing `}`.
    ///
    /// Only one `default` arm is recorded. Duplicate defaults and `case` arms
    /// after a `default` are reported as errors.
    ///
    /// On premature end-of-file, reports an error and returns a best-effort
    /// switch node.
    #[must_use]
    pub(crate) fn parse_switch_tail(
        &mut self,
        switch_start_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        // The selector expression precedes the braced body.
        let selector = self.parse_expression();
        let mut cases = self.arena.vec();
        let mut default_arm = None;
        let mut span = AstSpan::new(switch_start_position);
        // NOTE(review): this assumes `report_error` returns `true` when an
        // error was reported — confirm. On a missing `{` we give up
        // immediately and return an empty best-effort switch node.
        if self
            .expect(Token::LeftBrace, ParseErrorKind::SwitchMissingBody)
            .report_error(self)
        {
            return self.alloc_switch_node(selector, cases, default_arm, span);
        }
        while let Some((token, token_position)) = self.peek_token_and_position() {
            match token {
                Token::RightBrace => {
                    self.advance(); // '}'
                    span.extend_to(token_position);
                    return self.alloc_switch_node(selector, cases, default_arm, span);
                }
                Token::Keyword(Keyword::Case) => {
                    // A `case` after `default` is reported, but still parsed
                    // below so its body gets diagnostics too.
                    if default_arm.is_some() {
                        self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault);
                    }
                    let case_node = self.parse_switch_case_group(token_position);
                    cases.push(case_node);
                }
                Token::Keyword(Keyword::Default) => {
                    if default_arm.is_some() {
                        self.report_error_here(ParseErrorKind::SwitchDuplicateDefault);
                    }
                    // Duplicate `default` is still parsed so that diagnostics
                    // in its body can be reported.
                    self.parse_switch_default_arm(
                        token_position,
                        default_arm.get_or_insert_with(|| self.arena.vec()),
                    );
                }
                // Items before the first arm declaration are not allowed, but
                // are parsed for basic diagnostics and simplicity.
                _ => self.parse_switch_preamble_items(token_position),
            }
            // Guards against an infinite loop when an iteration failed to
            // consume any tokens.
            self.ensure_forward_progress(token_position);
        }
        // Reached end-of-file without seeing the closing `}`.
        self.report_error_here(ParseErrorKind::SwitchMissingClosingBrace);
        // This can only be `None` in the pathological case of
        // an empty token stream
        span.extend_to(
            self.last_consumed_position()
                .unwrap_or(switch_start_position),
        );
        self.alloc_switch_node(selector, cases, default_arm, span)
    }
    /// Parses a stacked `case` group and its body:
    /// `case <expr>: (case <expr>:)* <arm-body>`.
    ///
    /// Returns the allocated [`crate::ast::CaseRef`] node.
    ///
    /// The returned node span covers the entire group, from
    /// `first_case_position` to the end of the arm body, or to the end of the
    /// last label if the body is empty.
    #[must_use]
    fn parse_switch_case_group(
        &mut self,
        first_case_position: TokenPosition,
    ) -> crate::ast::SwitchCaseRef<'src, 'arena> {
        let mut labels = self.arena.vec();
        // Collect every stacked `case <expr>:` label that precedes the body.
        while let Some((Keyword::Case, case_position)) = self.peek_keyword_and_position() {
            self.advance(); // 'case'
            labels.push(self.parse_expression());
            // `:` is required after each case label; missing `:` is recovered
            // at statement sync level.
            self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
                .widen_error_span_from(case_position)
                .sync_error_until(self, crate::parser::SyncLevel::Statement)
                .report_error(self);
        }
        let mut body = self.arena.vec();
        self.parse_switch_arm_body(&mut body);
        let case_span = compute_case_span(first_case_position, &labels, &body);
        self.arena
            .alloc_node(crate::ast::SwitchCase { labels, body }, case_span)
    }
    /// Parses a `default:` arm and appends its statements to `statements`.
    ///
    /// Appending (rather than returning) lets a duplicate `default` merge its
    /// statements into the already-recorded arm.
    fn parse_switch_default_arm(
        &mut self,
        default_position: TokenPosition,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
    ) {
        self.advance(); // 'default'
        self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
            .widen_error_span_from(default_position)
            .sync_error_until(self, crate::parser::SyncLevel::Statement)
            .report_error(self);
        self.parse_switch_arm_body(statements);
    }
    /// Parses statements of a single switch arm body.
    ///
    /// Stops (without consuming) at the next `case`/`default` label, the
    /// closing `}`, or end-of-file.
    fn parse_switch_arm_body(
        &mut self,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
    ) {
        while let Some((token, token_position)) = self.peek_token_and_position() {
            match token {
                Token::Keyword(Keyword::Case | Keyword::Default) | Token::RightBrace => break,
                _ => {
                    self.parse_next_block_item_into(statements);
                    // Avoid spinning forever on a token the block-item parser
                    // cannot consume.
                    self.ensure_forward_progress(token_position);
                }
            }
        }
    }
    /// Parses items that appear before any `case` or `default` arm declaration.
    ///
    /// Such items are not allowed, but they are parsed to produce diagnostics
    /// and maintain forward progress.
    ///
    /// Parsed statements are discarded; only error reporting is preserved.
    ///
    /// Parsing stops at a boundary token or end-of-file.
    /// Boundary tokens: `case`, `default`, `}`.
    fn parse_switch_preamble_items(&mut self, preamble_start_position: TokenPosition)
    where
        'src: 'arena,
    {
        // Discard parsed statements into a sink vector.
        // This is a bit "hacky", but I don't want to adapt code to skip
        // production of AST nodes just to report errors in
        // one problematic case.
        let mut sink = self.arena.vec();
        self.parse_switch_arm_body(&mut sink);
        self.make_error_here(ParseErrorKind::SwitchTopLevelItemNotCase)
            .widen_error_span_from(preamble_start_position)
            .report_error(self);
    }
    /// Helper to allocate a `Switch` expression with the given span.
    #[must_use]
    fn alloc_switch_node(
        &self,
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, crate::ast::SwitchCaseRef<'src, 'arena>>,
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
        span: AstSpan,
    ) -> ExpressionRef<'src, 'arena> {
        self.arena.alloc_node(
            crate::ast::Expression::Switch {
                selector,
                cases,
                default_arm,
            },
            span,
        )
    }
}
/// Computes an [`AstSpan`] covering a `case` group.
///
/// The span begins at `labels_start_position` and extends to:
/// - the end of the last statement in `body`, if present; otherwise
/// - the end of the last label in `labels`, if present.
///
/// If both are empty, the span covers only `labels_start_position`.
#[must_use]
fn compute_case_span(
    labels_start_position: TokenPosition,
    labels: &[ExpressionRef],
    body: &[StatementRef],
) -> AstSpan {
    let mut case_span = AstSpan::new(labels_start_position);
    // Prefer the last body statement as the end point; fall back to the last
    // label when the arm body is empty.
    let end_position = body
        .last()
        .map(|statement| statement.span().token_to)
        .or_else(|| labels.last().map(|label| label.span().token_to));
    if let Some(end_position) = end_position {
        case_span.extend_to(end_position);
    }
    case_span
}

View File

@ -1,99 +0,0 @@
use crate::ast::{AstSpan, Expression};
use crate::lexer::{Token, TokenLocation};
use crate::parser::ParseErrorKind;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parse the continuation of a `return` after its keyword was consumed.
///
/// Doesn't consume the terminating `;`.
/// If the next token is not `;`, parses an expression as the optional
/// value. Produces an [`Expression::Return`] whose span runs from
/// the `return` keyword to the end of the value if present, otherwise to
/// the `return` keyword.
#[must_use]
pub(crate) fn parse_return_cont(
&mut self,
return_start_location: TokenLocation,
) -> crate::ast::ExpressionRef<'src, 'arena> {
let (value, span) = if self.peek_token() != Some(Token::Semicolon) {
let value = self.parse_expression();
let span = AstSpan {
from: return_start_location,
to: value.span().to,
};
(Some(value), span)
} else {
(
None,
AstSpan {
from: return_start_location,
to: return_start_location,
},
)
};
self.arena.alloc(Expression::Return(value), span)
}
/// Parse the continuation of a `break` after its keyword was consumed.
///
/// Doesn't consume the terminating `;`.
/// If the next token is not `;`, parses an optional value expression.
/// Produces an [`Expression::Break`] spanning from `break` to the end
/// of the value if present, otherwise to the `break` keyword.
#[must_use]
pub(crate) fn parse_break_cont(
&mut self,
break_start_location: TokenLocation,
) -> crate::ast::ExpressionRef<'src, 'arena> {
let (value, span) = if self.peek_token() != Some(Token::Semicolon) {
let value = self.parse_expression();
let span = AstSpan {
from: break_start_location,
to: value.span().to,
};
(Some(value), span)
} else {
(
None,
AstSpan {
from: break_start_location,
to: break_start_location,
},
)
};
self.arena.alloc(Expression::Break(value), span)
}
/// Parses a `goto` expression after `goto`, assuming that the `goto` token
/// was consumed.
///
/// Requires the next token to be an identifier label.
/// On missing token, returns [`ParseErrorKind::UnexpectedEndOfFile`].
/// On a non-identifier next token,
/// returns [`ParseErrorKind::GotoMissingLabel`].
/// On success, produces an [`Expression::Goto`] spanning from `goto`
/// to the label token.
#[must_use]
pub(crate) fn parse_goto_cont(
&mut self,
goto_start_location: TokenLocation,
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
let Some((token, text, token_location)) = self.peek_token_lexeme_and_location() else {
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
};
if token == Token::Identifier {
let span = AstSpan {
from: goto_start_location,
to: token_location,
};
self.advance();
Ok(self
.arena
.alloc(Expression::Goto(self.arena.string(text)), span))
} else {
Err(self.make_error_here(ParseErrorKind::GotoMissingLabel))
}
}
}

View File

@ -0,0 +1,297 @@
//! Parsing of callable definitions for Fermented `UnrealScript`
//! (functions, events, delegates, operators).
use crate::arena::ArenaVec;
use crate::ast::{
AstSpan, CallableDefinition, CallableDefinitionRef, CallableKind, CallableModifier,
CallableModifierKind, CallableName, IdentifierToken, InfixOperator, InfixOperatorName,
ParameterRef, PostfixOperator, PostfixOperatorName, PrefixOperator, PrefixOperatorName,
TypeSpecifierRef,
};
use crate::lexer::{Keyword, Token, TokenPosition};
use crate::parser::{
ParseError, ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel,
recovery::RecoveryFallback,
};
/// Temporary parsed representation of a callable header without its body.
#[derive(Debug)]
pub(super) struct ParsedCallableHeader<'src, 'arena> {
    // Position of the first token of the header (first modifier or the kind
    // keyword).
    pub start_position: TokenPosition,
    // Modifiers collected from both before and after the kind keyword.
    pub modifiers: crate::arena::ArenaVec<'arena, CallableModifier>,
    // Which callable construct this is (function/event/delegate/operator).
    pub kind: CallableKind,
    // `None` when the header declares no return type.
    pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
    // Identifier for functions/events/delegates, operator symbol otherwise.
    pub name: CallableName,
    pub parameters: crate::arena::ArenaVec<'arena, ParameterRef<'src, 'arena>>,
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for ParsedCallableHeader<'src, 'arena> {
    /// Builds a placeholder header anchored at the error's start position so
    /// parsing can continue after a malformed header.
    fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
        let anchor_position = error.covered_span.token_from;
        Self {
            start_position: anchor_position,
            modifiers: parser.arena.vec(),
            kind: CallableKind::Function,
            return_type_specifier: None,
            name: CallableName::Identifier(IdentifierToken(anchor_position)),
            parameters: parser.arena.vec(),
        }
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a callable definition.
    ///
    /// Assumes [`Parser::is_callable_header_ahead`] has already confirmed that
    /// a callable declaration begins at the current position. This affects
    /// the diagnostics produced for malformed input.
    #[must_use]
    pub(crate) fn parse_callable_definition(&mut self) -> CallableDefinitionRef<'src, 'arena> {
        let header = self.parse_callable_header().unwrap_or_fallback(self);
        // A callable either has a braced body or ends with a bare `;`.
        let body = if self.eat(Token::LeftBrace) {
            Some(self.parse_braced_block_statements_tail(self.last_consumed_position_or_start()))
        } else {
            self.expect(
                Token::Semicolon,
                ParseErrorKind::CallableMissingBodyOrSemicolon,
            )
            .report_error(self);
            None
        };
        let span = AstSpan::range(
            header.start_position,
            self.last_consumed_position_or_start(),
        );
        self.arena.alloc_node(
            CallableDefinition {
                name: header.name,
                kind: header.kind,
                return_type_specifier: header.return_type_specifier,
                modifiers: header.modifiers,
                parameters: header.parameters,
                body,
            },
            span,
        )
    }
    /// Parses a callable header without the body.
    ///
    /// # Errors
    ///
    /// Propagates errors from kind, return-type and name parsing; parameter
    /// list and parenthesis errors are reported and recovered from locally.
    fn parse_callable_header(
        &mut self,
    ) -> ParseResult<'src, 'arena, ParsedCallableHeader<'src, 'arena>> {
        let start_position = self.require_position(ParseErrorKind::CallableExpectedHeader)?;
        let mut modifiers = self.arena.vec();
        // Modifiers may appear both before and after the kind keyword.
        self.collect_callable_modifiers(&mut modifiers);
        let kind = self.parse_callable_kind()?;
        self.collect_callable_modifiers(&mut modifiers);
        // `(` cannot appear inside a return type in this grammar,
        // so seeing it here means the callable has no return type specifier.
        let return_type_specifier = match self.peek_token_at(1) {
            Some(Token::LeftParenthesis) => None,
            _ => Some(self.parse_type_specifier()?),
        };
        let name = self.parse_callable_name(kind)?;
        self.expect(
            Token::LeftParenthesis,
            ParseErrorKind::CallableParamsMissingOpeningParenthesis,
        )
        .report_error(self);
        let parameters = self.parse_parameter_list();
        self.expect(
            Token::RightParenthesis,
            ParseErrorKind::CallableParamsMissingClosingParenthesis,
        )
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .report_error(self);
        Ok(ParsedCallableHeader {
            start_position,
            modifiers,
            kind,
            return_type_specifier,
            name,
            parameters,
        })
    }
    /// Parses the keyword that defines the callable kind.
    ///
    /// # Errors
    ///
    /// [`ParseErrorKind::CallableExpectedKind`] when the next token is not
    /// a callable kind keyword.
    fn parse_callable_kind(&mut self) -> ParseResult<'src, 'arena, CallableKind> {
        if let Some(keyword) = self.peek_keyword() {
            // Handle this separately because only infix operators can carry
            // an optional precedence and cannot, therefore, be handled by
            // a simple converter.
            if keyword == Keyword::Operator {
                self.advance();
                let precedence = self.parse_optional_parenthesized_integer(
                    ParseErrorKind::CallableOperatorInvalidPrecedence,
                );
                return Ok(CallableKind::InfixOperator(precedence));
            }
            if let Ok(kind) = CallableKind::try_from(keyword) {
                self.advance();
                return Ok(kind);
            }
        }
        Err(self.make_error_here(ParseErrorKind::CallableExpectedKind))
    }
    /// Parses the callable's name according to its `kind`: a plain identifier
    /// for functions, events and delegates, or an operator symbol token for
    /// the operator kinds.
    fn parse_callable_name(
        &mut self,
        kind: CallableKind,
    ) -> ParseResult<'src, 'arena, CallableName> {
        match kind {
            CallableKind::Function | CallableKind::Event | CallableKind::Delegate => self
                .parse_identifier(ParseErrorKind::CallableNameNotIdentifier)
                .map(CallableName::Identifier),
            CallableKind::PrefixOperator => {
                let (token, operator_position) = self.require_token_and_position(
                    ParseErrorKind::CallablePrefixOperatorInvalidSymbol,
                )?;
                let operator = PrefixOperator::try_from(token).map_err(|()| {
                    self.make_error_here(ParseErrorKind::CallablePrefixOperatorInvalidSymbol)
                })?;
                self.advance();
                Ok(CallableName::PrefixOperator(PrefixOperatorName {
                    kind: operator,
                    position: operator_position,
                }))
            }
            CallableKind::InfixOperator(_) => {
                let (token, operator_position) = self.require_token_and_position(
                    ParseErrorKind::CallableInfixOperatorInvalidSymbol,
                )?;
                let operator = InfixOperator::try_from(token).map_err(|()| {
                    self.make_error_here(ParseErrorKind::CallableInfixOperatorInvalidSymbol)
                })?;
                self.advance();
                Ok(CallableName::InfixOperator(InfixOperatorName {
                    kind: operator,
                    position: operator_position,
                }))
            }
            CallableKind::PostfixOperator => {
                let (token, operator_position) = self.require_token_and_position(
                    ParseErrorKind::CallablePostfixOperatorInvalidSymbol,
                )?;
                let operator = PostfixOperator::try_from(token).map_err(|()| {
                    self.make_error_here(ParseErrorKind::CallablePostfixOperatorInvalidSymbol)
                })?;
                self.advance();
                Ok(CallableName::PostfixOperator(PostfixOperatorName {
                    kind: operator,
                    position: operator_position,
                }))
            }
        }
    }
    /// Parses an uninterrupted sequence of function modifiers into
    /// given vector.
    pub(crate) fn collect_callable_modifiers(
        &mut self,
        modifiers: &mut ArenaVec<'arena, CallableModifier>,
    ) {
        while let Some(next_mod) = self.parse_function_modifier() {
            modifiers.push(next_mod);
        }
    }
    /// Parses a single callable modifier, or returns `None` when the next
    /// token does not start one (nothing is consumed in that case).
    fn parse_function_modifier(&mut self) -> Option<CallableModifier> {
        let (keyword, start) = self.peek_keyword_and_position()?;
        let kind = match keyword {
            Keyword::Native => {
                self.advance();
                let native_id = self.parse_optional_parenthesized_integer(
                    ParseErrorKind::NativeModifierIdNotIntegerLiteral,
                );
                CallableModifierKind::Native(native_id)
            }
            Keyword::Config => {
                self.advance();
                // NOTE(review): both arguments here are the closing-parenthesis
                // error kind; a dedicated missing-identifier kind would give
                // clearer diagnostics — confirm whether one exists.
                let ident = self
                    .parse_required_parenthesized_identifier(
                        ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
                        ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
                    )
                    .unwrap_or(IdentifierToken(start));
                CallableModifierKind::Config(ident)
            }
            _ => {
                let simple = CallableModifierKind::try_from(keyword).ok()?;
                // Only advance after confirming it is the modifier
                self.advance();
                simple
            }
        };
        let span = AstSpan::range(start, self.last_consumed_position_or_start());
        Some(CallableModifier { kind, span })
    }
    /// Parses an optional `'(' <integer> ')'` specifier (operator precedence
    /// or native id).
    ///
    /// Returns `None` when no `(` follows, or when the value is missing or
    /// invalid. All diagnostics use the caller-provided `error_kind`, so each
    /// construct (`operator(...)`, `native(...)`) reports its own
    /// construct-specific error. Previously the non-integer-value case
    /// reported a hard-coded `OperatorPrecedenceNotIntegerLiteral`, which was
    /// misleading for `native(...)` ids.
    fn parse_optional_parenthesized_integer(
        &mut self,
        error_kind: ParseErrorKind,
    ) -> Option<u128> {
        if !self.eat(Token::LeftParenthesis) {
            return None;
        }
        let value = match self.peek_token_and_lexeme() {
            Some((Token::IntegerLiteral, lex)) => {
                self.advance();
                self.decode_integer_literal(lex).ok_or_report(self)
            }
            Some(_) => {
                self.report_error_here(error_kind);
                self.advance();
                None
            }
            None => {
                self.report_error_here(error_kind);
                None
            }
        };
        self.expect(Token::RightParenthesis, error_kind)
            .sync_error_at(self, SyncLevel::CloseParenthesis)
            .report_error(self);
        value
    }
    /// Parses a required `'(' <identifier> ')'` specifier
    /// (e.g. `config(Section)`).
    ///
    /// Reports `ident_err` when the `(` or the identifier is missing, and
    /// `close_err` when the closing `)` is missing. Returns `None` when no
    /// identifier could be parsed.
    fn parse_required_parenthesized_identifier(
        &mut self,
        close_err: ParseErrorKind,
        ident_err: ParseErrorKind,
    ) -> Option<IdentifierToken> {
        if !self.eat(Token::LeftParenthesis) {
            self.report_error_here(ident_err);
            return None;
        }
        let ident = match self.peek_token_lexeme_and_position() {
            Some((tok, _, pos)) if tok.is_valid_identifier_name() => {
                self.advance();
                Some(IdentifierToken(pos))
            }
            Some(_) => {
                self.report_error_here(ident_err);
                self.advance();
                None
            }
            None => {
                self.report_error_here(ident_err);
                None
            }
        };
        self.expect(Token::RightParenthesis, close_err)
            .sync_error_at(self, SyncLevel::CloseParenthesis)
            .report_error(self);
        ident
    }
}

View File

@ -0,0 +1,50 @@
//! Lookahead for callable headers in Fermented `UnrealScript`.
use crate::lexer::{Keyword, Token};
use crate::parser::Parser;
impl Parser<'_, '_> {
    /// Returns whether the upcoming tokens have the syntactic shape of
    /// a callable header.
    ///
    /// Returns `true` when the following tokens consist of zero or more
    /// callable modifiers followed by a keyword that defines a callable kind.
    ///
    /// Does not check whether any parenthesized arguments are valid.
    #[must_use]
    pub(crate) fn is_callable_header_ahead(&mut self) -> bool {
        let mut lookahead_offset = 0;
        loop {
            let Some(keyword) = self.peek_keyword_at(lookahead_offset) else {
                return false;
            };
            if keyword.is_callable_kind_keyword() {
                return true;
            }
            match self.callable_modifier_width_at(keyword, lookahead_offset) {
                Some(token_width) => lookahead_offset += token_width,
                None => return false,
            }
        }
    }
    /// Returns the token width of the modifier starting at
    /// `lookahead_token_offset`, or `None` when `keyword` is not a callable
    /// modifier.
    fn callable_modifier_width_at(
        &mut self,
        keyword: Keyword,
        lookahead_token_offset: usize,
    ) -> Option<usize> {
        if !keyword.is_callable_modifier() {
            return None;
        }
        // `native(...)` and `config(...)` consume a parenthesized specifier
        // in modifier position, so lookahead must skip the whole modifier.
        // NOTE(review): this only matches a specifier of exactly one token
        // between the parentheses — confirm that wider/empty specifiers are
        // intentionally not recognized here.
        let has_parenthesized_specifier = matches!(keyword, Keyword::Native | Keyword::Config)
            && self.peek_token_at(lookahead_token_offset + 1) == Some(Token::LeftParenthesis)
            && self.peek_token_at(lookahead_token_offset + 3) == Some(Token::RightParenthesis);
        if has_parenthesized_specifier {
            Some(4)
        } else {
            Some(1)
        }
    }
}

View File

@ -0,0 +1,3 @@
mod definition;
mod lookahead;
mod params;

View File

@ -0,0 +1,107 @@
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, Parameter, ParameterModifier, ParameterModifierKind, ParameterRef};
use crate::lexer::{Keyword, Token};
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
// allowed to switch to result returning
pub(crate) fn parse_parameter_list(&mut self) -> ArenaVec<'arena, ParameterRef<'src, 'arena>> {
let mut params = self.arena.vec();
if matches!(self.peek_token(), Some(Token::RightParenthesis)) {
return params;
}
loop {
let start_pos = self.last_consumed_position_or_start();
let mut modifiers = self.arena.vec();
while let Some((next_keyword, next_position)) = self.peek_keyword_and_position() {
match next_keyword {
Keyword::Optional => {
modifiers.push(ParameterModifier {
kind: ParameterModifierKind::Optional,
position: next_position,
});
self.advance();
}
Keyword::Out => {
modifiers.push(ParameterModifier {
kind: ParameterModifierKind::Out,
position: next_position,
});
self.advance();
}
Keyword::Coerce => {
modifiers.push(ParameterModifier {
kind: ParameterModifierKind::Coerce,
position: next_position,
});
self.advance();
}
Keyword::Skip => {
modifiers.push(ParameterModifier {
kind: ParameterModifierKind::Skip,
position: next_position,
});
self.advance();
}
_ => break,
}
}
let type_spec = match self.parse_type_specifier() {
Ok(t) => t,
Err(e) => {
self.report_error(e);
self.recover_until(SyncLevel::ListSeparator);
if self.eat(Token::Comma) {
continue;
}
break;
}
};
let name = self
.parse_identifier(ParseErrorKind::ParamMissingIdentifier)
.unwrap_or_fallback(self);
let array_len = match self.parse_array_len_expr() {
Ok(v) => v,
Err(e) => {
self.report_error(e);
self.recover_until(SyncLevel::CloseBracket);
let _ = self.eat(Token::RightBracket);
None
}
};
let default_value = if self.eat(Token::Assign) {
Some(self.parse_expression())
} else {
None
};
let span = AstSpan::range(start_pos, self.last_consumed_position_or_start());
params.push(self.arena.alloc_node(
Parameter {
modifiers,
type_specifier: type_spec,
name,
array_size: array_len,
default_value,
},
span,
));
if !self.eat(Token::Comma) || matches!(self.peek_token(), Some(Token::RightParenthesis))
{
break;
}
self.ensure_forward_progress(start_pos);
}
params
}
}

View File

@ -1,7 +1,15 @@
mod block; //! ## Naming conventions
mod control; //!
mod flow; //! Some naming conventions that might not be obvious:
mod pratt; //!
mod precedence; //! - `*_tail` means the opening token or keyword has already been consumed.
mod statements; //! Tail parsers build the rest of the construct and usually return a total,
mod switch; //! recovered result.
//! - `*_into` means the method extends an already parsed value or appends into
//! an existing output container.
mod class;
mod declarations;
mod expression;
mod function;
mod statement;

View File

@ -1,406 +0,0 @@
//! Expression parsing for the language front-end.
//!
//! This module implements a Pratt-style parser for the language's expression
//! grammar, supporting:
//!
//! * Primary expressions (literals, identifiers, parenthesized expressions)
//! * Prefix operators
//! * Postfix operators
//! * Infix operators with precedence and associativity
//!
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
//! operators bind. Infix parsing uses the pair of binding powers returned by
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
//! The parser infrastructure supports both left- and right-associative
//! operators, but Fermented UnrealScript currently defines only
//! right-associative ones.
//!
//! ## See also
//!
//! - [`crate::parser::Parser::parse_expression`] - main entry point
//! - [`PrecedenceRank`] - operator binding strengths
//! - [`super::precedence`] - operator precedence definitions
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{
ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel,
};
pub(crate) use super::precedence::PrecedenceRank;
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses an expression.
    ///
    /// Entry point of the Pratt parser: starts at the loosest precedence rank
    /// so operators of every binding power are accepted.
    pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
        self.parse_expression_with_precedence(PrecedenceRank::LOOSEST)
    }
/// Parses an expression with operators of at least `min_precedence_rank`
/// (as tight or tighter).
fn parse_expression_with_precedence(
&mut self,
min_precedence_rank: PrecedenceRank,
) -> ExpressionRef<'src, 'arena> {
// Intentional order: (1) prefix/primary, (2) postfix (tighter than
// any infix), (3) infix. We don't run a second postfix pass;
// `(a+b)!` works because the parenthesized sub-expression had its own
// postfix pass before returning.
let mut left_hand_side = self
.parse_prefix_or_primary()
.sync_error_until(self, SyncLevel::Expression)
.unwrap_or_fallback(self);
// Postfix operators are tighter than any infix ones
left_hand_side = self.parse_postfix_into(left_hand_side);
left_hand_side = self.parse_infix_into(left_hand_side, min_precedence_rank);
left_hand_side
}
/// Parses a prefix or primary expression (Pratt parser's "nud" or
/// null denotation).
///
/// Errors with [`ParseErrorKind::UnexpectedEndOfFile`] if the stream ends
/// before a valid prefix/primary.
fn parse_prefix_or_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
let Some((token, token_location)) = self.peek_token_and_location() else {
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
};
if let Ok(operator) = crate::ast::PrefixOperator::try_from(token) {
self.advance();
let right_hand_side = self.parse_expression_with_precedence(PrecedenceRank::TIGHTEST);
Ok(Expression::new_prefix(
self.arena,
token_location,
operator,
right_hand_side,
))
} else {
self.parse_primary()
}
}
    /// Parses a primary expression: literals, identifiers, or a parenthesized
    /// sub-expression.
    ///
    /// # Errors
    ///
    /// [`ParseErrorKind::ExpressionUnexpectedToken`] if the next token
    /// cannot start a primary; [`ParseErrorKind::UnexpectedEndOfFile`]
    /// at end of input.
    fn parse_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
        // For diagnostics, we only advance *after* fully parsing the current
        // literal/token.
        let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
        else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };
        match token {
            // Numeric and string literals: decode first (may error), then
            // advance, so the diagnostic points at the literal token.
            Token::IntegerLiteral => {
                let value = self.parse_integer_literal(token_text)?;
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Integer(value), token_location))
            }
            Token::FloatLiteral => {
                let value = self.parse_float_literal(token_text)?;
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Float(value), token_location))
            }
            Token::StringLiteral => {
                let value = unescape_string_literal(self.arena, token_text);
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::String(value), token_location))
            }
            // Keyword literals.
            Token::True => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Bool(true), token_location))
            }
            Token::False => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Bool(false), token_location))
            }
            Token::None => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::None, token_location))
            }
            Token::Identifier => {
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Identifier(token_text), token_location))
            }
            Token::LeftParenthesis => {
                self.advance();
                self.parse_parenthesized_expression_cont(token_location)
            }
            // Control-flow constructs are valid in expression position in
            // this grammar; each `*_cont` parser receives the location of
            // the already-consumed keyword.
            Token::If => {
                self.advance();
                Ok(self.parse_if_cont(token_location))
            }
            Token::While => {
                self.advance();
                Ok(self.parse_while_cont(token_location))
            }
            Token::Do => {
                self.advance();
                self.parse_do_until_cont(token_location)
            }
            Token::ForEach => {
                self.advance();
                Ok(self.parse_foreach_cont(token_location))
            }
            Token::For => {
                self.advance();
                self.parse_for_cont(token_location)
            }
            Token::Brace(crate::lexer::BraceKind::Normal) => {
                self.advance();
                Ok(self.parse_block_cont(token_location))
            }
            Token::Return => {
                self.advance();
                Ok(self.parse_return_cont(token_location))
            }
            Token::Break => {
                self.advance();
                Ok(self.parse_break_cont(token_location))
            }
            Token::Continue => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Continue, token_location))
            }
            Token::Goto => {
                self.advance();
                self.parse_goto_cont(token_location)
            }
            Token::Switch => {
                self.advance();
                self.parse_switch_cont(token_location)
            }
            _ => {
                // Unexpected token in expression.
                Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken))
            }
        }
    }
/// Parses an expression in parentheses.
///
/// Assumes the `(` was already consumed; its location is
/// `left_parenthesis_location`.
/// On success, allocates a [`Expression::Parentheses`] node with a span
/// covering from `(` to `)`.
///
/// Errors with [`ParseErrorKind::ExpressionMissingClosingParenthesis`] if
/// a closing `)` is missing; the diagnostic is associated with
/// the opening `(` via `left_parenthesis_location`.
fn parse_parenthesized_expression_cont(
&mut self,
left_parenthesis_location: TokenLocation,
) -> ParseExpressionResult<'src, 'arena> {
let inner_expression = self.parse_expression();
let right_parenthesis_location = self
.expect(
Token::RightParenthesis,
ParseErrorKind::ExpressionMissingClosingParenthesis,
)
.widen_error_span_from(left_parenthesis_location)
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
Ok(self.arena.alloc_between(
Expression::Parentheses(inner_expression),
left_parenthesis_location,
right_parenthesis_location,
))
}
/// Parses all postfix operators it can, creating a tree with
/// `left_hand_side` as a child.
fn parse_postfix_into(
&mut self,
mut left_hand_side: ExpressionRef<'src, 'arena>,
) -> ExpressionRef<'src, 'arena> {
// Single peek that yields `(postfix_op, location)` so the postfix loop
// can advance once per operator without extra matching/unwraps.
while let Some((operator, operator_location)) = self.peek_postfix_with_location() {
self.advance();
left_hand_side =
Expression::new_postfix(self.arena, left_hand_side, operator, operator_location);
}
left_hand_side
}
/// Parses infix operators that bind at least as tightly as
/// `min_precedence_rank`.
///
/// Associativity comes from
/// [`super::precedence::infix_precedence_ranks`]: the right-hand side is
/// parsed with the operator's `right_precedence_rank`, which is what makes
/// `a - b - c` and `a ^ b ^ c` group correctly according to the pair
/// `(left_rank, right_rank)`.
///
/// Returns once the next operator binds looser than `min_precedence_rank`.
fn parse_infix_into(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
    min_precedence_rank: PrecedenceRank,
) -> ExpressionRef<'src, 'arena> {
    loop {
        let Some((operator, right_precedence_rank)) =
            self.peek_infix_at_least(min_precedence_rank)
        else {
            break;
        };
        self.advance();
        let right_hand_side = self.parse_expression_with_precedence(right_precedence_rank);
        left_hand_side =
            Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
    }
    left_hand_side
}
/// Parses an integer literal as [`i128`].
///
/// [`i128`] is wide enough to cover FerUS's whole integer range, which
/// keeps constant folding exact.
///
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is not
/// a valid integer.
fn parse_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
    match text.parse::<i128>() {
        Ok(parsed_value) => Ok(parsed_value),
        Err(_) => Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)),
    }
}
/// Parses a float literal as [`f64`].
///
/// [`f64`] is wide enough to cover FerUS's float range, which keeps
/// constant folding exact.
///
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
/// not a valid float.
fn parse_float_literal(&mut self, text: &str) -> ParseResult<f64> {
    // Consistency fix: mirror `parse_integer_literal`'s `map_err` style
    // instead of the one-off `if let Ok(..)` form it used before.
    text.parse::<f64>()
        .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
}
/// Returns the next postfix operator and its location, if present.
///
/// Collapses peeking and conversion into a single call so the postfix
/// loop never needs to match or unwrap twice.
fn peek_postfix_with_location(
    &mut self,
) -> Option<(crate::ast::PostfixOperator, TokenLocation)> {
    let (token, token_location) = self.peek_token_and_location()?;
    let operator = crate::ast::PostfixOperator::try_from(token).ok()?;
    Some((operator, token_location))
}
/// If the next token is an infix operator whose left binding power is at
/// least `min_precedence_rank`, returns that operator together with its
/// right precedence rank.
///
/// Otherwise returns [`None`].
fn peek_infix_at_least(
    &mut self,
    min_precedence_rank: PrecedenceRank,
) -> Option<(crate::ast::InfixOperator, PrecedenceRank)> {
    let next_token = self.peek_token()?;
    let (left_precedence_rank, operator, right_precedence_rank) =
        super::precedence::infix_precedence_ranks(next_token)?;
    // An operator binding looser than the requested minimum is left for an
    // outer parsing level to consume.
    (!left_precedence_rank.is_looser_than(min_precedence_rank))
        .then_some((operator, right_precedence_rank))
}
/// Parses one item inside a `{ ... }` block.
///
/// The item can be a statement (e.g. a variable declaration) or an
/// expression. If the item is an expression without a following
/// semicolon, it is returned as the block's current tail expression
/// - the value considered to be the block's result. In well-formed
/// code such a tail expression appears only at the very end of the block.
///
/// This method never consumes the closing `}` and is only meant to be
/// called while parsing inside a block.
pub(crate) fn parse_block_item(
    &mut self,
    statements: &mut crate::arena::ArenaVec<'arena, crate::ast::StatementRef<'src, 'arena>>,
) -> Option<crate::ast::ExpressionRef<'src, 'arena>> {
    // Statements are tried first; only if nothing matched do we fall back
    // to parsing the item as an expression.
    if let Some(mut next_statement) = self.parse_statement() {
        if next_statement.needs_semicolon() {
            // For statements we immediately know if lack of
            // semicolon is an issue
            if let Some(Token::Semicolon) = self.peek_token() {
                // Grow the statement's span so it also covers the `;`.
                next_statement.span_mut().to = self.peek_location();
                self.advance(); // ';'
            } else {
                self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterStatement);
            }
        }
        statements.push(next_statement);
    } else {
        let mut next_expression = self.parse_expression();
        if let Expression::Error = *next_expression {
            // Expression parsing failed: skip ahead to a statement boundary
            // and stretch the error node's span over the skipped tokens.
            self.recover_until(SyncLevel::Statement);
            next_expression.span_mut().to = self.peek_location();
        }
        if let Some((Token::Semicolon, semicolon_location)) = self.peek_token_and_location() {
            self.advance(); // ;
            // `<expression> ;` becomes an ordinary expression statement.
            let span = crate::ast::AstSpan {
                from: next_expression.span().from,
                to: semicolon_location,
            };
            let expression_statement_node = self
                .arena
                .alloc(crate::ast::Statement::Expression(next_expression), span);
            statements.push(expression_statement_node);
        } else {
            // No `;`: hand the expression back as the block's tail value.
            return Some(next_expression);
        }
    }
    None
}
}
/// Unescapes a tokenized string literal into an arena string.
///
/// Recognized escapes: `\n`, `\t`, `\"`, `\\`. Any other escaped
/// character is kept verbatim, matching UnrealScript behavior.
///
/// Note: `raw` must be the token text with surrounding quotes already
/// stripped.
fn unescape_string_literal<'arena>(
    arena: &'arena crate::arena::Arena,
    raw: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut unescaped = String::with_capacity(raw.len());
    let mut remaining = raw.chars();
    while let Some(current_character) = remaining.next() {
        if current_character != '\\' {
            unescaped.push(current_character);
            continue;
        }
        // The lexer never emits a string token ending in a lone backslash,
        // so the escaped character is always present in practice.
        match remaining.next() {
            Some('n') => unescaped.push('\n'),
            Some('t') => unescaped.push('\t'),
            // `\"` and `\\` unescape to the character itself, and
            // UnrealScript keeps every other escaped character as-is, so a
            // single arm covers all remaining cases.
            Some(other_character) => unescaped.push(other_character),
            None => (),
        }
    }
    arena.string(&unescaped)
}

View File

@ -1,185 +0,0 @@
//! Precedence tables for Fermented UnrealScript operators.
//!
//! These values don't follow the usual *binding power* convention for
//! a Pratt parser, where tighter binding corresponds to a larger number.
//! Here, the smaller the number, the tighter the binding power.
//! For this reason, we use the term *precedence rank* instead.
//!
//! ## Operators sorted by precedence (lowest number = tighter binding)
//!
//! ### Infix operators
//!
//! All infix operators in UnrealScript are
//! [left-associative](https://wiki.beyondunreal.com/Operators).
//!
//! 12: `**`
//! 16: `*`, `/`, `Cross`, `Dot`
//! 18: `%`
//! 20: `+`, `-`
//! 22: `<<`, `>>`, `>>>`
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
//! 26: `!=`
//! 28: `&`, `^`, `|`
//! 30: `&&`, `^^`
//! 32: `||`
//! 34: `*=`, `/=`, `+=`, `-=`
//! 40: `$`, `*`, `@`
//! 44: `$=`, `*=`, `@=`
//! 45: `-=`
//!
//! Some operators, such as `*`, appear twice with different precedence
//! ranks because they were defined with different values for different types
//! in separate script source files (as in the Killing Floor sources).
//! However, UnrealScript uses only the first definition it encounters in
//! `Object.uc`, which corresponds to the lower value.
//!
//! ### Prefix operators
//!
//! `!`, `~`, `-`, `++`, `--`.
//!
//! ### Postfix operators
//!
//! `++`, `--`.
use crate::ast::{InfixOperator, PostfixOperator, PrefixOperator};
use crate::lexer::Token;
/// Compact precedence rank used by the Pratt Parser.
///
/// A smaller number means tighter binding, and a larger number means looser
/// binding. This inverted scale matches how UnrealScript tables were recorded.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct PrecedenceRank(u8);

impl PrecedenceRank {
    /// The loosest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by the maximum [`u8`] value.
    pub const LOOSEST: Self = PrecedenceRank(u8::MAX);

    /// The tightest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by zero.
    pub const TIGHTEST: PrecedenceRank = PrecedenceRank(0);

    /// Returns `true` if `other` has a looser binding than `self`.
    ///
    /// # Examples
    ///
    /// `PrecedenceRank` and its tuple constructor are crate-private, so
    /// this example is illustrative only and not compiled as a doctest.
    ///
    /// ```ignore
    /// let a = PrecedenceRank(40);
    /// let b = PrecedenceRank(34);
    /// assert!(a.is_looser_than(b)); // 40 is looser than 34
    ///
    /// let c = PrecedenceRank(20);
    /// let d = PrecedenceRank(24);
    /// assert!(!c.is_looser_than(d)); // 20 is tighter than 24
    /// ```
    pub fn is_looser_than(self, other: Self) -> bool {
        // Inverted scale: a numerically larger rank binds looser.
        self.0 > other.0
    }
}
impl TryFrom<Token> for PrefixOperator {
type Error = ();
fn try_from(token: Token) -> Result<Self, Self::Error> {
use PrefixOperator::*;
Ok(match token {
Token::Not => Not,
Token::Minus => Minus,
Token::BitwiseNot => BitwiseNot,
Token::Increment => Increment,
Token::Decrement => Decrement,
_ => return Err(()),
})
}
}
impl TryFrom<Token> for PostfixOperator {
type Error = ();
fn try_from(token: Token) -> Result<Self, Self::Error> {
use PostfixOperator::*;
Ok(match token {
Token::Increment => Increment,
Token::Decrement => Decrement,
_ => return Err(()),
})
}
}
/// Maps a token to its infix operator along with its left and right binding
/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`.
///
/// Returns [`None`] if and only if `token` is not an infix operator.
pub(crate) fn infix_precedence_ranks(
    token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
    use crate::ast::InfixOperator::*;
    // Ranks below follow the table in this module's docs: smaller number =
    // tighter binding.
    let (left_precedence_rank, operator) = match token {
        // 12: `**`
        Token::Exponentiation => (12, Exponentiation),
        // 16: `*`, `/`, `Cross`, `Dot` (left-assoc)
        Token::Multiply => (16, Multiply),
        Token::Divide => (16, Divide),
        Token::Cross => (16, Cross),
        Token::Dot => (16, Dot),
        // 18: `%`
        Token::Modulo => (18, Modulo),
        // 20: `+`, `-`
        Token::Plus => (20, Plus),
        Token::Minus => (20, Minus),
        // 22: `<<`, `>>`, `>>>`
        Token::LeftShift => (22, LeftShift),
        Token::RightShift => (22, RightShift),
        Token::LogicalRightShift => (22, LogicalRightShift),
        // 24: comparison operators
        Token::Less => (24, Less),
        Token::LessEqual => (24, LessEqual),
        Token::Greater => (24, Greater),
        Token::GreaterEqual => (24, GreaterEqual),
        Token::Equal => (24, Equal),
        Token::ApproximatelyEqual => (24, ApproximatelyEqual),
        Token::ClockwiseFrom => (24, ClockwiseFrom),
        // 26: `!=`
        Token::NotEqual => (26, NotEqual),
        // 28: bit-wise `&`, `^`, `|`
        Token::BitwiseAnd => (28, BitwiseAnd),
        Token::BitwiseXor => (28, BitwiseXor),
        Token::BitwiseOr => (28, BitwiseOr),
        // 30: logical `&&`, `^^`
        Token::And => (30, And),
        Token::Xor => (30, Xor),
        // 32: logical `||`
        Token::Or => (32, Or),
        // 34: `*=`, `/=`, `+=`, `-=`
        Token::MultiplyAssign => (34, MultiplyAssign),
        Token::DivideAssign => (34, DivideAssign),
        Token::PlusAssign => (34, PlusAssign),
        Token::MinusAssign => (34, MinusAssign),
        // Simple '=' treated with same precedence
        Token::Assign => (34, Assign),
        // `%=` grouped with the other compound assignments
        Token::ModuloAssign => (34, ModuloAssign),
        // 40: `$`, `@`
        Token::Concat => (40, Concat),
        Token::ConcatSpace => (40, ConcatSpace),
        // 44: `$=`, `@=`
        Token::ConcatAssign => (44, ConcatAssign),
        Token::ConcatSpaceAssign => (44, ConcatSpaceAssign),
        _ => return None,
    };
    // All operators are left-associative, so `right_precedence_rank` is set to
    // `left_binding_rank - 1` (with our "smaller is tighter" scale, this
    // enforces left associativity in Pratt parsing).
    //
    // Since all precedences are even, subtracting one won't actually cross
    // any boundary between operator groups.
    Some((
        PrecedenceRank(left_precedence_rank),
        operator,
        PrecedenceRank(left_precedence_rank - 1),
    ))
}

View File

@ -0,0 +1,85 @@
//! Statement parsing for the language front-end.
//!
//! Implements a simple recursive-descent parser for
//! *Fermented `UnrealScript` statements*.
use crate::ast::{AstSpan, Statement, StatementRef};
use crate::lexer::{Keyword, Token};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses a single statement.
    ///
    /// Does not consume a trailing `;` except for [`Statement::Empty`].
    /// The caller handles semicolons. Returns [`Some`] if a statement is
    /// recognized; otherwise [`None`].
    #[must_use]
    pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
        // Peek once for the token, its raw lexeme, and its position; the
        // lexeme is only needed by the label arm below.
        let Some((token, lexeme, position)) = self.peek_token_lexeme_and_position() else {
            self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
            return None;
        };
        match token {
            // Empty statement
            Token::Semicolon => {
                self.advance(); // `;`
                Some(
                    self.arena
                        .alloc_node(Statement::Empty, AstSpan::new(position)),
                )
            }
            // UnrealScript `local` declaration
            Token::Keyword(Keyword::Local) => {
                self.advance(); // `local`
                let start = position;
                // A fallback type specifier keeps declarator parsing alive
                // for diagnostics even when the type itself is malformed.
                let type_spec = self.parse_type_specifier().unwrap_or_fallback(self);
                let declarators = self.parse_variable_declarators();
                // TODO: parse
                let span = AstSpan::range(start, self.last_consumed_position_or_start());
                Some(self.arena.alloc_node(
                    Statement::LocalVariableDeclaration {
                        type_spec,
                        declarators,
                    },
                    span,
                ))
            }
            // Label: Ident ':' (also tolerate Begin:/End:)
            Token::Identifier | Token::Keyword(Keyword::Begin | Keyword::End)
                if matches!(self.peek_token_at(1), Some(Token::Colon)) =>
            {
                self.advance(); // ident/begin/end
                self.advance(); // :
                Some(self.arena.alloc_node(
                    Statement::Label(self.arena.string(lexeme)),
                    AstSpan::range(position, self.last_consumed_position_or_start()),
                ))
            }
            // Nested function/event/operator inside blocks
            t if t == Token::Keyword(Keyword::Function)
                || t == Token::Keyword(Keyword::Event)
                || t.is_valid_function_modifier() =>
            {
                let f = self.parse_callable_definition();
                let span = *f.span();
                Some(self.arena.alloc_node(Statement::Function(f), span))
            }
            // C-like variable declaration starting with a TypeSpec
            // NOTE(review): disabled mid-refactor; re-enable once
            // `parse_variable_declaration_start` is available again.
            /*token if self.looks_like_variable_declaration_start(token) => Some(
                self.parse_variable_declaration_start()
                    .sync_error_until(self, SyncLevel::Statement)
                    .unwrap_or_fallback(self),
            ),*/
            // Not a statement
            _ => None,
        }
    }
}

View File

@ -1,185 +0,0 @@
//! Statement parsing for the language front-end.
//!
//! Implements a simple recursive-descent parser for
//! *Fermented UnrealScript statements*.
use crate::ast::{AstSpan, Statement, StatementRef};
use crate::lexer::Token;
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses a single statement.
    ///
    /// Does not consume a trailing `;` except for [`Statement::Empty`].
    /// The caller handles semicolons. Returns [`Some`] if a statement is
    /// recognized; otherwise [`None`].
    #[must_use]
    pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
        let Some((token, lexeme, location)) = self.peek_token_lexeme_and_location() else {
            self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
            return None;
        };
        match token {
            // Empty statement
            Token::Semicolon => {
                self.advance(); // `;`
                Some(self.arena.alloc(Statement::Empty, AstSpan::new(location)))
            }
            // UnrealScript's standard `local` variable declaration
            Token::Local => {
                self.advance(); // `local`
                Some(
                    self.parse_local_variable_declaration_cont()
                        .widen_error_span_from(location)
                        .sync_error_until(self, SyncLevel::Statement)
                        .unwrap_or_fallback(self),
                )
            }
            // Label definition
            Token::Identifier if matches!(self.peek_token_at(1), Some(Token::Colon)) => {
                self.advance(); // `Token::Identifier`
                self.advance(); // `:`
                Some(self.arena.alloc(
                    Statement::Label(self.arena.string(lexeme)),
                    AstSpan::range(location, self.last_visited_location()),
                ))
            }
            // C-like variable declaration
            token
                if token.is_valid_type_name_token()
                    && Some(Token::Identifier) == self.peek_token_at(1) =>
            {
                self.advance(); // `TYPE_NAME`
                // Next token is guaranteed to exist by the arm condition
                Some(self.parse_variable_declaration_cont(lexeme))
            }
            // Not a statement
            _ => None,
        }
    }

    /// Parses a local variable declaration after `local` has been consumed.
    ///
    /// Requires the next token to be a type name. Initializers are not allowed.
    /// Reports and recovers from errors; the identifier list may be empty if
    /// recovery fails.
    fn parse_local_variable_declaration_cont(
        &mut self,
    ) -> crate::parser::ParseResult<'src, 'arena, StatementRef<'src, 'arena>> {
        let Some((type_token, type_name)) = self.peek_token_and_lexeme() else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };
        if !type_token.is_valid_type_name_token() {
            return Err(self.make_error_here(ParseErrorKind::LocalInvalidTypeName));
        }
        // NOTE(review): taken before advancing past the type name, so this
        // presumably points at the already-consumed `local` keyword — confirm.
        let declaration_start_location = self.last_visited_location();
        self.advance(); // `TYPE_NAME`
        let type_name = self.arena.string(type_name);
        let identifiers = self.parse_local_identifier_list();
        if identifiers.is_empty() {
            // Report, but still allocate the declaration node so later
            // stages see a well-formed statement.
            self.make_error_here(ParseErrorKind::LocalMissingIdentifier)
                .widen_error_span_from(declaration_start_location)
                .report_error(self);
        }
        Ok(self.arena.alloc(
            Statement::LocalVariableDeclaration {
                type_name,
                identifiers,
            },
            AstSpan::range(declaration_start_location, self.last_visited_location()),
        ))
    }

    /// Parses a comma-separated list of identifiers for a local declaration.
    ///
    /// Best-effort recovery from errors. Returns an empty list if no valid
    /// identifiers are found.
    fn parse_local_identifier_list(
        &mut self,
    ) -> crate::arena::ArenaVec<'arena, crate::arena::ArenaString<'arena>> {
        let mut identifiers = self.arena.vec();
        while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() {
            if token == Token::Identifier {
                identifiers.push(self.arena.string(next_variable_name));
                self.advance(); // `Token::Identifier`
            } else {
                self.report_error_here(ParseErrorKind::LocalBadVariableIdentifier);
                // Try to recover to the next variable name
                self.recover_until(SyncLevel::ListSeparator);
            }
            // Disallow initializers in `local`.
            if let Some(Token::Assign) = self.peek_token() {
                self.report_error_here(ParseErrorKind::LocalInitializerNotAllowed);
                self.recover_until(SyncLevel::ListSeparator);
            }
            // Can the list continue?
            // Loop cannot stall: each iteration consumes a token or breaks
            if !self.eat(Token::Comma) {
                break;
            }
        }
        // End-of-file branch
        identifiers
    }

    /// Parses a non-local variable declaration after the type name token
    /// has been consumed.
    ///
    /// The caller must guarantee that at least one declarator follows.
    /// Optional initializers are allowed.
    fn parse_variable_declaration_cont(
        &mut self,
        type_name: &'src str,
    ) -> StatementRef<'src, 'arena> {
        // The type name was just consumed, so this is its location.
        let declaration_start_location = self.last_visited_location();
        let type_name = self.arena.string(type_name);
        let declarations = self.parse_variable_declaration_list();
        // An identifier required by method's condition
        debug_assert!(!declarations.is_empty());
        self.arena.alloc(
            Statement::VariableDeclaration {
                type_name,
                declarations,
            },
            AstSpan::range(declaration_start_location, self.last_visited_location()),
        )
    }

    /// Parses a comma-separated list of declarators with optional `=`
    /// initializers.
    ///
    /// Best-effort recovery on errors.
    /// The caller should invoke this when the next token starts a declarator.
    fn parse_variable_declaration_list(
        &mut self,
    ) -> crate::arena::ArenaVec<'arena, crate::ast::VariableDeclarator<'src, 'arena>> {
        let mut variables = self.arena.vec();
        while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() {
            if token == Token::Identifier {
                self.advance(); // `Token::Identifier`
                let name = self.arena.string(next_variable_name);
                // Unlike `local` declarations, `=` initializers are allowed.
                let initializer = if self.eat(Token::Assign) {
                    Some(self.parse_expression())
                } else {
                    None
                };
                variables.push(crate::ast::VariableDeclarator { name, initializer });
            } else {
                self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
                // Try to recover to the next variable name
                self.recover_until(SyncLevel::ListSeparator);
            }
            // Can the list continue?
            // Loop cannot stall: each iteration consumes a token or breaks
            if !self.eat(Token::Comma) {
                break;
            }
        }
        // End-of-file branch
        variables
    }
}

View File

@ -1,227 +0,0 @@
use crate::arena::ArenaVec;
use crate::ast::{AstSpan, ExpressionRef, StatementRef};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses a `switch` expression after the `switch` keyword was consumed.
    ///
    /// Arm bodies accept statements and expressions. A trailing expression
    /// without `;` in the last arm becomes the switch's tail value if none
    /// was captured yet.
    /// Only one `default` case arm is allowed.
    /// Returns a best-effort switch node on premature EOF.
    #[must_use]
    pub(crate) fn parse_switch_cont(
        &mut self,
        switch_start_location: TokenLocation,
    ) -> crate::parser::ParseExpressionResult<'src, 'arena> {
        let selector = self.parse_expression();
        self.expect(
            Token::Brace(crate::lexer::BraceKind::Normal),
            ParseErrorKind::SwitchMissingBody,
        )
        .report_error(self);
        let (mut cases, mut default_arm, mut tail) = (self.arena.vec(), None, None);
        let mut span = AstSpan::new(switch_start_location);
        loop {
            let Some((token, token_location)) = self.peek_token_and_location() else {
                self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
                span.extend_to(self.peek_location());
                return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
            };
            match token {
                Token::RightBrace => {
                    self.advance(); // '}'
                    span.extend_to(token_location);
                    return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
                }
                Token::Case => {
                    if default_arm.is_some() {
                        self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault);
                    }
                    let case_node = self.parse_switch_case_group(token_location, &mut tail);
                    cases.push(case_node);
                }
                Token::Default => {
                    if default_arm.is_some() {
                        self.report_error_here(ParseErrorKind::SwitchDuplicateDefault);
                    }
                    // We still parse a duplicate default to surface all errors.
                    // Bodies are effectively fused for error reporting;
                    // compilation stops anyway, so this trades AST correctness
                    // for diagnostics.
                    self.parse_switch_default_arm(
                        token_location,
                        default_arm.get_or_insert_with(|| self.arena.vec()),
                        &mut tail,
                    );
                }
                // This can only be triggered before parsing any `case` or
                // `default` arms, since they stop either at the start of
                // another arm declaration (e.g. at `case`/`default`) or
                // at the `}` that ends switch body.
                _ => self.parse_switch_preamble_items(&mut tail),
            }
            // Ensure forward progress under errors to avoid infinite loops.
            if self.peek_location() <= token_location {
                self.advance();
            }
        }
    }

    /// Parses a stacked `case` group and its body:
    /// `case <expr>: (case <expr>:)* <arm-body-until-boundary>`.
    ///
    /// Returns the allocated [`crate::ast::CaseRef`] node.
    #[must_use]
    fn parse_switch_case_group(
        &mut self,
        first_case_location: TokenLocation,
        tail: &mut Option<ExpressionRef<'src, 'arena>>,
    ) -> crate::ast::CaseRef<'src, 'arena> {
        let mut labels = self.arena.vec();
        while let Some((Token::Case, case_location)) = self.peek_token_and_location() {
            // Guaranteed progress: we entered on `Token::Case`.
            self.advance(); // 'case'
            labels.push(self.parse_expression());
            // Enforce `:` after each case with statement-level recovery.
            self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
                .widen_error_span_from(case_location)
                .sync_error_until(self, crate::parser::SyncLevel::Statement)
                .report_error(self);
        }
        let mut body = self.arena.vec();
        self.parse_switch_arm_body(&mut body, tail);
        let case_span = compute_case_span(first_case_location, &labels, &body);
        self.arena
            .alloc(crate::ast::SwitchCase { labels, body }, case_span)
    }

    /// Parses the `default :` arm and its body.
    ///
    /// Does not consume a boundary token after the body.
    fn parse_switch_default_arm(
        &mut self,
        default_location: TokenLocation,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
        tail: &mut Option<ExpressionRef<'src, 'arena>>,
    ) {
        self.advance(); // 'default'
        self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
            .widen_error_span_from(default_location)
            .sync_error_until(self, crate::parser::SyncLevel::Statement)
            .report_error(self);
        self.parse_switch_arm_body(statements, tail);
    }

    /// Parses items of a single switch arm body until a boundary token or EOF.
    ///
    /// Boundary tokens: `case`, `default`, `}`.
    fn parse_switch_arm_body(
        &mut self,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
        tail: &mut Option<ExpressionRef<'src, 'arena>>,
    ) {
        // No need to report end-of-file as it'll be done by
        // `parse_switch_cont`.
        while let Some((token, token_location)) = self.peek_token_and_location() {
            match token {
                // Complain about tail instruction if `switch` body
                // doesn't end here
                Token::Case | Token::Default => {
                    if let Some(tail_expression) = tail.take() {
                        self.report_error_here(ParseErrorKind::SwitchBareExpressionBeforeNextArm);
                        // Demote the dangling tail to a regular expression
                        // statement so the arm stays well-formed.
                        let span = *tail_expression.span();
                        let stmt = self
                            .arena
                            .alloc(crate::ast::Statement::Expression(tail_expression), span);
                        statements.push(stmt);
                    }
                    break;
                }
                Token::RightBrace => break,
                _ => (),
            }
            // We know that at this point:
            // 1. There is still a token and it is not EOF;
            // 2. It isn't end of the block.
            // So having a tail statement there is a problem!
            if let Some(tail_expression) = tail.take() {
                self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
                let tail_span = *tail_expression.span();
                let node = self.arena.alloc(
                    crate::ast::Statement::Expression(tail_expression),
                    tail_span,
                );
                statements.push(node);
            }
            *tail = self.parse_block_item(statements);
            // Ensure forward progress under errors to avoid infinite loops.
            if self.peek_location() <= token_location {
                self.advance();
            }
        }
    }

    /// Parses items that were found in code *before* any arm (`case`/`default`)
    /// declaration.
    ///
    /// These aren't allowed, but we still want to perform a proper parsing step
    /// to report whatever errors we can in case programmer simply forgot to put
    /// an arm declaration.
    ///
    /// Boundary tokens: `case`, `default`, `}`.
    fn parse_switch_preamble_items(&mut self, tail: &mut Option<ExpressionRef<'src, 'arena>>) {
        // Report the spurious token.
        self.report_error_here(ParseErrorKind::SwitchTopLevelItemNotCase);
        // Discard parsed statements into a sink vector.
        // This is a bit "hacky", but I don't want to adapt code to skip
        // production of AST nodes just to report errors in
        // one problematic case.
        let mut sink = self.arena.vec();
        self.parse_switch_arm_body(&mut sink, tail);
    }

    /// Helper to allocate a `Switch` expression with the given span.
    #[must_use]
    fn alloc_switch_node(
        &mut self,
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, crate::ast::CaseRef<'src, 'arena>>,
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
        tail: Option<ExpressionRef<'src, 'arena>>,
        span: AstSpan,
    ) -> ExpressionRef<'src, 'arena> {
        self.arena.alloc(
            crate::ast::Expression::Switch {
                selector,
                cases,
                default_arm,
                tail,
            },
            span,
        )
    }
}
/// Computes [`AstSpan`] covering all labels and the body.
///
/// Prefers the end of the last body statement; falls back to the last
/// label, and finally to the start location when both are empty.
#[must_use]
fn compute_case_span(
    labels_start_location: TokenLocation,
    labels: &[ExpressionRef],
    body: &[StatementRef],
) -> AstSpan {
    let mut span = AstSpan {
        from: labels_start_location,
        to: labels_start_location,
    };
    let end_location = body
        .last()
        .map(|last_statement| last_statement.span().to)
        .or_else(|| labels.last().map(|last_label| last_label.span().to));
    if let Some(end_location) = end_location {
        span.extend_to(end_location);
    }
    span
}

View File

@ -1,8 +1,8 @@
//! Parser for Fermented UnrealScript (FerUS). //! Parser for Fermented `UnrealScript` (`FerUS`).
//! //!
//! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST //! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST
//! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser; //! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser;
//! the rest rely on recursive descent in [`crate::parser::grammar`]. //! the rest rely on recursive descent in [`crate::parser::grammar`].\
//! Non-fatal errors accumulate in `Parser::diagnostics` as //! Non-fatal errors accumulate in `Parser::diagnostics` as
//! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by //! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by
//! [`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while //! [`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while
@ -27,17 +27,14 @@
use super::lexer; use super::lexer;
pub use lexer::{TokenPiece, Tokens}; pub use lexer::{TokenData, Tokens};
mod cursor; mod cursor;
mod errors; mod errors;
mod grammar; mod grammar;
pub mod pretty;
mod recovery; mod recovery;
mod trivia; mod trivia;
pub use pretty::{ExprTree, StmtTree};
pub use errors::ParseError; pub use errors::ParseError;
pub(crate) use errors::{ParseErrorKind, ParseResult}; pub(crate) use errors::{ParseErrorKind, ParseResult};
pub(crate) use recovery::{ResultRecoveryExt, SyncLevel}; pub(crate) use recovery::{ResultRecoveryExt, SyncLevel};
@ -50,8 +47,8 @@ pub type ParseExpressionResult<'src, 'arena> =
pub struct Parser<'src, 'arena> { pub struct Parser<'src, 'arena> {
arena: &'arena crate::arena::Arena, arena: &'arena crate::arena::Arena,
pub diagnostics: Vec<crate::diagnostics::Diagnostic>, pub diagnostics: Vec<crate::diagnostics::Diagnostic>,
cursor: cursor::CursorComponent<'src>, cursor: cursor::Cursor<'src, 'src>,
trivia: trivia::TriviaComponent<'src>, trivia: trivia::TriviaIndexBuilder<'src>,
} }
impl<'src, 'arena> Parser<'src, 'arena> { impl<'src, 'arena> Parser<'src, 'arena> {
@ -59,8 +56,8 @@ impl<'src, 'arena> Parser<'src, 'arena> {
Self { Self {
arena, arena,
diagnostics: Vec::new(), diagnostics: Vec::new(),
cursor: cursor::CursorComponent::new(file), cursor: cursor::Cursor::new(file),
trivia: trivia::TriviaComponent::default(), trivia: trivia::TriviaIndexBuilder::default(),
} }
} }
} }

View File

@ -1,353 +0,0 @@
use crate::ast::{Expression, Statement, SwitchCase, VariableDeclarator};
use core::fmt;
/// A borrow of either a statement or an expression node,
/// plus helpers to enrich the printed tree.
enum AnyNode<'src, 'a, 'b> {
    /// A borrowed statement node.
    Stmt(&'b Statement<'src, 'a>),
    /// A borrowed expression node.
    Expr(&'b Expression<'src, 'a>),
    /// A borrowed `switch` case arm.
    Case(&'b SwitchCase<'src, 'a>),
    /// A leaf line with a preformatted label (e.g., variable names).
    Text(String),
    /// Wraps a child with a tag like "cond", "body", "else", "init".
    Tagged(&'static str, Box<AnyNode<'src, 'a, 'b>>),
}
/// Public wrappers to print trees starting from either kind of node.
pub struct StmtTree<'src, 'a, 'b>(pub &'b Statement<'src, 'a>);
pub struct ExprTree<'src, 'a, 'b>(pub &'b Expression<'src, 'a>);

impl<'src, 'a, 'b> fmt::Display for StmtTree<'src, 'a, 'b> {
    // The root is rendered as its (only) parent's last child.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_node(AnyNode::Stmt(self.0), f, "", true)
    }
}

impl<'src, 'a, 'b> fmt::Display for ExprTree<'src, 'a, 'b> {
    // Same as `StmtTree`, but rooted at an expression.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_node(AnyNode::Expr(self.0), f, "", true)
    }
}
/// Recursively renders `node` and its children as a box-drawing tree.
///
/// `prefix` carries the indentation accumulated from ancestor levels;
/// `is_last` tells whether `node` is its parent's final child, which
/// selects the connector glyph (`└` vs `├`) and whether the child prefix
/// keeps a vertical rule (`│`).
fn fmt_node<'src, 'a, 'b>(
    node: AnyNode<'src, 'a, 'b>,
    f: &mut fmt::Formatter<'_>,
    prefix: &str,
    is_last: bool,
) -> fmt::Result {
    // Bug fix: both connector strings were empty, so every node printed as
    // a bare "─ " with no visible tree structure. Restore the glyphs.
    write!(f, "{}{}─ ", prefix, if is_last { "└" } else { "├" })?;
    writeln!(f, "{}", label(&node))?;
    // A last child terminates its branch (blank continuation); any other
    // child keeps a vertical rule for the siblings below it.
    let new_prefix = format!("{}{}", prefix, if is_last { "   " } else { "│  " });
    let kids = children(node);
    let len = kids.len();
    for (i, child) in kids.into_iter().enumerate() {
        let last = i + 1 == len;
        fmt_node(child, f, &new_prefix, last)?;
    }
    Ok(())
}
/// ----- Labeling -----
///
/// Produces the one-line label printed for `node`, delegating to the
/// per-kind labelers and prefixing tagged nodes with their tag.
fn label<'src, 'a, 'b>(node: &AnyNode<'src, 'a, 'b>) -> String {
    match node {
        AnyNode::Expr(e) => expr_label(e),
        AnyNode::Stmt(s) => stmt_label(s),
        AnyNode::Case(c) => case_label(c),
        AnyNode::Text(s) => s.clone(),
        // Tagged nodes print as `tag: <inner label>`.
        AnyNode::Tagged(tag, inner) => format!("{tag}: {}", label(inner)),
    }
}
/// Wraps `s` in double quotes, escaping backslashes, quotes, and the
/// common whitespace control characters (`\n`, `\r`, `\t`).
fn quote_str(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for character in s.chars() {
        let escape_sequence = match character {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match escape_sequence {
            Some(sequence) => quoted.push_str(sequence),
            None => quoted.push(character),
        }
    }
    quoted.push('"');
    quoted
}
/// Renders a one-line descriptive label for an expression node.
fn expr_label<'src, 'a>(e: &Expression<'src, 'a>) -> String {
    match e {
        Expression::Binary(_, op, _) => format!("Binary {op}"),
        Expression::LeftUnary(op, _) => format!("UnaryL {op}"),
        Expression::RightUnary(_, op) => format!("UnaryR {op}"),
        Expression::Identifier(s) => format!("Ident {s}"),
        Expression::String(s) => {
            // Avoid assuming ArenaString exposes &str; go via Display -> String.
            format!("String {}", quote_str(&s.to_string()))
        }
        Expression::Integer(i) => format!("Int {i}"),
        Expression::Float(x) => format!("Float {x}"),
        Expression::Bool(true) => "Bool true".into(),
        Expression::Bool(false) => "Bool false".into(),
        Expression::None => "None".into(),
        Expression::Parentheses(_) => "Parentheses".into(),
        Expression::Block { statements, tail } => {
            // The tail expression counts as one extra item.
            let n = statements.len() + usize::from(tail.is_some());
            let tail_s = if tail.is_some() { " tail" } else { "" };
            format!("BlockExpr ({n} items{tail_s})")
        }
        Expression::If { .. } => "IfExpr".into(),
        Expression::While { .. } => "WhileExpr".into(),
        Expression::DoUntil { .. } => "DoUntilExpr".into(),
        Expression::ForEach { .. } => "ForEachExpr".into(),
        Expression::For { .. } => "ForExpr".into(),
        Expression::Switch {
            cases,
            default_arm: default,
            ..
        } => {
            let d = if default.is_some() { " yes" } else { " no" };
            format!("SwitchExpr cases={} default:{}", cases.len(), d)
        }
        // Fix: the label already goes through `Display` inside `format!`,
        // so the previous `.to_string()` allocated an intermediate String
        // for nothing.
        Expression::Goto(label) => format!("Goto {label}"),
        Expression::Continue => "Continue".into(),
        Expression::Break(Some(_)) => "Break value".into(),
        Expression::Break(None) => "Break".into(),
        Expression::Return(Some(_)) => "Return value".into(),
        Expression::Return(None) => "Return".into(),
        Expression::Error => "Error".into(),
    }
}
/// ----- Children collection -----
///
/// Dispatches to the kind-specific child collectors; `Tagged` wrappers are
/// transparent and `Text` leaves never have children.
fn children<'src, 'a, 'b>(node: AnyNode<'src, 'a, 'b>) -> Vec<AnyNode<'src, 'a, 'b>> {
    match node {
        AnyNode::Tagged(_, inner) => children(*inner),
        AnyNode::Expr(expr) => expr_children(expr),
        AnyNode::Stmt(stmt) => stmt_children(stmt),
        AnyNode::Case(case) => case_children(case),
        AnyNode::Text(_) => Vec::new(),
    }
}
/// Expression children can include statements inside Block/Switch.
///
/// Returns the direct children of `e` in display order, each wrapped in an
/// [`AnyNode`] and, where a role is meaningful, tagged with it (`cond`,
/// `body`, `tail`, ...). Leaf expressions return an empty vector.
fn expr_children<'src, 'a, 'b>(e: &'b Expression<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    match e {
        // Purely expression subtrees
        Expression::Binary(lhs, _, rhs) => vec![AnyNode::Expr(&*lhs), AnyNode::Expr(&*rhs)],
        Expression::LeftUnary(_, expr) => vec![AnyNode::Expr(&*expr)],
        Expression::RightUnary(expr, _) => vec![AnyNode::Expr(&*expr)],
        Expression::Parentheses(expr) => vec![AnyNode::Expr(&*expr)],
        // Structured expression forms
        Expression::Block { statements, tail } => {
            let mut out: Vec<AnyNode<'src, 'a, 'b>> = statements
                .iter()
                .map(|s| AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*s))))
                .collect();
            // The optional trailing expression is displayed after the statements.
            if let Some(t) = tail.as_ref() {
                out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t))));
            }
            out
        }
        Expression::If {
            condition,
            body,
            else_body,
        } => {
            let mut out = vec![
                AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))),
                AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
            ];
            // `else` is optional and comes last when present.
            if let Some(e) = else_body {
                out.push(AnyNode::Tagged("else", Box::new(AnyNode::Expr(&*e))));
            }
            out
        }
        Expression::While { condition, body } => vec![
            AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))),
            AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
        ],
        // Do-until prints its body first; the condition is shown after it,
        // tagged `until`, mirroring source order of the construct.
        Expression::DoUntil { condition, body } => vec![
            AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
            AnyNode::Tagged("until", Box::new(AnyNode::Expr(&*condition))),
        ],
        Expression::ForEach { iterator, body } => vec![
            AnyNode::Tagged("iter", Box::new(AnyNode::Expr(&*iterator))),
            AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
        ],
        Expression::For {
            init,
            condition,
            step,
            body,
        } => {
            // All three header slots are optional; only the body is mandatory.
            let mut out = Vec::with_capacity(4);
            if let Some(i) = init {
                out.push(AnyNode::Tagged("init", Box::new(AnyNode::Expr(&*i))));
            }
            if let Some(c) = condition {
                out.push(AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*c))));
            }
            if let Some(s) = step {
                out.push(AnyNode::Tagged("step", Box::new(AnyNode::Expr(&*s))));
            }
            out.push(AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))));
            out
        }
        Expression::Switch {
            selector,
            cases,
            default_arm: default,
            tail,
        } => {
            let mut out: Vec<AnyNode<'src, 'a, 'b>> = vec![AnyNode::Tagged(
                "selector",
                Box::new(AnyNode::Expr(&*selector)),
            )];
            for case in cases.iter() {
                out.push(AnyNode::Tagged("case", Box::new(AnyNode::Case(&*case))));
            }
            // Each statement of the default arm becomes its own `default` child.
            if let Some(d) = default.as_ref() {
                for stmt in d.iter() {
                    out.push(AnyNode::Tagged("default", Box::new(AnyNode::Stmt(&*stmt))));
                }
            }
            if let Some(t) = tail.as_ref() {
                out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t))));
            }
            out
        }
        // Leaves
        Expression::Identifier(_)
        | Expression::String(_)
        | Expression::Integer(_)
        | Expression::Float(_)
        | Expression::Bool(_)
        | Expression::None
        | Expression::Goto(_)
        | Expression::Continue
        | Expression::Break(None)
        | Expression::Return(None)
        | Expression::Error => vec![],
        // Single optional-child leaves
        Expression::Break(Some(v)) => vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))],
        Expression::Return(Some(v)) => {
            vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))]
        }
    }
}
/// Renders a one-line label for statement `s`, including declared variable
/// names and (for full declarations) how many carry initializers.
fn stmt_label<'src, 'a>(s: &Statement<'src, 'a>) -> String {
    use Statement::*;
    match s {
        Empty => "Empty ;".into(),
        Expression(_) => "Expression".into(),
        LocalVariableDeclaration {
            type_name,
            identifiers,
        } => {
            let rendered: Vec<String> = identifiers.iter().map(|n| n.to_string()).collect();
            format!(
                "LocalVarDecl type={type_name} count={} names=[{}]",
                rendered.len(),
                rendered.join(", ")
            )
        }
        VariableDeclaration {
            type_name,
            declarations,
        } => {
            let total = declarations.len();
            // Count only declarators that actually have an initializer.
            let inits = declarations
                .iter()
                .filter(|decl| decl.initializer.is_some())
                .count();
            let rendered: Vec<String> = declarations
                .iter()
                .map(|decl| decl.name.to_string())
                .collect();
            format!(
                "VarDecl type={type_name} vars={total} inits={inits} names=[{}]",
                rendered.join(", ")
            )
        }
        Label(name) => format!("Label {name}"),
        Error => "Error".into(),
    }
}
/// Returns the direct children of statement `s`.
///
/// Declared names are rendered as plain [`AnyNode::Text`] leaves; only
/// initializer expressions contribute real expression subtrees.
fn stmt_children<'src, 'a, 'b>(s: &'b Statement<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    use Statement::*;
    match s {
        // Leaf statements: nothing to descend into.
        Empty | Label(_) | Error => vec![],
        Expression(expr) => vec![AnyNode::Expr(&*expr)],
        LocalVariableDeclaration {
            identifiers: variable_names,
            ..
        } => variable_names
            .iter()
            .map(|n| AnyNode::Text(format!("name: {n}")))
            .collect(),
        VariableDeclaration {
            declarations: variable_names,
            ..
        } => {
            let mut out = Vec::new();
            for VariableDeclarator {
                name,
                initializer: initial_value,
            } in variable_names.iter()
            {
                out.push(AnyNode::Text(format!("var: {name}")));
                // When present, the initializer follows its variable's name node.
                if let Some(init_expr) = initial_value {
                    out.push(AnyNode::Tagged(
                        "init",
                        Box::new(AnyNode::Expr(&*init_expr)),
                    ));
                }
            }
            out
        }
    }
}
/// Collects the children of a switch case: all of its labels first,
/// then every statement of its body, in source order.
fn case_children<'src, 'a, 'b>(c: &'b SwitchCase<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    let labels = c
        .labels
        .iter()
        .map(|lbl| AnyNode::Tagged("label", Box::new(AnyNode::Expr(&*lbl))));
    let body = c
        .body
        .iter()
        .map(|stmt| AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*stmt))));
    labels.chain(body).collect()
}
/// Renders a one-line summary of a switch case: label and body counts.
fn case_label<'src, 'a>(c: &SwitchCase<'src, 'a>) -> String {
    format!("Case labels={} body_items={}", c.labels.len(), c.body.len())
}

View File

@ -4,89 +4,190 @@
//! token is found. The sync target is chosen from [`SyncLevel`] based on //! token is found. The sync target is chosen from [`SyncLevel`] based on
//! the error kind. Methods on [`ParseResult`] let callers widen the error span, //! the error kind. Methods on [`ParseResult`] let callers widen the error span,
//! synchronize, report, and produce fallback values. //! synchronize, report, and produce fallback values.
//!
//! General idea is that any method that returns something other than an error
//! can be assumed to have reported it.
use crate::lexer::{Token, TokenLocation}; use crate::ast::{AstSpan, CallableKind, IdentifierToken, QualifiedIdentifier};
use crate::diagnostics::Diagnostic;
use crate::lexer::{Token, TokenPosition};
use crate::parser::{ParseError, ParseResult, Parser}; use crate::parser::{ParseError, ParseResult, Parser};
/// Synchronization groups the parser can stop at during recovery. /// Synchronization groups the parser can stop at during recovery.
/// ///
/// Stronger levels subsume weaker ones. The enum's variant order defines this /// The variant order defines recovery strength: later variants are treated as
/// ordering of strength via [`Ord`]; changing it changes recovery behavior. /// "stronger" boundaries, so synchronizing to a weaker level will also stop
/// at any stronger one.
///
/// This enum is intentionally coarse-grained and semantic. It is not meant to
/// encode arbitrary token sets.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
pub(crate) enum SyncLevel { pub enum SyncLevel {
/// Tokens that appear inside expressions. /// Tokens that can reasonably continue or restart an expression.
/// ///
/// Includes operators, member access `.`, ternary `? :`, an opening `(`, /// This is the loosest recovery level.
/// and identifiers.
Expression, Expression,
/// List separator `,`.
/// Separator between homogeneous list elements, e.g. `,`.
///
/// Synchronizing here also stops at closing delimiters and stronger
/// structural boundaries.
ListSeparator, ListSeparator,
/// Close of a parenthesized subexpression `)`.
/// Closing `>` of an angle-bracket-delimited type/class argument list.
CloseAngleBracket,
/// Closing `)` of a parenthesized/grouped construct.
CloseParenthesis, CloseParenthesis,
/// Close of an index or list `]`.
/// Closing `]` of an index or bracket-delimited construct.
CloseBracket, CloseBracket,
/// Statement boundary or starter.
/// A statement boundary or statement starter.
///
/// Includes `;` and keywords that begin standalone statements /
/// statement-like control-flow forms.
Statement, Statement,
/// Block boundary braces (both `{` and `}`).
/// Start of a `switch` arm.
///
/// This is useful because `case` / `default` are stronger boundaries than
/// ordinary statements inside switch parsing.
SwitchArmStart,
/// Start of a declaration-like item.
///
/// Used for recovery in declaration-containing bodies where the next
/// sensible point is "the next member/declaration" rather than merely
/// "some statement".
DeclarationStart,
/// A hard block boundary.
///
/// This is the strongest normal recovery point.
BlockBoundary, BlockBoundary,
/// Start of a top-level or class-level declaration.
TopDeclaration,
} }
impl SyncLevel { impl SyncLevel {
/// Converts [`Token`] to its [`SyncLevel`], if it has one. /// Converts a token to its synchronization class, if any.
fn for_token(token: Token) -> Option<SyncLevel> { const fn for_token(token: Token) -> Option<Self> {
use SyncLevel::*; use crate::lexer::Keyword;
use Token::*; use SyncLevel::{
BlockBoundary, CloseAngleBracket, CloseBracket, CloseParenthesis, DeclarationStart,
Expression, ListSeparator, Statement, SwitchArmStart,
};
match token { match token {
Exponentiation | Increment | Decrement | Not | BitwiseNot | Dot | Cross | Multiply // Expression-level recovery points
| Divide | Modulo | Plus | Minus | ConcatSpace | Concat | LeftShift Token::Exponentiation
| LogicalRightShift | RightShift | Less | LessEqual | Greater | GreaterEqual | Token::Increment
| Equal | NotEqual | ApproximatelyEqual | ClockwiseFrom | BitwiseAnd | BitwiseOr | Token::Decrement
| BitwiseXor | And | Xor | Or | Assign | MultiplyAssign | DivideAssign | Token::Not
| ModuloAssign | PlusAssign | MinusAssign | ConcatAssign | ConcatSpaceAssign | Token::BitwiseNot
| Period | Question | Colon | LeftParenthesis | Identifier => Some(Expression), | Token::Multiply
| Token::Divide
Comma => Some(ListSeparator), | Token::Modulo
| Token::Plus
RightParenthesis => Some(CloseParenthesis), | Token::Minus
RightBracket => Some(CloseBracket), | Token::ConcatSpace
| Token::Concat
Case | Default | If | Else | Switch | For | ForEach | While | Do | Return | Break | Token::LeftShift
| Continue | Local | Semicolon => Some(Statement), | Token::LogicalRightShift
| Token::RightShift
Brace(_) | RightBrace => Some(BlockBoundary), | Token::LessEqual
| Token::GreaterEqual
Class | Struct | Enum | State | Function | Event | Delegate | Operator | Var | Token::Equal
| Replication | NativeReplication | DefaultProperties | CppText | ExecDirective => { | Token::NotEqual
Some(TopDeclaration) | Token::ApproximatelyEqual
| Token::BitwiseAnd
| Token::BitwiseOr
| Token::BitwiseXor
| Token::LogicalAnd
| Token::LogicalXor
| Token::LogicalOr
| Token::Assign
| Token::MultiplyAssign
| Token::DivideAssign
| Token::ModuloAssign
| Token::PlusAssign
| Token::MinusAssign
| Token::ConcatAssign
| Token::ConcatSpaceAssign
| Token::Period
| Token::Question
| Token::Colon
| Token::LeftParenthesis
| Token::Identifier
| Token::Keyword(Keyword::Dot | Keyword::Cross | Keyword::ClockwiseFrom) => {
Some(Expression)
} }
_ => Option::None, // List / delimiter boundaries
Token::Comma => Some(ListSeparator),
Token::Greater => Some(CloseAngleBracket),
Token::RightParenthesis => Some(CloseParenthesis),
Token::RightBracket => Some(CloseBracket),
// Statement-level boundaries
Token::Semicolon
| Token::Keyword(
Keyword::If
| Keyword::Else
| Keyword::Switch
| Keyword::For
| Keyword::ForEach
| Keyword::While
| Keyword::Do
| Keyword::Until
| Keyword::Return
| Keyword::Break
| Keyword::Continue
| Keyword::Local,
) => Some(Statement),
// Switch-specific stronger boundary
Token::Keyword(Keyword::Case | Keyword::Default) => Some(SwitchArmStart),
// Declaration/member starts
Token::Keyword(
Keyword::Class
| Keyword::Struct
| Keyword::Enum
| Keyword::State
| Keyword::Function
| Keyword::Event
| Keyword::Delegate
| Keyword::Operator
| Keyword::Var
| Keyword::Replication
| Keyword::NativeReplication
| Keyword::DefaultProperties
| Keyword::CppText
| Keyword::CppStruct,
)
| Token::ExecDirective => Some(DeclarationStart),
// Hard structural stop
Token::LeftBrace | Token::CppBlock | Token::RightBrace => Some(BlockBoundary),
_ => None,
} }
} }
} }
impl<'src, 'arena> Parser<'src, 'arena> { impl Parser<'_, '_> {
/// Converts a parse error into a diagnostic and queues it. /// Converts a parse error into a diagnostic and queues it.
/// ///
/// Placeholder implementation. /// Placeholder implementation.
fn handle_error(&mut self, error: ParseError) { pub fn report_error(&mut self, error: ParseError) {
let diagnostic = crate::diagnostics::DiagnosticBuilder::error(format!( self.diagnostics.push(Diagnostic::from(error));
"error {:?} while parsing",
error.kind
))
.primary_label(error.source_span, "happened here")
.build();
self.diagnostics.push(diagnostic);
} }
/// Reports a parser error with [`crate::parser::ParseErrorKind`] at /// Reports a parser error with [`crate::parser::ParseErrorKind`] at
/// the current location and queues an appropriate diagnostic. /// the current location and queues an appropriate diagnostic.
pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) { pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) {
let new_error = self.make_error_here(error_kind); let new_error = self.make_error_here(error_kind);
self.handle_error(new_error); self.report_error(new_error);
} }
/// Skips tokens until a token with `min_sync` level or stronger is found. /// Skips tokens until a token with `min_sync` level or stronger is found.
@ -108,18 +209,32 @@ impl<'src, 'arena> Parser<'src, 'arena> {
/// Supplies a fallback value after a parse error so parsing can continue and /// Supplies a fallback value after a parse error so parsing can continue and
/// reveal further errors. /// reveal further errors.
pub(crate) trait RecoveryFallback<'src, 'arena>: Sized { pub trait RecoveryFallback<'src, 'arena>: Sized {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self; fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self;
} }
/// Extends [`ParseResult`] with recovery-related methods for /// Extends [`ParseResult`] with recovery-related methods for
/// fluent error handling. /// fluent error handling.
pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized { /// !!!! Can we store a parser reference instead of passing it into every method?
pub trait ResultRecoveryExt<'src, 'arena, T>: Sized {
/// Extends the left end of the error span to `from`. /// Extends the left end of the error span to `from`.
/// ///
/// Does nothing if `Self` is `Ok(...)`. /// Does nothing if `Self` is `Ok(...)`.
#[must_use] #[must_use]
fn widen_error_span_from(self, from: TokenLocation) -> Self; fn widen_error_span_from(self, from: TokenPosition) -> Self;
fn blame(self, blame_span: AstSpan) -> Self;
fn related(self, related_span: AstSpan) -> Self;
fn blame_token(self, blame_position: TokenPosition) -> Self {
self.blame(AstSpan::new(blame_position))
}
fn extend_blame_to_covered_end(self) -> Self;
fn related_token(self, related_position: TokenPosition) -> Self {
self.related(AstSpan::new(related_position))
}
/// Extends the right end of the error span up to but not including /// Extends the right end of the error span up to but not including
/// the next token of the given sync `level`. /// the next token of the given sync `level`.
@ -137,28 +252,44 @@ pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized {
/// Either returns expected value or its best effort fallback. /// Either returns expected value or its best effort fallback.
#[must_use] #[must_use]
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T; fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
where
T: RecoveryFallback<'src, 'arena>;
/// Produces the contained value if successful, /// Produces the contained value if successful,
/// or a fallback if an error occurred. /// or a fallback if an error occurred.
fn report_error(self, parser: &mut Parser<'src, 'arena>); fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool;
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<T>;
} }
impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> {
where fn widen_error_span_from(mut self, from: TokenPosition) -> Self {
T: RecoveryFallback<'src, 'arena>,
{
fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
if let Err(ref mut error) = self { if let Err(ref mut error) = self {
error.source_span.from = std::cmp::min(error.source_span.from, from); error.covered_span.token_from = std::cmp::min(error.covered_span.token_from, from);
} }
self self
} }
fn blame(self, blame_span: AstSpan) -> Self {
self.map_err(|error| error.blame(blame_span))
}
fn extend_blame_to_covered_end(self) -> Self {
self.map_err(|error| error.extend_blame_to_covered_end())
}
fn related(self, related_span: AstSpan) -> Self {
self.map_err(|error| error.related(related_span))
}
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
if let Err(ref mut error) = self { if let Err(ref mut error) = self {
parser.recover_until(level); parser.recover_until(level);
error.source_span.to = parser.last_visited_location(); error.covered_span.token_to = std::cmp::max(
error.covered_span.token_to,
parser.last_consumed_position_or_start(),
);
} }
self self
} }
@ -166,60 +297,213 @@ where
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
if let Err(ref mut error) = self { if let Err(ref mut error) = self {
parser.recover_until(level); parser.recover_until(level);
error.source_span.to = parser.peek_location();
// If we're at end-of-file, this'll simply do nothing. // If we're at end-of-file, this'll simply do nothing.
parser.advance(); if parser
.peek_token()
.and_then(SyncLevel::for_token)
.is_some_and(|next_level| next_level == level)
{
parser.advance();
}
error.covered_span.token_to = parser.last_consumed_position_or_start(); // need to be peek
} }
self self
} }
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T { fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
where
T: RecoveryFallback<'src, 'arena>,
{
self.unwrap_or_else(|error| { self.unwrap_or_else(|error| {
let value = T::fallback_value(parser, &error); let value = T::fallback_value(parser, &error);
parser.handle_error(error); parser.report_error(error);
value value
}) })
} }
fn report_error(self, parser: &mut Parser<'src, 'arena>) { fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool {
if let Err(error) = self { if let Err(error) = self {
parser.handle_error(error); parser.report_error(error);
true
} else {
false
}
}
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<T> {
match self {
Ok(value) => Some(value),
Err(error) => {
parser.report_error(error);
None
}
} }
} }
} }
impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError { impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
fn widen_error_span_from(mut self, from: TokenLocation) -> Self { fn widen_error_span_from(mut self, from: TokenPosition) -> Self {
self.source_span.from = std::cmp::min(self.source_span.from, from); self.covered_span.token_from = std::cmp::min(self.covered_span.token_from, from);
self
}
fn blame(mut self, blame_span: AstSpan) -> Self {
self.blame_span = blame_span;
self
}
fn extend_blame_to_covered_end(mut self) -> Self {
self.blame_span.token_to = self.covered_span.token_to;
self
}
fn related(mut self, related_span: AstSpan) -> Self {
self.related_span = Some(related_span);
self self
} }
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
parser.recover_until(level); parser.recover_until(level);
self.source_span.to = parser.last_visited_location(); self.covered_span.token_to = parser.last_consumed_position_or_start();
self self
} }
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
parser.recover_until(level); parser.recover_until(level);
self.source_span.to = parser.peek_location();
// If we're at end-of-file, this'll simply do nothing. // If we're at end-of-file, this'll simply do nothing.
parser.advance(); parser.advance();
self.covered_span.token_to = parser.last_consumed_position_or_start();
self self
} }
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> () { fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) {
parser.handle_error(self); parser.report_error(self);
} }
fn report_error(self, parser: &mut Parser<'src, 'arena>) { fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool {
parser.handle_error(self); parser.report_error(self);
true
}
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<()> {
parser.report_error(self);
None
} }
} }
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation { impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
Self::default()
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
Self::default()
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::IdentifierToken {
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
error.source_span.to Self(error.covered_span.token_from)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena>
for crate::ast::CallableDefinitionRef<'src, 'arena>
{
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
// default return type: Named("") at error span
let ret_id = crate::ast::IdentifierToken(err.covered_span.token_from);
let return_type = crate::arena::ArenaNode::new_in(
crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, ret_id)),
err.covered_span,
parser.arena,
);
let def = crate::ast::CallableDefinition {
name: crate::ast::CallableName::Identifier(IdentifierToken(
err.covered_span.token_from,
)),
kind: CallableKind::Function,
return_type_specifier: Some(return_type),
modifiers: parser.arena.vec(),
parameters: parser.arena.vec(),
body: None,
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StructDefRef<'src, 'arena> {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let def = crate::ast::StructDefinition {
name: None,
base_type_name: None,
modifiers: parser.arena.vec(),
fields: parser.arena.vec(),
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassVarDeclRef<'src, 'arena> {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let dummy_ident = crate::ast::IdentifierToken(err.covered_span.token_from);
let type_spec = crate::arena::ArenaNode::new_in(
crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(
parser.arena,
dummy_ident,
)),
err.covered_span,
parser.arena,
);
let def = crate::ast::ClassVarDecl {
paren_specs: None,
modifiers: parser.arena.vec(),
type_spec,
declarators: parser.arena.vec(),
span: err.covered_span,
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena>
for crate::ast::ReplicationBlockRef<'src, 'arena>
{
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let def = crate::ast::ReplicationBlock {
rules: parser.arena.vec(),
span: err.covered_span,
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StateDeclRef<'src, 'arena> {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let def = crate::ast::StateDecl {
name: crate::ast::IdentifierToken(err.covered_span.token_from),
parent: None,
modifiers: parser.arena.vec(),
ignores: None,
body: parser.arena.vec(),
span: err.covered_span,
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenPosition {
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
error.covered_span.token_to
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for (Token, TokenPosition) {
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
(Token::Error, error.covered_span.token_to)
} }
} }
@ -227,7 +511,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExpressionRef<
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
crate::arena::ArenaNode::new_in( crate::arena::ArenaNode::new_in(
crate::ast::Expression::Error, crate::ast::Expression::Error,
error.source_span, error.covered_span,
parser.arena, parser.arena,
) )
} }
@ -237,17 +521,51 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StatementRef<'
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
crate::arena::ArenaNode::new_in( crate::arena::ArenaNode::new_in(
crate::ast::Statement::Error, crate::ast::Statement::Error,
error.source_span, error.covered_span,
parser.arena, parser.arena,
) )
} }
} }
impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T> impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T> {
where fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
T: RecoveryFallback<'src, 'arena>, None
{ }
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { }
Some(T::fallback_value(parser, error))
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassConstDeclRef<'src, 'arena> {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let name = crate::ast::IdentifierToken(err.covered_span.token_from);
let value = crate::ast::DeclarationLiteralRef {
literal: crate::ast::DeclarationLiteral::None,
position: err.covered_span.token_from,
};
let def = crate::ast::ClassConstDecl {
name,
value,
span: err.covered_span,
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::TypeSpecifierRef<'src, 'arena> {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let dummy = crate::ast::IdentifierToken(err.covered_span.token_from);
crate::arena::ArenaNode::new_in(
crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, dummy)),
err.covered_span,
parser.arena,
)
}
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExecDirectiveRef<'arena> {
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
let def = crate::ast::ExecDirective {
text: parser.arena.string(""),
span: err.covered_span,
};
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
} }
} }

View File

@ -1,6 +1,6 @@
//! This module provides trivia token collection mechanism that lets parser code //! Records trivia separately from significant tokens so parser code can work
//! iterate over significant tokens while ignoring trivia and preserving //! with significant tokens without losing comments, whitespace, or line
//! full information for linting, formatting, and documentation. //! structure.
//! //!
//! Tokens considered *trivia* are: //! Tokens considered *trivia* are:
//! //!
@ -10,13 +10,27 @@
//! 4. [`crate::lexer::Token::Whitespace`]. //! 4. [`crate::lexer::Token::Whitespace`].
//! //!
//! Every other token is considered *significant*. //! Every other token is considered *significant*.
//!
//! ## Required usage
//!
//! This is an internal helper. Callers must follow the protocol below.
//!
//! [`TriviaIndexBuilder`] must be driven over a single token stream in
//! strictly increasing [`TokenPosition`] order.
//! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source
//! order, and call [`TriviaIndexBuilder::record_significant_token`] for each
//! significant token.
//!
//! After the last significant token has been processed, call
//! [`TriviaIndexBuilder::into_index`] to attach any trailing trivia.
//!
//! Violating this protocol is a logic error.
use crate::lexer::TokenLocation; use crate::lexer::TokenPosition;
/// Types of trivia tokens, corresponding directly to the matching variants of /// Kinds of trivia tokens corresponding to variants of [`crate::lexer::Token`].
/// [`crate::lexer::Token`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum TriviaKind { pub enum TriviaKind {
Whitespace, Whitespace,
Newline, Newline,
LineComment, LineComment,
@ -29,269 +43,215 @@ impl std::convert::TryFrom<crate::lexer::Token> for TriviaKind {
fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> { fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
use crate::lexer::Token; use crate::lexer::Token;
match token { match token {
Token::Whitespace => Ok(TriviaKind::Whitespace), Token::Whitespace => Ok(Self::Whitespace),
Token::Newline => Ok(TriviaKind::Newline), Token::Newline => Ok(Self::Newline),
Token::LineComment => Ok(TriviaKind::LineComment), Token::LineComment => Ok(Self::LineComment),
Token::BlockComment => Ok(TriviaKind::BlockComment), Token::BlockComment => Ok(Self::BlockComment),
_ => Err(()), _ => Err(()),
} }
} }
} }
/// Complete description of a trivia token. /// A recorded trivia token.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) struct TriviaToken<'src> { pub struct TriviaToken<'src> {
/// Specific type of the trivia. /// Kind of trivia token.
pub kind: TriviaKind, pub kind: TriviaKind,
/// Actual content of the token. /// Source text of the token.
pub text: &'src str, pub text: &'src str,
/// Location of this trivia token in the token stream. /// Location of this trivia token in the token stream.
pub location: TokenLocation, pub position: TokenPosition,
} }
type TriviaRange = std::ops::Range<usize>; type TriviaRangeMap = std::collections::HashMap<BoundaryLocation, std::ops::Range<usize>>;
type TriviaMap = std::collections::HashMap<TriviaLocation, TriviaRange>;
/// Immutable index over all recorded trivia. /// Extends [`TokenPosition`] with start-of-file and end-of-file markers.
/// ///
/// Enables O(1) access to trivia immediately before/after any significant /// Regular [`TokenPosition`] values are enough for significant tokens, but
/// token, plus file-leading and file-trailing trivia. Returned slices alias /// trivia also needs to represent content before the first significant token
/// internal storage and live for `'src`. /// and after the last one.
#[derive(Clone, Debug, Default)]
#[allow(dead_code)]
pub(crate) struct TriviaIndex<'src> {
/// All trivia tokens, stored contiguously in file order.
tokens: Vec<TriviaToken<'src>>,
/// Maps token location to the trivia tokens stored right after it.
after_map: TriviaMap,
/// Maps token location to the trivia tokens stored right before it.
before_map: TriviaMap,
}
/// Extends [`TokenLocation`] with *start of file* value.
///
/// Regular [`TokenLocation`] does not need this value, but trivia requires
/// a way to express "trivia before any significant token".
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum TriviaLocation { enum BoundaryLocation {
/// Position before any tokens, trivia or otherwise.
StartOfFile, StartOfFile,
/// This variant can also express "end of file" through Token(TokenPosition),
/// [`TokenLocation::EndOfFile`]. EndOfFile,
At(TokenLocation),
} }
/// Mutable builder for `TriviaIndex`. /// Immutable index over recorded trivia.
/// ///
/// Used inside the parser to record trivia between successive significant /// Provides O(1) access to trivia immediately before or after any significant
/// tokens in file order, then frozen via `into_index`. /// token, as well as file-leading and file-trailing trivia. Returned slices
#[derive(Debug, Default)] /// borrow the index, and the contained token texts live for `'src`.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
#[allow(dead_code)] #[allow(dead_code)]
pub(crate) struct TriviaComponent<'src> { pub struct TriviaIndex<'src> {
/// All trivia tokens, stored contiguously in file order. /// All trivia tokens, stored contiguously in file order.
tokens: Vec<TriviaToken<'src>>, tokens: Vec<TriviaToken<'src>>,
/// Maps token location to the trivia tokens stored right after it. /// Maps a trivia boundary location to the trivia tokens stored right
after_map: TriviaMap, /// after it.
/// Maps token location to the trivia tokens stored right before it. trivia_after_boundary: TriviaRangeMap,
before_map: TriviaMap, /// Maps a trivia boundary location to the trivia tokens stored right
/// Location of the last gap's right boundary, /// before it.
/// for debug-time invariant checks. trivia_before_boundary: TriviaRangeMap,
#[cfg(debug_assertions)]
last_right_boundary: Option<TriviaLocation>,
} }
impl<'src> TriviaComponent<'src> { /// Mutable builder for [`TriviaIndex`].
/// Records trivia tokens that lie strictly between ///
/// `previous_token_location` and `next_token_location`. /// Records trivia between successive significant tokens while the caller walks
/// /// a token stream in file order. Once all tokens have been processed, call
/// [`None`] for `previous_token_location` means beginning of file; /// [`TriviaIndexBuilder::into_index`] to finalize the index.
/// `next_token_location` may be [`TokenLocation::EndOfFile`]. #[derive(Debug)]
/// #[allow(dead_code)]
/// Empties `gap_trivia` without changing its capacity. pub struct TriviaIndexBuilder<'src> {
/// /// All trivia tokens, stored contiguously in file order.
/// Requirements (checked in debug builds): tokens: Vec<TriviaToken<'src>>,
/// - previous_token_location < next_token_location; /// Maps boundary location to the trivia tokens stored right after it.
/// - calls are monotonic: each gap starts at or after the last end; trivia_after_boundary: TriviaRangeMap,
/// - `collected` is nonempty and strictly ordered by `location`; /// Maps boundary location to the trivia tokens stored right before it.
/// - all `collected` lie strictly inside (prev, next). trivia_before_boundary: TriviaRangeMap,
pub(crate) fn record_between_locations(
&mut self,
previous_token_location: Option<TokenLocation>,
next_token_location: TokenLocation,
gap_trivia: &mut Vec<TriviaToken<'src>>,
) {
#[cfg(debug_assertions)]
self.debug_assert_valid_recording_batch(
previous_token_location,
next_token_location,
&gap_trivia,
);
if gap_trivia.is_empty() { /// Trivia collected since the last significant token (or file start),
/// not yet attached to a right boundary.
pending_trivia: Vec<TriviaToken<'src>>,
/// Left boundary of the currently open gap.
current_left_boundary: BoundaryLocation,
}
impl Default for TriviaIndexBuilder<'_> {
fn default() -> Self {
Self {
tokens: Vec::new(),
trivia_after_boundary: TriviaRangeMap::default(),
trivia_before_boundary: TriviaRangeMap::default(),
pending_trivia: Vec::new(),
current_left_boundary: BoundaryLocation::StartOfFile,
}
}
}
impl<'src> TriviaIndexBuilder<'src> {
/// Records `token` as trivia.
///
/// Tokens must be recorded in file order.
pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) {
#[cfg(debug_assertions)]
self.debug_assert_position_is_in_order(token.position);
self.pending_trivia.push(token);
}
/// Records a significant token at `position`.
///
/// Positions must be recorded in file order.
pub(crate) fn record_significant_token(&mut self, position: TokenPosition) {
let right_boundary = BoundaryLocation::Token(position);
#[cfg(debug_assertions)]
self.debug_assert_position_is_in_order(position);
self.flush_pending_trivia_to_boundary(right_boundary);
self.current_left_boundary = right_boundary;
}
// Stores one trivia range under both neighboring boundaries so lookups
// from either side return the same slice.
fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) {
if self.pending_trivia.is_empty() {
return; return;
} }
let previous_token_location = previous_token_location
.map(TriviaLocation::At)
.unwrap_or(TriviaLocation::StartOfFile);
let next_token_location = TriviaLocation::At(next_token_location);
let trivia_start = self.tokens.len(); let trivia_start = self.tokens.len();
self.tokens.append(gap_trivia); self.tokens.append(&mut self.pending_trivia);
let trivia_end = self.tokens.len(); let trivia_end = self.tokens.len();
self.trivia_after_boundary
self.after_map .insert(self.current_left_boundary, trivia_start..trivia_end);
.insert(previous_token_location, trivia_start..trivia_end); self.trivia_before_boundary
self.before_map .insert(right_boundary, trivia_start..trivia_end);
.insert(next_token_location, trivia_start..trivia_end);
} }
/// Freezes into an immutable, shareable index. /// Finalizes the builder and returns the completed trivia index.
///
/// Any pending trivia is recorded as trailing trivia.
#[must_use] #[must_use]
#[allow(dead_code)] #[allow(dead_code)]
pub(crate) fn into_index(self) -> TriviaIndex<'src> { pub(crate) fn into_index(mut self) -> TriviaIndex<'src> {
self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile);
TriviaIndex { TriviaIndex {
tokens: self.tokens, tokens: self.tokens,
after_map: self.after_map, trivia_after_boundary: self.trivia_after_boundary,
before_map: self.before_map, trivia_before_boundary: self.trivia_before_boundary,
} }
} }
/// Trivia immediately after the significant token at `location`. // Catches out-of-order recording during development; the builder relies
/// // on this ordering invariant.
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded after it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.after_map)
}
/// Trivia immediately before the significant token at `location`.
///
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded before it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.before_map)
}
/// Trivia before any significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
}
/// Trivia after the last significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
self.slice_for(
TriviaLocation::At(TokenLocation::EndOfFile),
&self.before_map,
)
}
// Helper: return the recorded slice or an empty slice if none.
#[track_caller]
#[allow(dead_code)]
fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
if let Some(range) = map.get(&key) {
// Ranges are guaranteed to be valid by construction
&self.tokens[range.start..range.end]
} else {
&[]
}
}
/// Debug-only validation for `record_between_locations`'s contract.
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
fn debug_assert_valid_recording_batch( fn debug_assert_position_is_in_order(&self, position: TokenPosition) {
&mut self, let location = BoundaryLocation::Token(position);
previous_token_location: Option<TokenLocation>, debug_assert!(location > self.current_left_boundary);
next_token_location: TokenLocation, if let Some(last) = self.pending_trivia.last() {
collected: &[TriviaToken<'src>], debug_assert!(last.position < position);
) {
// Prevent zero-width or reversed gaps
debug_assert!(previous_token_location < Some(next_token_location));
let previous_token_location = previous_token_location
.map(TriviaLocation::At)
.unwrap_or(TriviaLocation::StartOfFile);
let next_token_location = TriviaLocation::At(next_token_location);
// Enforce monotonic gaps: we record in file order
if let Some(last_right) = self.last_right_boundary {
debug_assert!(previous_token_location >= last_right);
} }
self.last_right_boundary = Some(next_token_location);
let first_trivia_location = collected
.first()
.map(|token| TriviaLocation::At(token.location))
.expect("Provided trivia tokens array should not be empty.");
let last_trivia_location = collected
.last()
.map(|token| TriviaLocation::At(token.location))
.expect("Provided trivia tokens array should not be empty.");
// Ensure trivia lies strictly inside the gap
debug_assert!(previous_token_location < first_trivia_location);
debug_assert!(next_token_location > last_trivia_location);
// Ensure trivia locations are strictly increasing
debug_assert!(
collected
.windows(2)
.all(|window| window[0].location < window[1].location)
);
} }
} }
impl<'src> TriviaIndex<'src> { impl<'src> TriviaIndex<'src> {
/// Trivia immediately after the significant token at `location`. /// Returns the trivia immediately after the significant token at
/// `position`.
/// ///
/// Returns an empty slice if `location` is not pointing at /// Returns an empty slice if `position` does not identify a recorded
/// a significant token or if no trivia was recorded after it. /// significant token or if no trivia was recorded after it.
#[must_use] #[must_use]
#[allow(dead_code)] #[allow(dead_code)]
pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] { pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.after_map)
}
/// Trivia immediately before the significant token at `location`.
///
/// Returns an empty slice if `location` is not pointing at
/// a significant token or if no trivia was recorded before it.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::At(location), &self.before_map)
}
/// Trivia before any significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
}
/// Trivia after the last significant token.
#[must_use]
#[allow(dead_code)]
pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
self.slice_for( self.slice_for(
TriviaLocation::At(TokenLocation::EndOfFile), BoundaryLocation::Token(position),
&self.before_map, &self.trivia_after_boundary,
) )
} }
// Helper: return the recorded slice or an empty slice if none. /// Returns the trivia immediately before the significant token at `position`.
#[track_caller] ///
/// Returns an empty slice if `position` does not identify a recorded
/// significant token or if no trivia was recorded before it.
#[must_use]
#[allow(dead_code)] #[allow(dead_code)]
fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] { pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
if let Some(range) = map.get(&key) { self.slice_for(
// Ranges are guaranteed to be valid by construction BoundaryLocation::Token(position),
&self.tokens[range.start..range.end] &self.trivia_before_boundary,
} else { )
&[] }
/// Returns the trivia before the first significant token.
///
/// If no significant tokens were recorded, returns all recorded trivia.
#[must_use]
#[allow(dead_code)]
pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] {
self.slice_for(BoundaryLocation::StartOfFile, &self.trivia_after_boundary)
}
/// Returns the trivia after the last significant token.
///
/// If no significant tokens were recorded, returns all recorded trivia.
#[must_use]
#[allow(dead_code)]
pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] {
self.slice_for(BoundaryLocation::EndOfFile, &self.trivia_before_boundary)
}
#[allow(dead_code)]
fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] {
match map.get(&key) {
Some(range) => {
// Ranges are guaranteed to be valid by construction
debug_assert!(range.start <= range.end);
debug_assert!(range.end <= self.tokens.len());
self.tokens.get(range.clone()).unwrap_or(&[])
}
None => &[],
} }
} }
} }

63
rottlib/tests/common.rs Normal file
View File

@ -0,0 +1,63 @@
use std::path::{Path, PathBuf};
use rottlib::lexer::{Token, TokenData, TokenPosition, TokenizedFile};
pub fn fixture_path(name: &str) -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR"))
.join("tests")
.join("fixtures")
.join(name)
}
/// Reads a fixture file as UTF-8 text, panicking with the full path on
/// failure.
pub fn read_fixture(name: &str) -> String {
    let path = fixture_path(name);
    match std::fs::read_to_string(&path) {
        Ok(contents) => contents,
        Err(e) => panic!("failed to read fixture {}: {e}", path.display()),
    }
}
/// Tokenizes the named fixture and hands both the raw source text and the
/// resulting [`TokenizedFile`] to `f`.
pub fn with_fixture(name: &str, f: impl for<'src> FnOnce(&'src str, TokenizedFile<'src>)) {
    let text = read_fixture(name);
    let tokenized = TokenizedFile::tokenize(&text);
    f(&text, tokenized);
}
/// Collects the lexeme of every token on `line`, in file order.
pub fn line_lexemes<'file, 'src>(file: &'file TokenizedFile<'src>, line: usize) -> Vec<&'src str> {
    let mut lexemes = Vec::new();
    for (_, token_data) in file.line_tokens(line) {
        lexemes.push(token_data.lexeme);
    }
    lexemes
}
/// Collects the kind of every token on `line`, in file order.
pub fn line_tokens<'src>(file: &TokenizedFile<'src>, line: usize) -> Vec<Token> {
    let mut kinds = Vec::new();
    for (_, token_data) in file.line_tokens(line) {
        kinds.push(token_data.token);
    }
    kinds
}
/// Collects the stream position of every token on `line`, in file order.
pub fn line_positions<'src>(file: &TokenizedFile<'src>, line: usize) -> Vec<TokenPosition> {
    let mut positions = Vec::new();
    for (position, _) in file.line_tokens(line) {
        positions.push(position);
    }
    positions
}
/// Collects `(kind, lexeme)` pairs for every token on `line`, in file order.
pub fn line_pairs<'file, 'src>(
    file: &'file TokenizedFile<'src>,
    line: usize,
) -> Vec<(Token, &'src str)> {
    let mut pairs = Vec::new();
    for (_, token_data) in file.line_tokens(line) {
        pairs.push((token_data.token, token_data.lexeme));
    }
    pairs
}
/// Collects the lexeme of every token in the whole file, in file order.
pub fn all_lexemes<'file, 'src>(file: &'file TokenizedFile<'src>) -> Vec<&'src str> {
    let mut lexemes = Vec::new();
    for (_, token_data) in file.iter() {
        lexemes.push(token_data.lexeme);
    }
    lexemes
}
/// Collects the kind of every token in the whole file, in file order.
pub fn all_tokens<'src>(file: &TokenizedFile<'src>) -> Vec<Token> {
    let mut kinds = Vec::new();
    for (_, token_data) in file.iter() {
        kinds.push(token_data.token);
    }
    kinds
}
/// Looks up the token stored at stream index `index`, if any.
pub fn token_at<'src>(file: &TokenizedFile<'src>, index: usize) -> Option<TokenData<'src>> {
    let position = TokenPosition(index);
    file.token_at(position)
}
/// Rebuilds the original source text by concatenating every lexeme in file
/// order.
pub fn reconstruct_source<'file, 'src>(file: &'file TokenizedFile<'src>) -> String {
    let mut source = String::new();
    for (_, token_data) in file.iter() {
        source.push_str(token_data.lexeme);
    }
    source
}
/// Returns the index of the first line whose reconstructed text equals
/// `needle`, if any.
pub fn find_line<'src>(file: &TokenizedFile<'src>, needle: &str) -> Option<usize> {
    for line in 0..file.line_count() {
        if file.line_text(line).as_deref() == Some(needle) {
            return Some(line);
        }
    }
    None
}

1578
rottlib/tests/fixtures/CommandAPI.uc vendored Normal file

File diff suppressed because it is too large Load Diff

1199
rottlib/tests/fixtures/DBRecord.uc vendored Normal file

File diff suppressed because it is too large Load Diff

326
rottlib/tests/fixtures/KVehicle.uc vendored Normal file
View File

@ -0,0 +1,326 @@
// Generic 'Karma Vehicle' base class that can be controlled by a Pawn.
class KVehicle extends Vehicle
native
abstract;
cpptext
{
#ifdef WITH_KARMA
virtual void PostNetReceive();
virtual void PostEditChange();
virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);
virtual void TickSimulated( FLOAT DeltaSeconds );
virtual void TickAuthoritative( FLOAT DeltaSeconds );
#endif
}
// Effect spawned when vehicle is destroyed
var (KVehicle) class<Actor> DestroyEffectClass;
// Simple 'driving-in-rings' logic.
var (KVehicle) bool bAutoDrive;
// The factory that created this vehicle.
//var KVehicleFactory ParentFactory;
// Weapon system
var bool bVehicleIsFiring, bVehicleIsAltFiring;
const FilterFrames = 5;
var vector CameraHistory[FilterFrames];
var int NextHistorySlot;
var bool bHistoryWarmup;
// Useful function for plotting data to real-time graph on screen.
native final function GraphData(string DataName, float DataValue);
// if _RO_
function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation,
vector momentum, class<DamageType> damageType, optional int HitIndex)
// else UT
//function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation,
// vector momentum, class<DamageType> damageType)
{
Super.TakeDamage(Damage,instigatedBy,HitLocation,Momentum,DamageType);
}
// You got some new info from the server (ie. VehicleState has some new info).
event VehicleStateReceived();
// Called when a parameter of the overall articulated actor has changed (like PostEditChange)
// The script must then call KUpdateConstraintParams or Actor Karma mutators as appropriate.
simulated event KVehicleUpdateParams();
// The pawn Driver has tried to take control of this vehicle
function bool TryToDrive(Pawn P)
{
if ( P.bIsCrouched || (P.Controller == None) || (Driver != None) || !P.Controller.bIsPlayer )
return false;
if ( !P.IsHumanControlled() || !P.Controller.IsInState('PlayerDriving') )
{
KDriverEnter(P);
return true;
}
return false;
}
// Events called on driver entering/leaving vehicle
simulated function ClientKDriverEnter(PlayerController pc)
{
pc.myHUD.bCrosshairShow = false;
pc.myHUD.bShowWeaponInfo = false;
pc.myHUD.bShowPoints = false;
pc.bBehindView = true;
pc.bFreeCamera = true;
pc.SetRotation(rotator( vect(-1, 0, 0) >> Rotation ));
}
function KDriverEnter(Pawn P)
{
local PlayerController PC;
local Controller C;
// Set pawns current controller to control the vehicle pawn instead
Driver = P;
// Move the driver into position, and attach to car.
Driver.SetCollision(false, false);
Driver.bCollideWorld = false;
Driver.bPhysicsAnimUpdate = false;
Driver.Velocity = vect(0,0,0);
Driver.SetPhysics(PHYS_None);
Driver.SetBase(self);
// Disconnect PlayerController from Driver and connect to KVehicle.
C = P.Controller;
p.Controller.Unpossess();
Driver.SetOwner(C); // This keeps the driver relevant.
C.Possess(self);
PC = PlayerController(C);
if ( PC != None )
{
PC.ClientSetViewTarget(self); // Set playercontroller to view the vehicle
// Change controller state to driver
PC.GotoState('PlayerDriving');
ClientKDriverEnter(PC);
}
}
simulated function ClientKDriverLeave(PlayerController pc)
{
pc.bBehindView = false;
pc.bFreeCamera = false;
// This removes any 'roll' from the look direction.
//exitLookDir = Vector(pc.Rotation);
//pc.SetRotation(Rotator(exitLookDir));
pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;
pc.myHUD.bShowWeaponInfo = pc.myHUD.default.bShowWeaponInfo;
pc.myHUD.bShowPoints = pc.myHUD.default.bShowPoints;
// Reset the view-smoothing
NextHistorySlot = 0;
bHistoryWarmup = true;
}
// Called from the PlayerController when player wants to get out.
function bool KDriverLeave(bool bForceLeave)
{
local PlayerController pc;
local int i;
local bool havePlaced;
local vector HitLocation, HitNormal, tryPlace;
// Do nothing if we're not being driven
if(Driver == None)
return false;
// Before we can exit, we need to find a place to put the driver.
// Iterate over array of possible exit locations.
if (!bRemoteControlled)
{
Driver.bCollideWorld = true;
Driver.SetCollision(true, true);
havePlaced = false;
for(i=0; i < ExitPositions.Length && havePlaced == false; i++)
{
//Log("Trying Exit:"$i);
tryPlace = Location + (ExitPositions[i] >> Rotation);
// First, do a line check (stops us passing through things on exit).
if( Trace(HitLocation, HitNormal, tryPlace, Location, false) != None )
continue;
// Then see if we can place the player there.
if( !Driver.SetLocation(tryPlace) )
continue;
havePlaced = true;
}
// If we could not find a place to put the driver, leave driver inside as before.
if(!havePlaced && !bForceLeave)
{
Log("Could not place driver.");
Driver.bCollideWorld = false;
Driver.SetCollision(false, false);
return false;
}
}
pc = PlayerController(Controller);
ClientKDriverLeave(pc);
// Reconnect PlayerController to Driver.
pc.Unpossess();
pc.Possess(Driver);
pc.ClientSetViewTarget(Driver); // Set playercontroller to view the persone that got out
Controller = None;
Driver.PlayWaiting();
Driver.bPhysicsAnimUpdate = Driver.Default.bPhysicsAnimUpdate;
// Do stuff on client
//pc.ClientSetBehindView(false);
//pc.ClientSetFixedCamera(true);
if (!bRemoteControlled)
{
Driver.Acceleration = vect(0, 0, 24000);
Driver.SetPhysics(PHYS_Falling);
Driver.SetBase(None);
}
// Car now has no driver
Driver = None;
// Put brakes on before you get out :)
Throttle=0;
Steering=0;
// Stop firing when you get out!
bVehicleIsFiring = false;
bVehicleIsAltFiring = false;
return true;
}
// Special calc-view for vehicles
simulated function bool SpecialCalcView(out actor ViewActor, out vector CameraLocation, out rotator CameraRotation )
{
local vector CamLookAt, HitLocation, HitNormal;
local PlayerController pc;
local int i, averageOver;
pc = PlayerController(Controller);
// Only do this mode we have a playercontroller viewing this vehicle
if(pc == None || pc.ViewTarget != self)
return false;
ViewActor = self;
CamLookAt = Location + (vect(-100, 0, 100) >> Rotation);
//////////////////////////////////////////////////////
// Smooth lookat position over a few frames.
CameraHistory[NextHistorySlot] = CamLookAt;
NextHistorySlot++;
if(bHistoryWarmup)
averageOver = NextHistorySlot;
else
averageOver = FilterFrames;
CamLookAt = vect(0, 0, 0);
for(i=0; i<averageOver; i++)
CamLookAt += CameraHistory[i];
CamLookAt /= float(averageOver);
if(NextHistorySlot == FilterFrames)
{
NextHistorySlot = 0;
bHistoryWarmup=false;
}
//////////////////////////////////////////////////////
CameraLocation = CamLookAt + (vect(-600, 0, 0) >> CameraRotation);
if( Trace( HitLocation, HitNormal, CameraLocation, CamLookAt, false, vect(10, 10, 10) ) != None )
{
CameraLocation = HitLocation;
}
return true;
}
simulated function Destroyed()
{
// If there was a driver in the vehicle, destroy him too
if ( Driver != None )
Driver.Destroy();
// Trigger any effects for destruction
if(DestroyEffectClass != None)
spawn(DestroyEffectClass, , , Location, Rotation);
Super.Destroyed();
}
simulated event Tick(float deltaSeconds)
{
}
// Includes properties from KActor
defaultproperties
{
Steering=0
Throttle=0
ExitPositions(0)=(X=0,Y=0,Z=0)
DrivePos=(X=0,Y=0,Z=0)
DriveRot=()
bHistoryWarmup = true;
Physics=PHYS_Karma
bEdShouldSnap=True
bStatic=False
bShadowCast=False
bCollideActors=True
bCollideWorld=False
bProjTarget=True
bBlockActors=True
bBlockNonZeroExtentTraces=True
bBlockZeroExtentTraces=True
bWorldGeometry=False
bBlockKarma=True
bAcceptsProjectors=True
bCanBeBaseForPawns=True
bAlwaysRelevant=True
RemoteRole=ROLE_SimulatedProxy
bNetInitialRotation=True
bSpecialCalcView=True
//bSpecialHUD=true
}

View File

@ -0,0 +1,135 @@
use std::{fs, path::PathBuf};
use rottlib::lexer::{Keyword, Token, TokenizedFile};
/// Returns the path to a fixture file in `tests/fixtures/`.
/// Returns the path to a fixture file in `tests/fixtures/`.
fn fixture_file_path(name: &str) -> PathBuf {
    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    for component in ["tests", "fixtures", name] {
        path.push(component);
    }
    path
}
/// Loads a fixture source file as UTF-8 text.
/// Loads a fixture source file as UTF-8 text, panicking on any read error.
fn read_fixture_source(name: &str) -> String {
    match fs::read_to_string(fixture_file_path(name)) {
        Ok(source) => source,
        Err(e) => panic!("failed to read fixture {name}: {e}"),
    }
}
/// Returns the token at the given token index on a physical line.
///
/// Here `line` is 1-based, to match human line numbers in fixture files.
/// `token_index` is 0-based within `TokenizedFile::line_tokens`.
/// Returns the token at the given token index on a physical line.
///
/// `line` is 1-based, to match human line numbers in fixture files;
/// `token_index` is 0-based within `TokenizedFile::line_tokens`.
fn token_on_line(file: &TokenizedFile<'_>, line: usize, token_index: usize) -> Option<Token> {
    let mut tokens_on_line = file.line_tokens(line - 1);
    match tokens_on_line.nth(token_index) {
        Some((_, token_data)) => Some(token_data.token),
        None => None,
    }
}
/// Returns reconstructed visible text for a physical line.
///
/// Here `line` is 1-based, to match human line numbers in fixture files.
/// Returns reconstructed visible text for a physical line.
///
/// `line` is 1-based, to match human line numbers in fixture files.
fn line_text(file: &TokenizedFile<'_>, line: usize) -> Option<String> {
    let zero_based_line = line - 1;
    file.line_text(zero_based_line)
}
/// Spot-checks line reconstruction and token lookups on the large
/// `CommandAPI.uc` fixture.
#[test]
fn command_api_fixture_queries() {
    let source = read_fixture_source("CommandAPI.uc");
    let file = TokenizedFile::tokenize(&source);

    assert_eq!(file.line_count(), 1578);

    // Pairs of (1-based line number, expected reconstructed text).
    let expected_lines = [
        (
            704,
            "public final function CommandConfigInfo ResolveCommandForUserID(BaseText itemName, UserID id) {",
        ),
        (806, " _.memory.Free(wrapper);"),
        (
            1274,
            "/// Method must be called after [`Voting`] with a given name is added.",
        ),
        (
            14,
            " * Acedia is distributed in the hope that it will be useful,",
        ),
    ];
    for (line, expected) in expected_lines {
        assert_eq!(line_text(&file, line).as_deref(), Some(expected));
    }

    assert_eq!(
        token_on_line(&file, 22, 0),
        Some(Token::Keyword(Keyword::Class))
    );
    assert_eq!(
        token_on_line(&file, 1577, 0),
        Some(Token::Keyword(Keyword::DefaultProperties))
    );
    assert_eq!(token_on_line(&file, 649, 4), Some(Token::Whitespace));
}
/// Spot-checks line reconstruction and token lookups on the `DBRecord.uc`
/// fixture.
#[test]
fn dbrecord_fixture_queries() {
    let source = read_fixture_source("DBRecord.uc");
    let file = TokenizedFile::tokenize(&source);

    assert_eq!(file.line_count(), 1199);

    // Pairs of (1-based line number, expected reconstructed text).
    let expected_lines = [
        (
            149,
            " * However, JSON pointers are not convenient or efficient enough for that,",
        ),
        (
            787,
            " * 3. 'number' -> either `IntBox` or `FloatBox`, depending on",
        ),
        (1023, " bool makeMutable)"),
        (29, " config(AcediaDB);"),
    ];
    for (line, expected) in expected_lines {
        assert_eq!(line_text(&file, line).as_deref(), Some(expected));
    }

    assert_eq!(token_on_line(&file, 565, 0), Some(Token::BlockComment));
    assert_eq!(token_on_line(&file, 467, 10), Some(Token::Identifier));
    assert_eq!(token_on_line(&file, 467, 9), Some(Token::LeftParenthesis));
}
/// Spot-checks line reconstruction and token lookups on the `KVehicle.uc`
/// fixture.
#[test]
fn kvehicle_fixture_queries() {
    let source = read_fixture_source("KVehicle.uc");
    let file = TokenizedFile::tokenize(&source);

    assert_eq!(file.line_count(), 326);

    // Pairs of (1-based line number, expected reconstructed text).
    let expected_lines = [
        (
            12,
            " virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);",
        ),
        (
            127,
            " pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;",
        ),
        (264, " //////////////////////////////////////////////////////"),
        (299, " ExitPositions(0)=(X=0,Y=0,Z=0)"),
    ];
    for (line, expected) in expected_lines {
        assert_eq!(line_text(&file, line).as_deref(), Some(expected));
    }

    assert_eq!(token_on_line(&file, 17, 0), Some(Token::Newline));
    assert_eq!(token_on_line(&file, 20, 7), Some(Token::Less));
    assert_eq!(token_on_line(&file, 246, 2), Some(Token::Increment));
}

View File

@ -37,7 +37,7 @@ impl tower_lsp::LanguageServer for RottLanguageServer {
// Measure lexing performance to track parser responsiveness. // Measure lexing performance to track parser responsiveness.
let start_time = std::time::Instant::now(); let start_time = std::time::Instant::now();
let has_errors = let has_errors =
rottlib::lexer::TokenizedFile::from_str(&params.text_document.text).has_errors(); rottlib::lexer::TokenizedFile::tokenize(&params.text_document.text).has_errors();
let elapsed_time = start_time.elapsed(); let elapsed_time = start_time.elapsed();
self.client self.client

97
test.uc Normal file
View File

@ -0,0 +1,97 @@
/// BOF line comment
/* BOF block comment */
class TestClass extends Actor
abstract
native;
//nativereplication;
/* One blank line follows to test has_blank_line_after() */
function int fuck_you(int a, float b, string c)
{
// ---- locals with an error to trigger recovery to comma/semicolon
local int i, /* oops */ , k;
local int a, b, c;
// ---- builtins: valid + error + various initializers
int a = 1, b, , c = 3;
float f = (1.0 + 2.0) * 0.5;
bool flag = true;
string s = "hi\n\"ok\"";
name tag;
array nums;
// ---- label + goto
start:
goto start2;
// ---- if / else with tail-as-value and missing semicolons inside
if (a + c > 0) {
while (a < 5) {
if (flag) {
break;
}
a + 1; // ok
continue
} // missing ';' before '}' should be fine (SelfTerminating)
} else {
{
a + 2;
b // tail expression (no ';') becomes block tail
}
}
// ---- for with header pieces using statement-as-value
for (i; i < 10; i += 1) {
j + i;
i + j // no semicolon, next is '}' so this is a tail
}
// ---- assert with a block-as-value (statement-as-value)
assert {
i = i + 1;
i // tail is the value of the block
};
// ---- foreach (paren and no-paren forms)
foreach (nums) {
i++
}
foreach nums {
--i; // prefix and postfix in play
j--
}
// ---- do ... until (paren and no-paren) + semicolon handling
do {
i = i + 1
} until (i > 3);
do i = i + 1; until i > 5;
// ---- switch with multi-label case, recovery, and default
switch (a + c) {
case 0:
case 1:
a = a + 10
// missing ';' here forces recovery to next boundary (case/default/})
case 2:
assert (a > 0); // regular statement
break;
case 3, 4:
break;
default:
// some stray token sequence to poke "unexpected token in switch body"
/* block comment with
newlines */
a + ; // malformed expr; recover to boundary
continue; // legal statement after recovery
}
// ---- second label target for goto
start2:
return a; // final return
}
// EOF trailing line comment
/* EOF trailing block comment */

10
test_full.uc Normal file
View File

@ -0,0 +1,10 @@
// #[config(JustConfig)]
abstract class NewWay extends AcediaObject {
// #[config(MaxWavesAmount)]
var int _value;
}
class TestClass extends Actor
abstract
native
nativereplication;