diff --git a/Cargo.lock b/Cargo.lock index f6bfb73..b4dc4f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,12 @@ version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + [[package]] name = "bytes" version = "1.10.1" @@ -96,6 +102,73 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31" +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crossterm" +version = "0.29.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" +dependencies = [ + "bitflags 2.9.1", + "crossterm_winapi", + "derive_more", + "document-features", + "mio", + "parking_lot", + "rustix", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -109,13 +182,38 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "dev_tests" version = "0.1.0" dependencies = [ "chardet", "encoding_rs", + "is-terminal", + "libc", "rottlib", + "sysinfo", "walkdir", ] @@ -130,6 +228,21 @@ dependencies = [ "syn", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -139,6 +252,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "errno" +version 
= "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + [[package]] name = "fnv" version = "1.0.7" @@ -243,6 +366,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "httparse" version = "1.10.1" @@ -367,6 +496,17 @@ dependencies = [ "libc", ] +[[package]] +name = "is-terminal" +version = "0.4.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + [[package]] name = "itoa" version = "1.0.15" @@ -385,12 +525,24 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + [[package]] name = "litemap" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.13" @@ -401,6 +553,12 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.29" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + [[package]] name = "logos" version = "0.15.0" @@ -470,10 +628,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", + "log", "wasi", "windows-sys", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "object" version = "0.36.7" @@ -577,6 +745,26 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -596,6 +784,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" name = "rottlib" version = "0.1.0" dependencies = [ + "backtrace", + "bumpalo", + "crossterm", "logos", ] @@ -623,6 +814,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags 2.9.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "ryu" version = "1.0.20" @@ -693,6 +897,27 @@ dependencies = [ "syn", ] +[[package]] +name = "signal-hook" +version = "0.3.18" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + [[package]] name = "signal-hook-registry" version = "1.4.5" @@ -752,6 +977,21 @@ dependencies = [ "syn", ] +[[package]] +name = "sysinfo" +version = "0.30.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "rayon", + "windows", +] + [[package]] name = "tinystr" version = "0.8.1" @@ -903,6 +1143,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" + [[package]] name = "url" version = "2.5.4" @@ -937,6 +1183,22 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -946,6 +1208,31 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/dev_tests/Cargo.toml b/dev_tests/Cargo.toml index 70f7783..560fe5f 100644 --- a/dev_tests/Cargo.toml +++ b/dev_tests/Cargo.toml @@ -3,22 +3,21 @@ name = "dev_tests" version = "0.1.0" edition = "2024" -[[bin]] -name = "dump_tokens" -path = "src/dump_tokens.rs" - [[bin]] name = "uc_lexer_verify" path = "src/uc_lexer_verify.rs" [[bin]] -name = "temp" -path = "src/temp.rs" +name = "verify_expr" +path = "src/verify_expr.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] rottlib = { version = "0", path = "../rottlib", features = ["debug"] } +is-terminal = "0.4" +libc = "0.2" +sysinfo = "0.30" walkdir="2.5" encoding_rs="0.8" chardet="0.2" diff --git a/dev_tests/src/dump_tokens.rs b/dev_tests/src/dump_tokens.rs deleted file mode 100644 index 1a5c846..0000000 --- a/dev_tests/src/dump_tokens.rs +++ /dev/null @@ -1,76 +0,0 @@ -use std::{ - fs, - path::{Path, PathBuf}, -}; - -use encoding_rs::{Encoding, UTF_8}; -use rottlib::lexer::{DebugTools, 
TokenizedFile}; - -/// Recursively search `root` for the first file whose *basename* matches -/// `needle` (case-sensitive). -/// -/// Returns the absolute path. -fn find_file(root: &Path, needle: &str) -> Option { - for entry in walkdir::WalkDir::new(root) - .into_iter() - .filter_map(Result::ok) - { - let path = entry.path(); - if path.is_file() && (path.file_name().and_then(|name| name.to_str()) == Some(needle)) { - return fs::canonicalize(path).ok(); - } - } - None -} - -/// CLI: `dump_tokens ` - searches for `` -/// recursively inside ``. -/// -/// This utility takes *root directory* and *file name* instead of the full path -/// to help us avoid searching for them typing names out: -/// -/// - We know where all the sources are; -/// - We usually just know the name of the file that is being problematic. -fn main() { - let mut args = std::env::args().skip(1); - let root_dir = args.next().unwrap_or_else(|| { - eprintln!("Usage: inspect_uc "); - std::process::exit(1); - }); - let file_name = args.next().unwrap_or_else(|| { - eprintln!("Usage: inspect_uc "); - std::process::exit(1); - }); - - let root = PathBuf::from(&root_dir); - if !root.exists() { - eprintln!("Root directory '{root_dir}' does not exist."); - std::process::exit(1); - } - - let found_path = find_file(&root, &file_name).map_or_else( - || { - eprintln!("File '{file_name}' not found under '{root_dir}'."); - std::process::exit(1); - }, - |path| path, - ); - - // Read & decode - let raw_bytes = match fs::read(&found_path) { - Ok(sources) => sources, - Err(error) => { - eprintln!("Could not read {}: {error}", found_path.display()); - std::process::exit(1); - } - }; - - let (encoding_label, _, _) = chardet::detect(&raw_bytes); - let encoding = Encoding::for_label(encoding_label.as_bytes()).unwrap_or(UTF_8); - let (decoded_str, _, _) = encoding.decode(&raw_bytes); - - let source_text = decoded_str.to_string(); - let tokenized_file = TokenizedFile::from_str(&source_text); - - 
tokenized_file.dump_debug_layout(); -} diff --git a/dev_tests/src/pretty.rs b/dev_tests/src/pretty.rs new file mode 100644 index 0000000..d47150c --- /dev/null +++ b/dev_tests/src/pretty.rs @@ -0,0 +1,14 @@ +// diagnostics_render.rs + +use rottlib::diagnostics::{Diagnostic}; +use rottlib::lexer::TokenizedFile; + +pub fn render_diagnostic( + diag: &Diagnostic, + _file: &TokenizedFile, + file_name: Option<&str>, + colors: bool, +) -> String { + diag.render(_file, file_name.unwrap_or("")); + "fuck it".to_string() +} diff --git a/dev_tests/src/temp.rs b/dev_tests/src/temp.rs deleted file mode 100644 index 34cd8af..0000000 --- a/dev_tests/src/temp.rs +++ /dev/null @@ -1,129 +0,0 @@ -//! src/main.rs -//! -------------------------------------------- -//! Build & run: -//! cargo run -//! -------------------------------------------- - -use std::env; -use std::fs; -use std::io::{self, Read, Write}; -use std::path::Path; - -use rottlib::arena::Arena; -use rottlib::lexer::TokenizedFile; -use rottlib::parser::{ParseError, Parser, pretty::ExprTree}; - -/* -- Convenient array definitions: [1, 3, 5, 2, 4] -- Boolean dynamic arrays -- Structures in default properties -- Auto conversion of arrays into strings -- Making 'var' and 'local' unnecessary -- Allowing variable creation in 'for' loops -- Allowing variable creation at any place inside a function -- Default parameters for functions -- Function overloading? -- repeat/until -- The syntax of the default properties block is pretty strict for an arcane reason. Particularly adding spaces before or after the "=" will lead to errors in pre-UT2003 versions. -- Scopes -- different names for variables and in config file -- anonymous pairs (objects?) 
and value destruction ->>> AST > HIR > MIR > byte code -*/ - -/// Closest plan: -/// - Add top-level declaration parsing -/// - Handle pretty.rs shit somehow -/// - COMMITS -/// --------------------------------------- -/// - Add fancy error reporting -/// - Make a fancy REPL -/// - Add evaluation -/// -/// WARNINGS: -/// - Empty code/switch blocks - -fn parse_and_print(src: &str) -> Result<(), ParseError> { - let tokenized = TokenizedFile::from_str(src); - let arena = Arena::new(); - let mut parser = Parser::new(&tokenized, &arena); - - let expr = parser.parse_expression(); // ArenaNode - println!("{}", ExprTree(&*expr)); // if ArenaNode - // or: println!("{}", ExprTree(expr.as_ref())); // if no Deref - Ok(()) -} - -fn repl_once() -> Result<(), ParseError> { - print!("Enter an statement > "); - io::stdout().flush().unwrap(); - - let mut input = String::new(); - if io::stdin().read_line(&mut input).is_err() { - eprintln!("failed to read input"); - return Ok(()); - } - - if input.trim().is_empty() { - return Ok(()); - } - - parse_and_print(&input) -} - -fn read_stdin_all() -> io::Result { - let mut buf = String::new(); - io::stdin().read_to_string(&mut buf)?; - Ok(buf) -} - -fn read_file_to_string(path: &Path) -> io::Result { - fs::read_to_string(path) -} - -fn main() -> Result<(), ParseError> { - // Accept a single positional arg as the input path. - // "-" means read all of stdin. 
- let mut args = env::args().skip(1); - - if let Some(arg1) = args.next() { - if arg1 == "-h" || arg1 == "--help" { - println!("Usage:"); - println!( - " {} # REPL", - env::args().next().unwrap_or_else(|| "prog".into()) - ); - println!( - " {} # parse file", - env::args().next().unwrap_or_else(|| "prog".into()) - ); - println!( - " {} - # read source from stdin", - env::args().next().unwrap_or_else(|| "prog".into()) - ); - return Ok(()); - } - - if arg1 == "-" { - match read_stdin_all() { - Ok(src) => return parse_and_print(&src), - Err(e) => { - eprintln!("stdin read error: {}", e); - return Ok(()); - } - } - } else { - let path = Path::new(&arg1); - match read_file_to_string(path) { - Ok(src) => return parse_and_print(&src), - Err(e) => { - eprintln!("file read error ({}): {}", path.display(), e); - return Ok(()); - } - } - } - } - - // No filename provided -> keep REPL behavior - repl_once() -} diff --git a/dev_tests/src/uc_lexer_verify.rs b/dev_tests/src/uc_lexer_verify.rs index 30273c6..64b661e 100644 --- a/dev_tests/src/uc_lexer_verify.rs +++ b/dev_tests/src/uc_lexer_verify.rs @@ -1,122 +1,341 @@ -use std::{collections::HashSet, fs, path::PathBuf}; +#![allow( + clippy::all, + clippy::pedantic, + clippy::nursery, + clippy::cargo, + clippy::restriction +)] -use rottlib::lexer::{DebugTools, TokenizedFile}; +use std::{ + collections::HashSet, + fs, + io::{self, Write}, + path::PathBuf, + time::Instant, +}; -/// Read `ignore.txt` (one path per line, `#` for comments) from root directory -/// and turn it into a canonicalized [`HashSet`]. 
+use encoding_rs::Encoding; +use rottlib::diagnostics::Diagnostic as Diag; +use rottlib::lexer::TokenizedFile; +use rottlib::parser::Parser; + +mod pretty; + +// ---------- CONFIG ---------- +const FILE_LIMIT: usize = 10000; // cap on files scanned +const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics +const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics +/// If true, print the old debug struct dump after each pretty diagnostic. +const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true; + +// Cargo.toml additions: +// is-terminal = "0.4" +// sysinfo = { version = "0.30", features = ["multithread"] } +// walkdir = "2" +// chardet = "0.2" +// encoding_rs = "0.8" + +// Linux-only accurate RSS in MB. Fallback uses sysinfo. +fn rss_mb() -> u64 { + #[cfg(target_os = "linux")] + { + use std::io::Read; + let mut s = String::new(); + if let Ok(mut f) = std::fs::File::open("/proc/self/statm") + && f.read_to_string(&mut s).is_ok() + && let Some(rss_pages) = s + .split_whitespace() + .nth(1) + .and_then(|x| x.parse::().ok()) + { + let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 }; + return (rss_pages * page) / (1024 * 1024); + } + } + use sysinfo::{System, get_current_pid}; + let mut sys = System::new(); + sys.refresh_processes(); + let Ok(pid) = get_current_pid() else { return 0 }; + sys.process(pid).map_or(0, |p| p.memory() / 1024) +} + +fn mark(label: &str, t0: Instant) { + println!( + "[{:>14}] t={:>8.2?} rss={} MB", + label, + t0.elapsed(), + rss_mb() + ); +} + +/// Read `ignore.txt` next to `root` and build a canonicalized set. 
fn load_ignore_set(root: &std::path::Path) -> HashSet { let ignore_file = root.join("ignore.txt"); if !ignore_file.exists() { return HashSet::new(); } - let content = match fs::read_to_string(&ignore_file) { - Ok(content) => content, - Err(error) => { - eprintln!("Could not read {}: {error}", ignore_file.display()); + Ok(s) => s, + Err(e) => { + eprintln!("Could not read {}: {e}", ignore_file.display()); return HashSet::new(); } }; - content .lines() .map(str::trim) - .filter(|line| !line.is_empty() && !line.starts_with('#')) + .filter(|l| !l.is_empty() && !l.starts_with('#')) .filter_map(|line| { - let next_path = PathBuf::from(line); - let absolute_path = if next_path.is_absolute() { - next_path - } else { - root.join(next_path) - }; - fs::canonicalize(absolute_path).ok() + let p = PathBuf::from(line); + let abs = if p.is_absolute() { p } else { root.join(p) }; + fs::canonicalize(abs).ok() }) .collect() } -/// CLI: `verify_uc ` - find all `.uc` files in the provided directory -/// (except those listed in `ignore.txt` in the root) and test them all. -/// -/// Reported execution time is the tokenization time, without considering time -/// it takes to read files from disk. -/// -/// `ignore.txt` is for listing specific files, not directories. -fn main() { - let root_dir = std::env::args().nth(1).unwrap(); // it is fine to crash debug utility - let root = PathBuf::from(&root_dir); +/// Wait for Enter if running in a TTY, shown before printing errors. 
+fn wait_before_errors(msg: &str) { + let _ = io::stdout().flush(); + if is_terminal::is_terminal(io::stdin()) { + eprint!("{msg}"); + let _ = io::stderr().flush(); + let mut s = String::new(); + let _ = io::stdin().read_line(&mut s); + } +} +/// CLI: `verify_uc [file_name]` +/// +fn main() { + let mut args = std::env::args().skip(1); + let root_dir = args.next().unwrap_or_else(|| { + eprintln!("Usage: verify_uc [file_name]"); + std::process::exit(1); + }); + + let target_raw = args.next(); // optional file name hint + let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase()); + let single_mode = target_ci.is_some(); + + let root = PathBuf::from(&root_dir); if !root.exists() { eprintln!("Root directory '{root_dir}' does not exist."); std::process::exit(1); } - // Load files - let ignored_paths = load_ignore_set(&root); + let t0 = Instant::now(); + mark("baseline", t0); + + // Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode + let ignored = load_ignore_set(&root); let mut uc_files: Vec<(PathBuf, String)> = Vec::new(); + let mut seen = 0usize; + let mut picked_any = false; + for entry in walkdir::WalkDir::new(&root) .into_iter() - .filter_map(Result::ok) // for debug tool this is ok - .filter(|entry| { - let path = entry.path(); - // Skip anything explicitly ignored - if let Ok(absolute_path) = fs::canonicalize(path) { - if ignored_paths.contains(&absolute_path) { - return false; - } + .filter_map(Result::ok) + .filter(|e| { + let path = e.path(); + if let Ok(abs) = fs::canonicalize(path) + && ignored.contains(&abs) + { + return false; } - // Must be *.uc path.is_file() && path .extension() - .and_then(|extension| extension.to_str()) - .is_some_and(|extension| extension.eq_ignore_ascii_case("uc")) + .and_then(|e| e.to_str()) + .is_some_and(|e| e.eq_ignore_ascii_case("uc")) }) { + if !single_mode && seen >= FILE_LIMIT { + break; + } + + // If in single-file mode, keep only the first whose file name matches. 
+ if let Some(needle) = target_ci.as_deref() { + let fname = entry + .path() + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or(""); + let fname_lc = fname.to_ascii_lowercase(); + if !(fname_lc == needle || fname_lc.contains(needle)) { + continue; + } + } + + seen += 1; + let path = entry.path(); match fs::read(path) { - Ok(raw_bytes) => { - // Auto-detect encoding for old Unreal script sources - let (encoding_label, _, _) = chardet::detect(&raw_bytes); - let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes()) - .unwrap_or(encoding_rs::UTF_8); - let (decoded_text, _, _) = encoding.decode(&raw_bytes); - uc_files.push((path.to_path_buf(), decoded_text.into_owned())); + Ok(raw) => { + let (label, _, _) = chardet::detect(&raw); + let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8); + let (txt, _, _) = enc.decode(&raw); + uc_files.push((path.to_path_buf(), txt.into_owned())); + picked_any = true; + if single_mode { + // Only the first match. + break; + } } - Err(error) => { - eprintln!("Failed to read `{}`: {error}", path.display()); + Err(e) => { + wait_before_errors("Read error detected. 
Press Enter to print details..."); + eprintln!("Failed to read `{}`: {e}", path.display()); std::process::exit(1); } } } - println!("Loaded {} .uc files into memory.", uc_files.len()); - // Tokenize and measure performance - let start_time = std::time::Instant::now(); - let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files - .iter() - .map(|(path, source_code)| { - let tokenized_file = TokenizedFile::from_str(source_code); - if tokenized_file.has_errors() { - println!("TK: {}", path.display()); - } - (path.clone(), tokenized_file) - }) - .collect(); - let elapsed_time = start_time.elapsed(); + if single_mode && !picked_any { + let needle = target_raw.as_deref().unwrap(); + eprintln!( + "No .uc file matching '{needle}' found under '{}'.", + root.display() + ); + std::process::exit(1); + } + + println!( + "Loaded {} .uc files into memory (cap={}, reached={}).", + uc_files.len(), + FILE_LIMIT, + if !single_mode && uc_files.len() >= FILE_LIMIT { + "yes" + } else { + "no" + } + ); + mark("after_read", t0); + + // Stage 1: tokenize all + let t_tok = Instant::now(); + let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len()); + let mut tk_error_idx: Option = None; + + for (i, (path, source)) in uc_files.iter().enumerate() { + let tf = TokenizedFile::tokenize(source); + if tk_error_idx.is_none() && tf.has_errors() { + tk_error_idx = Some(i); + } + tokenized.push((path.clone(), tf)); + } println!( "Tokenized {} files in {:.2?}", - tokenized_files.len(), - elapsed_time + tokenized.len(), + t_tok.elapsed() ); + mark("after_tokenize", t0); - // Round-trip check - for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) { - let reconstructed = tokenized_file.reconstruct_source(); - if original != &reconstructed { - eprintln!("Reconstruction mismatch in `{}`!", path.display()); - std::process::exit(1); + // If tokenization error: wait, dump tokens for the first failing file, then exit. 
+ if let Some(idx) = tk_error_idx { + let (bad_path, _) = &tokenized[idx]; + wait_before_errors("Tokenization error found. Press Enter to dump tokens..."); + eprintln!("--- Tokenization error in: {}", bad_path.display()); + //bad_tf.dump_debug_layout(); // from DebugTools + std::process::exit(1); + } + + // Stage 2: parse all with ONE arena kept alive + let arena = rottlib::arena::Arena::new(); + let t_parse = Instant::now(); + + // First failing parse: (tokenized_index, diagnostics, fatal) + let mut first_fail: Option<(usize, Vec, Option)> = None; + + for (i, (path, tk)) in tokenized.iter().enumerate() { + // --- progress line BEFORE parsing this file --- + { + use std::io::Write; + eprint!( + "Parsing [{}/{}] {} | rss={} MB\r\n", + i + 1, + tokenized.len(), + path.display(), + rss_mb() + ); + let _ = io::stderr().flush(); + } + + let mut parser = Parser::new(tk, &arena); + + match parser.parse_source_file() { + Ok(_) => { + if !parser.diagnostics.is_empty() && first_fail.is_none() { + first_fail = Some((i, parser.diagnostics.clone(), None)); + } + } + Err(e) => { + if first_fail.is_none() { + first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}")))); + } + } } } - println!("All .uc files matched successfully."); + println!( + "Parsed {} files in {:.2?}", + tokenized.len(), + t_parse.elapsed() + ); + mark("after_parse", t0); + + // Summary + println!("--- Summary ---"); + println!("Files processed: {}", tokenized.len()); + println!("File cap: {FILE_LIMIT}"); + + if let Some((idx, diags, fatal)) = first_fail { + wait_before_errors("Parse issues detected. 
Press Enter to print diagnostics..."); + let (path, tf) = &tokenized[idx]; + eprintln!("--- Parse issues in first failing file ---"); + eprintln!("File: {}", path.display()); + if let Some(f) = &fatal { + eprintln!("Fatal parse error: {f}"); + } + if diags.is_empty() && fatal.is_none() { + eprintln!("(no diagnostics captured)"); + } else { + let use_colors = is_terminal::is_terminal(io::stderr()); + let fname = path.display().to_string(); + let total = diags.len(); + let first_n = DIAG_SHOW_FIRST.min(total); + let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n)); + + if total > first_n + last_n { + // first window + for (k, d) in diags.iter().take(first_n).enumerate() { + let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors); + eprintln!("{s}"); + if ALSO_PRINT_DEBUG_AFTER_PRETTY { + eprintln!("#{}: {:#?}", k + 1, d); + } + } + eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n)); + // last window + let start = total - last_n; + for (offset, d) in diags.iter().skip(start).enumerate() { + let idx_global = start + offset + 1; + let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors); + eprintln!("{s}"); + if ALSO_PRINT_DEBUG_AFTER_PRETTY { + eprintln!("#{idx_global}: {d:#?}"); + } + } + } else { + for (k, d) in diags.iter().enumerate() { + let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors); + eprintln!("{s}"); + if ALSO_PRINT_DEBUG_AFTER_PRETTY { + eprintln!("#{}: {:#?}", k + 1, d); + } + } + } + } + std::process::exit(1); + } + + println!("All files parsed without diagnostics."); } diff --git a/dev_tests/src/verify_expr.rs b/dev_tests/src/verify_expr.rs new file mode 100644 index 0000000..f8b0b78 --- /dev/null +++ b/dev_tests/src/verify_expr.rs @@ -0,0 +1,85 @@ +#![allow( + clippy::all, + clippy::pedantic, + clippy::nursery, + clippy::cargo, + clippy::restriction +)] + +use rottlib::arena::Arena; +use rottlib::lexer::TokenizedFile; +use rottlib::parser::Parser; + +mod pretty; + +/// Expressions to 
test. +/// +/// Add, remove, or edit entries here. +/// Using `(&str, &str)` gives each case a human-readable label. +const TEST_CASES: &[(&str, &str)] = &[ + ("simple_add", "1 + 2 * 3"), + ("member_call", "Foo.Bar(1, 2)"), + ("index_member", "arr[5].X"), + ("tagged_name", "Class'MyPackage.MyThing'"), + ("broken_expr", "a + (]\n//AAA\n//BBB\n//CCC\n//DDD\n//EEE\n//FFF"), +]; + +/// If true, print the parsed expression using Debug formatting. +const PRINT_PARSED_EXPR: bool = false; + +/// If true, print diagnostics even when parsing returned a value. +const ALWAYS_PRINT_DIAGNOSTICS: bool = true; + +fn main() { + let arena = Arena::new(); + + println!("Running {} expression test case(s)...", TEST_CASES.len()); + println!(); + + let mut had_any_problem = false; + + for (idx, (label, source)) in TEST_CASES.iter().enumerate() { + println!("============================================================"); + println!("Case #{:02}: {}", idx + 1, label); + println!("Source: {}", source); + println!("------------------------------------------------------------"); + + let tf = TokenizedFile::tokenize(source); + + let mut parser = Parser::new(&tf, &arena); + let expr = parser.parse_expression(); + + println!("parse_expression() returned."); + + if PRINT_PARSED_EXPR { + println!("Parsed expression:"); + println!("{expr:#?}"); + } + + if parser.diagnostics.is_empty() { + println!("Diagnostics: none"); + } else { + had_any_problem = true; + println!("Diagnostics: {}", parser.diagnostics.len()); + + if ALWAYS_PRINT_DIAGNOSTICS { + let use_colors = false; + for (k, diag) in parser.diagnostics.iter().enumerate() { + let rendered = pretty::render_diagnostic(diag, &tf, Some(label), use_colors); + println!("Diagnostic #{}:", k + 1); + println!("{rendered}"); + } + } + } + + println!(); + } + + println!("============================================================"); + if had_any_problem { + println!("Done. 
At least one case had tokenization or parse diagnostics."); + std::process::exit(1); + } else { + println!("Done. All cases completed without diagnostics."); + } +} \ No newline at end of file diff --git a/perf.data.old b/perf.data.old new file mode 100644 index 0000000..5b25c69 Binary files /dev/null and b/perf.data.old differ diff --git a/rottlib/Cargo.toml b/rottlib/Cargo.toml index 9a0146a..78fdbdc 100644 --- a/rottlib/Cargo.toml +++ b/rottlib/Cargo.toml @@ -7,6 +7,11 @@ edition = "2024" default = [] debug = [] +[lints] +workspace = true + [dependencies] logos = "0.15" -bumpalo = { version = "3", features = ["boxed", "collections"] } \ No newline at end of file +bumpalo = { version = "3", features = ["boxed", "collections"] } +backtrace = "0.3" +crossterm = "0.*" \ No newline at end of file diff --git a/rottlib/src/arena.rs b/rottlib/src/arena.rs index f12111a..77112a8 100644 --- a/rottlib/src/arena.rs +++ b/rottlib/src/arena.rs @@ -1,21 +1,26 @@ -//! Arena submodule defining types that exist in their own memory space and -//! allow multiple cheap allocations (both performance- and fragmentation-wise). +//! Arena submodule defining types allocated from a dedicated bump arena, +//! allowing many cheap allocations with fast bulk reclamation. //! -//! ## Memory safety +//! ## Destruction and resource management //! -//! Dropping the [`Arena`] frees all its memory at once and does not run -//! [`Drop`] for values allocated within it. Avoid storing types that implement -//! [`Drop`] or own external resources inside [`ArenaNode`], [`ArenaVec`], or -//! [`ArenaString`]. If you must, arrange an explicit "drain/drop" pass before -//! the arena is dropped. +//! Dropping the [`Arena`] reclaims the arena's memory in bulk. Destructors are +//! not run for arena allocations that are still live at that point. Therefore, +//! avoid storing types whose cleanup must reliably happen at arena release, +//! 
especially types that own memory allocations or external resources outside +//! the arena. +//! +//! [`ArenaNode`], [`ArenaVec`], and [`ArenaString`] are provided so commonly +//! used owned data can keep their storage inside the arena rather than in +//! separate global-heap allocations. +use core::borrow::Borrow; use core::fmt::{Debug, Display, Formatter, Result}; use core::ops::{Deref, DerefMut}; use bumpalo::{Bump, boxed, collections}; use crate::ast::AstSpan; -use crate::lexer::TokenLocation; +use crate::lexer::TokenPosition; /// Object that manages a separate memory space, which can be deallocated all /// at once after use. @@ -23,11 +28,8 @@ use crate::lexer::TokenLocation; /// All allocations borrow the arena immutably. /// /// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it -/// (including values contained in [`ArenaNode`], [`ArenaVec`] -/// and [`ArenaString`]). -/// -/// This arena is not thread-safe (`!Send`, `!Sync`). Values borrow the arena -/// and therefore cannot be sent across threads independently. +/// (including values contained in [`ArenaNode`], [`ArenaVec`] and +/// [`ArenaString`]). #[derive(Debug)] pub struct Arena { bump: Bump, @@ -48,38 +50,47 @@ impl Arena { ArenaVec(collections::Vec::new_in(&self.bump)) } - ///Allocates a copy of `string` in this arena and returns + /// Allocates a copy of `string` in this arena and returns /// an [`ArenaString`]. #[must_use] pub fn string(&self, string: &str) -> ArenaString<'_> { ArenaString(collections::String::from_str_in(string, &self.bump)) } - /// Allocates `value` in this arena with the given `span`, - /// returning an [`ArenaNode`]. + /// Allocates `value` in this arena and attaches `span`. /// - /// The node's storage borrows this arena and cannot outlive it. - /// - /// Note: `T`'s [`Drop`] is not run when the arena is dropped. + /// The returned node borrows the arena and cannot outlive it. 
+ /// If it is still live when the arena is dropped, its destructor is not run. #[must_use] - pub fn alloc(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> { + pub fn alloc_node(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> { ArenaNode { - inner: boxed::Box::new_in(value, &self.bump), + value: boxed::Box::new_in(value, &self.bump), span, } } - pub fn alloc_between( + /// Allocates `value` in this arena and attaches the span from `start` to + /// `end`. + /// + /// The returned node borrows the arena and cannot outlive it. + /// If it is still live when the arena is dropped, its destructor is not run. + #[must_use] + pub fn alloc_node_between( &self, value: T, - from: TokenLocation, - to: TokenLocation, + start: TokenPosition, + end: TokenPosition, ) -> ArenaNode<'_, T> { - self.alloc(value, AstSpan { from, to }) + self.alloc_node(value, AstSpan::range(start, end)) } - pub fn alloc_at(&self, value: T, at: TokenLocation) -> ArenaNode<'_, T> { - self.alloc(value, AstSpan { from: at, to: at }) + /// Allocates `value` in this arena and attaches a span covering `at`. + /// + /// The returned node borrows the arena and cannot outlive it. + /// If it is still live when the arena is dropped, its destructor is not run. + #[must_use] + pub fn alloc_node_at(&self, value: T, at: TokenPosition) -> ArenaNode<'_, T> { + self.alloc_node(value, AstSpan::new(at)) } } @@ -91,15 +102,11 @@ impl Default for Arena { /// An arena-allocated box with an attached source span. /// -/// Equality and hashing take into account both the contained `T` and the `span` -/// (when `T: Eq + Hash`). -/// -/// Note: `T`'s [`Drop`] is not run when the arena is dropped. +/// Dropping the node normally runs `Drop` for the inner value. +/// Dropping the arena does not itself perform a separate destructor pass. #[derive(Hash, PartialEq, Eq)] pub struct ArenaNode<'arena, T> { - /// Value allocated in the arena; this node owns it. - inner: boxed::Box<'arena, T>, - /// Token range covered by the value. 
+ value: boxed::Box<'arena, T>, span: AstSpan, } @@ -108,74 +115,53 @@ impl<'arena, T> ArenaNode<'arena, T> { #[must_use] pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self { Self { - inner: boxed::Box::new_in(value, &arena.bump), + value: boxed::Box::new_in(value, &arena.bump), span, } } - /// Creates a new [`ArenaNode`] for an AST node that spans a single token. - pub fn from_token_location( - value: T, - token_location: crate::lexer::TokenLocation, - arena: &'arena Arena, - ) -> Self { - Self { - inner: boxed::Box::new_in(value, &arena.bump), - span: AstSpan { - from: token_location, - to: token_location, - }, - } - } - - pub fn span_mut(&mut self) -> &mut AstSpan { + /// Returns a mutable reference to the token span covered by this node. + #[must_use] + pub const fn span_mut(&mut self) -> &mut AstSpan { &mut self.span } - pub fn extend_to(&mut self, to: TokenLocation) { - self.span.to = to; - } - - pub fn extend_from(&mut self, from: TokenLocation) { - self.span.from = from; - } - /// Returns the token span covered by this node. - pub fn span(&self) -> &AstSpan { + #[must_use] + pub const fn span(&self) -> &AstSpan { &self.span } } -impl<'arena, T> Deref for ArenaNode<'arena, T> { +impl Deref for ArenaNode<'_, T> { type Target = T; fn deref(&self) -> &T { - &self.inner + &self.value } } -impl<'arena, T> DerefMut for ArenaNode<'arena, T> { +impl DerefMut for ArenaNode<'_, T> { fn deref_mut(&mut self) -> &mut T { - &mut self.inner + &mut self.value } } -impl<'arena, T: Debug> Debug for ArenaNode<'arena, T> { +impl Debug for ArenaNode<'_, T> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { f.debug_struct("ArenaNode") .field("inner", &**self) - .field("span", &self.span()) + .field("span", self.span()) .finish() } } -/// Version of [`Vec`] that can be safely used inside a memory arena. +/// Version of [`Vec`] whose backing storage lives in the arena. /// -/// Elements do not have their destructors run when the arena is dropped. 
-/// -/// This type dereferences to `[T]` and supports iteration by reference -/// (`&ArenaVec` and `&mut ArenaVec` implement [`IntoIterator`]). -#[derive(Clone, Debug, Hash, PartialEq, Eq)] +/// Elements are dropped when the `ArenaVec` itself is dropped normally. +/// Capacity growth may leave old buffers in the arena until the whole arena +/// is reclaimed. +#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>); impl<'arena, T> ArenaVec<'arena, T> { @@ -190,18 +176,28 @@ impl<'arena, T> ArenaVec<'arena, T> { /// Growth is backed by the arena; increasing capacity allocates new space /// in the arena and never frees previous blocks. pub fn push(&mut self, value: T) { - self.0.push(value) + self.0.push(value); } + /// Reserves capacity for at least `additional` more elements. + /// + /// The collection may reserve more space to avoid frequent reallocations. + /// If growth requires a new allocation, the previous buffer remains in the + /// arena until the arena is reclaimed. pub fn reserve(&mut self, additional: usize) { - self.0.reserve(additional) + self.0.reserve(additional); } - pub fn extend>(&mut self, it: I) { - self.0.extend(it) + + /// Extends the vector with the contents of `items`. + /// + /// Growth may allocate a new buffer in the arena and leave the previous + /// buffer in place until the arena is reclaimed. 
+ pub fn extend>(&mut self, items: I) { + self.0.extend(items); } } -impl<'arena, T> Deref for ArenaVec<'arena, T> { +impl Deref for ArenaVec<'_, T> { type Target = [T]; fn deref(&self) -> &Self::Target { @@ -209,48 +205,41 @@ impl<'arena, T> Deref for ArenaVec<'arena, T> { } } -impl<'arena, T> DerefMut for ArenaVec<'arena, T> { +impl DerefMut for ArenaVec<'_, T> { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } -impl<'arena, 's, T> IntoIterator for &'s ArenaVec<'arena, T> { - type Item = &'s T; - type IntoIter = core::slice::Iter<'s, T>; +impl<'iter, T> IntoIterator for &'iter ArenaVec<'_, T> { + type Item = &'iter T; + type IntoIter = core::slice::Iter<'iter, T>; fn into_iter(self) -> Self::IntoIter { self.0.iter() } } -impl<'arena, 's, T> IntoIterator for &'s mut ArenaVec<'arena, T> { - type Item = &'s mut T; - type IntoIter = core::slice::IterMut<'s, T>; +impl<'iter, T> IntoIterator for &'iter mut ArenaVec<'_, T> { + type Item = &'iter mut T; + type IntoIter = core::slice::IterMut<'iter, T>; fn into_iter(self) -> Self::IntoIter { self.0.iter_mut() } } -/// Version of [`String`] that can be safely used inside a memory arena. -/// -/// This type dereferences to [`str`] and implements [`AsRef`] and -/// [`core::borrow::Borrow`] for ergonomic use with APIs expecting string -/// slices. -/// -/// The string borrows the arena and cannot outlive it. Dropping the arena -/// frees its memory without running `Drop` for the string contents. -#[derive(Clone, Debug, Hash, PartialEq, Eq)] +/// Version of [`String`] whose backing storage lives in the arena. +#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct ArenaString<'arena>(collections::String<'arena>); impl<'arena> ArenaString<'arena> { /// Allocates a copy of `string` in `arena` and returns an [`ArenaString`]. 
#[must_use] - pub fn from_str_in(string: &str, arena: &'arena Arena) -> Self { - Self(collections::String::from_str_in(string, &arena.bump)) + pub fn from_str_in(text: &str, arena: &'arena Arena) -> Self { + Self(collections::String::from_str_in(text, &arena.bump)) } } -impl<'arena> Deref for ArenaString<'arena> { +impl Deref for ArenaString<'_> { type Target = str; fn deref(&self) -> &Self::Target { @@ -258,19 +247,19 @@ impl<'arena> Deref for ArenaString<'arena> { } } -impl<'arena> AsRef for ArenaString<'arena> { +impl AsRef for ArenaString<'_> { fn as_ref(&self) -> &str { &self.0 } } -impl<'arena> core::borrow::Borrow for ArenaString<'arena> { +impl Borrow for ArenaString<'_> { fn borrow(&self) -> &str { &self.0 } } -impl<'arena> Display for ArenaString<'arena> { +impl Display for ArenaString<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { Display::fmt(&self.0, f) } diff --git a/rottlib/src/ast.rs b/rottlib/src/ast.rs deleted file mode 100644 index 4a06f9d..0000000 --- a/rottlib/src/ast.rs +++ /dev/null @@ -1,387 +0,0 @@ -use crate::arena::ArenaVec; - -use super::lexer::TokenLocation; - -use core::fmt; - -use crate::arena::{Arena, ArenaNode, ArenaString}; - -// All inclusive! 
-#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] -pub struct AstSpan { - pub from: TokenLocation, - pub to: TokenLocation, -} - -impl AstSpan { - pub fn merge(left_span: &AstSpan, right_span: &AstSpan) -> AstSpan { - AstSpan { - from: left_span.from, - to: right_span.to, - } - } - - pub fn new(single_location: TokenLocation) -> AstSpan { - AstSpan { - from: single_location, - to: single_location, - } - } - - pub fn range(from: TokenLocation, to: TokenLocation) -> AstSpan { - AstSpan { from, to } - } - - pub fn extend_to(&mut self, right_most_location: TokenLocation) { - if right_most_location > self.to { - self.to = right_most_location - } - } -} - -#[derive(Clone, Copy, Debug)] -pub enum PrefixOperator { - Not, - Minus, - BitwiseNot, - Increment, - Decrement, -} - -#[derive(Clone, Copy, Debug)] -pub enum PostfixOperator { - Increment, - Decrement, -} - -#[derive(Clone, Copy, Debug)] -pub enum InfixOperator { - // Assignments - Assign, - MultiplyAssign, - DivideAssign, - ModuloAssign, - PlusAssign, - MinusAssign, - ConcatAssign, - ConcatSpaceAssign, - // String operations - ConcatSpace, - Concat, - // Logical - And, - Xor, - Or, - // Bit-wise - BitwiseAnd, - BitwiseOr, - BitwiseXor, - // Not-equal - NotEqual, - // Comparison - Equal, - ApproximatelyEqual, - Less, - LessEqual, - Greater, - GreaterEqual, - ClockwiseFrom, - // Shifts - LeftShift, - LogicalRightShift, - RightShift, - // Terms - Plus, - Minus, - // Modulo - Modulo, - // Factor - Multiply, - Divide, - Dot, - Cross, - // Exponentiation - Exponentiation, -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug)] -pub enum Expression<'src, 'arena> { - Binary( - ExpressionRef<'src, 'arena>, - InfixOperator, - ExpressionRef<'src, 'arena>, - ), - LeftUnary(PrefixOperator, ExpressionRef<'src, 'arena>), - RightUnary(ExpressionRef<'src, 'arena>, PostfixOperator), - - Identifier(&'src str), - String(ArenaString<'arena>), - Integer(i128), - Float(f64), - - Bool(bool), - None, - 
Parentheses(ExpressionRef<'src, 'arena>), - - Block { - // All these end with `;` - statements: ArenaVec<'arena, StatementRef<'src, 'arena>>, - // Last statement, but only if it doesn't end with `;` - tail: Option>, - }, - If { - condition: ExpressionRef<'src, 'arena>, - body: ExpressionRef<'src, 'arena>, - else_body: Option>, - }, - While { - condition: ExpressionRef<'src, 'arena>, - body: ExpressionRef<'src, 'arena>, - }, - DoUntil { - condition: ExpressionRef<'src, 'arena>, - body: ExpressionRef<'src, 'arena>, - }, - ForEach { - iterator: ExpressionRef<'src, 'arena>, - body: ExpressionRef<'src, 'arena>, - }, - For { - init: Option>, - condition: Option>, - step: Option>, - body: ExpressionRef<'src, 'arena>, - }, - Switch { - selector: ExpressionRef<'src, 'arena>, - cases: ArenaVec<'arena, CaseRef<'src, 'arena>>, - // default case - default_arm: Option>>, - // last statement of the case block - tail: Option>, - }, - Goto(ArenaString<'arena>), - Continue, - Break(Option>), - Return(Option>), - // For injecting in place of parts that couldn't be parsed - // (along with text that wasn't able to be parsed) - Error, -} - -pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>; - -#[derive(Debug)] -pub struct VariableDeclarator<'src, 'arena> { - pub name: ArenaString<'arena>, - pub initializer: Option>, -} - -#[derive(Debug)] -pub struct SwitchCase<'src, 'arena> { - pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>, // UScript allows expressions; multiple labels ok - pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, // allow fallthrough unless a Break/Goto ends it -} - -pub type CaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>; - -#[derive(Debug)] -pub enum Statement<'src, 'arena> { - // For the cases where user just used too many semi-colons `;;;;` - Empty, - Expression(ExpressionRef<'src, 'arena>), - // Just declarations without assignment: - // `local int i, j, k` - LocalVariableDeclaration { - type_name: 
ArenaString<'arena>, - identifiers: ArenaVec<'arena, ArenaString<'arena>>, - }, - // Just `int i, j = 3, k = 0` - VariableDeclaration { - type_name: ArenaString<'arena>, - declarations: ArenaVec<'arena, VariableDeclarator<'src, 'arena>>, - }, - Label(ArenaString<'arena>), - // For injecting in place of parts that couldn't be parsed - // (along with text that wasn't able to be parsed) - Error, -} - -pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>; - -impl<'src, 'arena> Expression<'src, 'arena> { - pub fn new_prefix( - arena: &'arena Arena, - op_position: TokenLocation, - op: PrefixOperator, - rhs: ArenaNode<'arena, Self>, - ) -> ArenaNode<'arena, Self> { - let span = AstSpan { - from: op_position, - to: rhs.span().to, - }; - ArenaNode::new_in(Self::LeftUnary(op, rhs), span, arena) - } - pub fn new_postfix( - arena: &'arena Arena, - lhs: ArenaNode<'arena, Self>, - op: PostfixOperator, - op_position: TokenLocation, - ) -> ArenaNode<'arena, Self> { - let span = AstSpan { - from: lhs.span().from, - to: op_position, - }; - ArenaNode::new_in(Self::RightUnary(lhs, op), span, arena) - } - pub fn new_binary( - arena: &'arena Arena, - lhs: ArenaNode<'arena, Self>, - op: InfixOperator, - rhs: ArenaNode<'arena, Self>, - ) -> ArenaNode<'arena, Self> { - let span = AstSpan::merge(&lhs.span(), &rhs.span()); - ArenaNode::new_in(Self::Binary(lhs, op, rhs), span, arena) - } -} - -pub enum DeclarationLiteral<'src, 'arena> { - None, - Bool(bool), - Integer(i128), - Float(f64), - String(ArenaString<'arena>), - Identifier(&'src str), -} - -pub type DeclarationLiteralRef<'src, 'arena> = (DeclarationLiteral<'src, 'arena>, TokenLocation); - -/// Returns `true` for expressions that require `;` when used as a statement -/// (i.e., everything except blocky control-flow forms). 
-pub trait NeedsSemi { - fn needs_semicolon(&self) -> bool; -} - -impl<'src, 'arena> NeedsSemi for Expression<'src, 'arena> { - #[inline] - fn needs_semicolon(&self) -> bool { - match self { - Expression::Block { .. } - | Expression::If { .. } - | Expression::While { .. } - | Expression::DoUntil { .. } - | Expression::ForEach { .. } - | Expression::For { .. } - | Expression::Error => false, - - // All other expressions require `;` when used as a statement. - _ => true, - } - } -} - -// If `ArenaNode` derefs to `T`, this works as-is. -// Otherwise, replace `(**self)` with your accessor, e.g. `self.value()` or `self.get()`. -impl<'src, 'arena> NeedsSemi for ExpressionRef<'src, 'arena> { - #[inline] - fn needs_semicolon(&self) -> bool { - (**self).needs_semicolon() - } -} - -impl<'src, 'arena> NeedsSemi for Statement<'src, 'arena> { - #[inline] - fn needs_semicolon(&self) -> bool { - match self { - Statement::Empty | Statement::Label { .. } | Statement::Error { .. } => false, - // All other expressions require `;` when used as a statement. - _ => true, - } - } -} - -// If `ArenaNode` derefs to `T`, this works as-is. -// Otherwise, replace `(**self)` with your accessor, e.g. `self.value()` or `self.get()`. 
-impl<'src, 'arena> NeedsSemi for StatementRef<'src, 'arena> { - #[inline] - fn needs_semicolon(&self) -> bool { - (**self).needs_semicolon() - } -} - -impl fmt::Display for PrefixOperator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - PrefixOperator::Not => "!", - PrefixOperator::Minus => "-", - PrefixOperator::BitwiseNot => "~", - PrefixOperator::Increment => "++.", - PrefixOperator::Decrement => "--.", - }; - write!(f, "{s}") - } -} -impl fmt::Display for PostfixOperator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let s = match self { - PostfixOperator::Increment => ".++", - PostfixOperator::Decrement => ".--", - }; - write!(f, "{s}") - } -} -impl fmt::Display for InfixOperator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - use InfixOperator::*; - let s = match self { - // Assignments - Assign => "=", - MultiplyAssign => "*=", - DivideAssign => "/=", - ModuloAssign => "%=", - PlusAssign => "+=", - MinusAssign => "-=", - ConcatAssign => "$=", - ConcatSpaceAssign => "@=", - // String operations - ConcatSpace => "@", - Concat => "$", - // Logical - And => "&&", - Xor => "^^", - Or => "||", - // Bitwise - BitwiseAnd => "&", - BitwiseOr => "|", - BitwiseXor => "^", - // Not equal - NotEqual => "!=", - // Comparison - Equal => "==", - ApproximatelyEqual => "~+", - Less => "<", - LessEqual => "<=", - Greater => ">", - GreaterEqual => ">=", - ClockwiseFrom => "ClockwiseFrom", - // Shift - LeftShift => "<<", - LogicalRightShift => ">>>", - RightShift => ">>", - // Term - Plus => "+", - Minus => "-", - // Modulo - Modulo => "%", - // Factor - Multiply => "*", - Divide => "/", - Dot => "Dot", - Cross => "Cross", - // Exp - Exponentiation => "**", - }; - write!(f, "{s}") - } -} diff --git a/rottlib/src/ast/callables.rs b/rottlib/src/ast/callables.rs new file mode 100644 index 0000000..6251c56 --- /dev/null +++ b/rottlib/src/ast/callables.rs @@ -0,0 +1,235 @@ +//! Callable-declaration AST nodes. 
+//! +//! This module defines function-like declarations together with their +//! parameter lists and callable modifiers. +//! +//! The language groups several callable forms under a largely shared header +//! structure, including ordinary functions, events, delegates, and operator +//! declarations. This module preserves those forms as AST nodes together with +//! source-relevant modifier and parameter information. + +use super::{ + AstSpan, BlockBody, ExpressionRef, IdentifierToken, InfixOperatorName, PostfixOperatorName, + PrefixOperatorName, TypeSpecifierRef, +}; +use crate::arena::ArenaVec; +use crate::lexer::{Keyword, TokenPosition}; + +use crate::arena::ArenaNode; + +use core::convert::TryFrom; + +/// Parameter modifier kind. +/// +/// These modifiers apply to a single callable parameter and are preserved in +/// source order on the parameter node. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ParameterModifierKind { + Optional, + Out, + Skip, + Coerce, +} + +/// Parameter modifier together with the source position of its token. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ParameterModifier { + pub kind: ParameterModifierKind, + pub position: TokenPosition, +} + +/// One callable parameter declaration. +#[derive(Debug, PartialEq)] +pub struct Parameter<'src, 'arena> { + /// Parameter modifiers in source order. + pub modifiers: ArenaVec<'arena, ParameterModifier>, + /// Declared parameter type. + pub type_specifier: TypeSpecifierRef<'src, 'arena>, + /// Declared parameter name. + pub name: IdentifierToken, + /// Optional array-size expression from `[expr]`. + pub array_size: Option>, + /// Optional default-value expression after `=`. + pub default_value: Option>, +} + +/// Stable arena reference to a parameter node. +pub type ParameterRef<'src, 'arena> = ArenaNode<'arena, Parameter<'src, 'arena>>; + +/// Syntactic callable declaration kind. 
+/// +/// This enum distinguishes ordinary callable declarations from operator +/// declarations and preserves operator fixity / precedence where applicable. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum CallableKind { + /// Ordinary function declaration: `function`. + Function, + /// Event declaration: `event`. + Event, + /// Delegate declaration: `delegate`. + Delegate, + /// Prefix operator declaration: `preoperator`. + PrefixOperator, + /// Infix operator declaration: `operator()`. + /// + /// Precedence can be skipped as all supported operators already have + /// built-in precedence value that can't actually be changed in + /// `UnrealScript`. So omitting precedence when redefining operators is + /// a better approach. + InfixOperator(Option), + /// Postfix operator declaration: `postoperator`. + PostfixOperator, +} + +impl TryFrom for CallableKind { + type Error = (); + + /// Converts a keyword into a [`CallableKind`] when the callable form + /// is fully determined by the keyword alone. + /// + /// Returns `Err(())` for keywords that either do not represent callable + /// declarations or require additional syntax to determine the final kind + /// (for example `operator()`). + fn try_from(keyword: Keyword) -> Result { + let kind = match keyword { + Keyword::Function => Self::Function, + Keyword::Event => Self::Event, + Keyword::Delegate => Self::Delegate, + Keyword::PreOperator => Self::PrefixOperator, + Keyword::PostOperator => Self::PostfixOperator, + _ => return Err(()), + }; + Ok(kind) + } +} + +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum CallableName { + Identifier(IdentifierToken), + PrefixOperator(PrefixOperatorName), + InfixOperator(InfixOperatorName), + PostfixOperator(PostfixOperatorName), +} + +/// Callable definition. +/// +/// This node represents the common syntactic shape shared by function-like +/// declarations, including ordinary functions, events, delegates, and +/// operator forms. 
+#[derive(Debug, PartialEq)] +pub struct CallableDefinition<'src, 'arena> { + /// Declared callable name. + pub name: CallableName, + /// Callable declaration form. + pub kind: CallableKind, + /// Optional return type. + /// + /// Some callable forms may omit a return type entirely. + pub return_type_specifier: Option>, + /// Declaration modifiers attached to the callable header. + pub modifiers: ArenaVec<'arena, CallableModifier>, + /// Formal parameters in source order. + pub parameters: ArenaVec<'arena, ParameterRef<'src, 'arena>>, + /// Optional callable body. + /// + /// `None` represents a header-only declaration terminated by `;`. + /// `Some(...)` stores the parsed block statements belonging to the body. + pub body: Option>, +} + +/// Stable arena reference to a callable definition node. +pub type CallableDefinitionRef<'src, 'arena> = ArenaNode<'arena, CallableDefinition<'src, 'arena>>; + +/// Callable declaration modifier kind. +/// +/// These modifiers apply to the callable declaration itself rather than to an +/// individual parameter. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum CallableModifierKind { + Final, + /// `native` or `native()` + Native(Option), + Abstract, + Transient, + Public, + Protected, + Private, + Static, + /// `config()` + Config(IdentifierToken), + Const, + Deprecated, + NoExport, + Export, + Simulated, + Latent, + Iterator, + Singular, + Exec, + Reliable, + Unreliable, + NativeReplication, +} + +impl TryFrom for CallableModifierKind { + type Error = (); + + /// Converts a keyword into a [`CallableModifierKind`] when the modifier + /// is fully determined by the keyword alone. + /// + /// Returns `Err(())` for keywords that either do not represent callable + /// modifiers or require additional syntax + /// (e.g. `native(...)`, `config(...)`). 
+ #[allow(clippy::enum_glob_use)] + fn try_from(keyword: Keyword) -> Result { + use CallableModifierKind::*; + + let kind = match keyword { + Keyword::Final => Final, + Keyword::Abstract => Abstract, + Keyword::Transient => Transient, + Keyword::Public => Public, + Keyword::Protected => Protected, + Keyword::Private => Private, + Keyword::Static => Static, + Keyword::Const => Const, + Keyword::Deprecated => Deprecated, + Keyword::NoExport => NoExport, + Keyword::Export => Export, + Keyword::Simulated => Simulated, + Keyword::Latent => Latent, + Keyword::Iterator => Iterator, + Keyword::Singular => Singular, + Keyword::Exec => Exec, + Keyword::Reliable => Reliable, + Keyword::Unreliable => Unreliable, + Keyword::NativeReplication => NativeReplication, + _ => return Err(()), + }; + Ok(kind) + } +} + +/// Callable modifier together with its full source span. +/// +/// A modifier may occupy more than one token in source, for example when it +/// carries an argument like `native(12)` or `config(System)`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct CallableModifier { + /// Modifier kind. + pub kind: CallableModifierKind, + /// Span covering the full modifier syntax. + pub span: AstSpan, +} + +impl Keyword { + #[must_use] + pub fn is_callable_modifier(self) -> bool { + matches!(self, Self::Native | Self::Config) || CallableModifierKind::try_from(self).is_ok() + } + + #[must_use] + pub fn is_callable_kind_keyword(self) -> bool { + matches!(self, Self::Operator) || CallableKind::try_from(self).is_ok() + } +} diff --git a/rottlib/src/ast/expressions.rs b/rottlib/src/ast/expressions.rs new file mode 100644 index 0000000..998c77e --- /dev/null +++ b/rottlib/src/ast/expressions.rs @@ -0,0 +1,290 @@ +//! Expression AST nodes. +//! +//! This module defines ordinary expressions together with expression-shaped +//! control-flow and block forms parsed by the language. 
+use super::{ + AstSpan, IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator, + QualifiedIdentifierRef, StatementRef, +}; +use crate::arena::ArenaVec; + +use super::super::lexer::TokenPosition; + +use crate::arena::{Arena, ArenaNode, ArenaString}; + +/// Expression node used for both ordinary expressions and expression-shaped +/// statement/control-flow forms. +/// +/// This AST is intentionally broad: besides operators and literals, it also +/// includes blocks and control-flow constructs that syntactically occupy +/// expression parsing positions in the language. +#[allow(clippy::large_enum_variant)] +#[derive(Debug, PartialEq)] +pub enum Expression<'src, 'arena> { + /// Plain identifier expression. + Identifier(IdentifierToken), + /// String literal. + /// + /// The contents stored in arena memory are transformed (unescaped) version + /// of raw strings from the source. + String(ArenaString<'arena>), + /// Integer literal. + Integer(u128), + /// Floating-point literal. + Float(f64), + /// Boolean literal. + Bool(bool), + /// `None` literal / null-like language value. + None, + /// Explicit parenthesized subexpression: `(expr)`. + /// + /// Parentheses are preserved as a node instead of being discarded so later + /// stages can retain grouping information for diagnostics, formatting, or + /// source-faithful reconstruction. + Parentheses(ExpressionRef<'src, 'arena>), + /// Class-type reference parsed as a qualified identifier path. + /// + /// This is used for class-like type mentions that are not represented as a + /// tagged name literal. + ClassType(QualifiedIdentifierRef<'arena>), + /// Tagged or untagged quoted name literal. + /// + /// Examples: + /// - `class'Foo'` + /// - `Texture'Pkg.Group.Name'` + /// - `'Pkg.Group.Name'` if the grammar permits an untagged form + /// + /// `tag` stores the leading identifier token when present. `name` is the + /// raw content between quotes and is preserved exactly as written. 
+ NameLiteral { + tag: Option, + name: &'src str, + }, + /// Indexing operation: `target[index]`. + /// + /// This is produced after postfix parsing and binds tighter than any infix + /// operator. + Index { + target: ExpressionRef<'src, 'arena>, + index: ExpressionRef<'src, 'arena>, + }, + /// Member access: `target.name`. + /// + /// The member name is stored as a token reference rather than an owned + /// string so later stages can resolve exact spelling and source location + /// from the lexer/token stream. + Member { + target: ExpressionRef<'src, 'arena>, + name: IdentifierToken, + }, + /// Call expression: `callee(arg1, arg2, ...)`. + /// + /// Arguments are stored as `Option` to preserve omitted + /// arguments in syntaxes that allow empty slots. + Call { + callee: ExpressionRef<'src, 'arena>, + arguments: ArenaVec<'arena, Option>>, + }, + /// Prefix unary operator application: `op rhs`. + PrefixUnary(PrefixOperator, ExpressionRef<'src, 'arena>), + /// Postfix unary operator application: `lhs op`. + PostfixUnary(ExpressionRef<'src, 'arena>, PostfixOperator), + /// Binary operator application: `lhs op rhs`. + Binary( + ExpressionRef<'src, 'arena>, + InfixOperator, + ExpressionRef<'src, 'arena>, + ), + /// Block expression / statement block: `{ ... }`. + /// + /// The contained statements are preserved in source order. + Block(StatementList<'src, 'arena>), + /// Conditional expression / statement. + /// + /// Both arms use `BranchBody` so the parser can preserve legacy one-line + /// bodies, optional trailing semicolons, and recovery anchors. 
+ If { + condition: ExpressionRef<'src, 'arena>, + body: BranchBody<'src, 'arena>, + else_body: Option>, + }, + /// `while (condition) body` + While { + condition: ExpressionRef<'src, 'arena>, + body: BranchBody<'src, 'arena>, + }, + /// `do body until (condition)` + DoUntil { + condition: ExpressionRef<'src, 'arena>, + body: BranchBody<'src, 'arena>, + }, + /// `foreach iterator body` + /// + /// The iteration source / iterator expression is stored as a normal + /// expression node because the language permits nontrivial syntax there. + ForEach { + iterated_expression: ExpressionRef<'src, 'arena>, + body: BranchBody<'src, 'arena>, + }, + /// Traditional three-part `for` loop. + /// + /// Each header component is optional to support forms such as: + /// - `for (;;)` + /// - `for (init;;)` + /// - `for (;cond;)` + /// - `for (;;step)` + For { + initialization: Option>, + condition: Option>, + step: Option>, + body: BranchBody<'src, 'arena>, + }, + /// `switch` construct. + /// + /// `cases` contains all explicit case arms in source order. + /// `default_arm` stores the statements of the default branch, if present. + Switch { + selector: ExpressionRef<'src, 'arena>, + cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>, + default_arm: Option>>, + }, + /// `goto` statement. + /// + /// Stores the token position of the target token rather than duplicating + /// its textual representation in the AST. On successful parsing refers to + /// either identifier or name literal. + Goto(TokenPosition), + /// `continue` statement. + Continue, + /// `break` statement, optionally with an attached expression if the + /// language form allows one. + Break(Option>), + /// `return` statement, optionally carrying a returned expression. + Return(Option>), + /// Object construction / allocation form using the language's `new` syntax. + /// + /// The first three arguments are optional positional control arguments. 
+ /// `class_specifier` is the required class expression that identifies what + /// should be constructed. + New { + outer_argument: Option>, + name_argument: Option>, + flags_argument: Option>, + class_specifier: ExpressionRef<'src, 'arena>, + }, + /// Recovery placeholder inserted when an expression could not be parsed. + /// + /// This allows the parser to continue building a larger AST and report more + /// than one error in a single pass. + Error, +} + +/// Statements contained in a `{ ... }` block. +pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'arena>>; + +/// Statements contained in a `{ ... }` block with a span. +#[derive(Debug, PartialEq)] +pub struct BlockBody<'src, 'arena> { + pub statements: StatementList<'src, 'arena>, + pub span: AstSpan, +} + +/// Stable arena reference to an expression node. +pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>; + +/// Optional expression payload used in grammar positions where an expression +/// may be omitted entirely. +pub type OptionalExpression<'src, 'arena> = Option>; + +/// Body of a control-flow branch. +/// +/// Branch bodies are stored separately so constructs such as `if`, `while`, +/// and `for` can preserve both the parsed body and branch-specific source +/// details. +#[derive(Debug, PartialEq)] +pub struct BranchBody<'src, 'arena> { + /// Parsed branch payload. + /// + /// This is `None` when the body is absent or could not be parsed in a + /// recoverable way. + pub expression: Option>, + + /// Optional semicolon that appears immediately after a non-block branch + /// body in legacy constructs such as `if`, `for`, `while`, etc. + /// + /// This is intentionally preserved rather than normalized away so later + /// stages can diagnose or reproduce source structure more precisely. + pub semicolon_position: Option, + + /// Token position that can be used as a fallback end anchor for spans and + /// diagnostics when the body itself is missing. 
+ /// + /// In malformed constructs this may be the only reliable location attached + /// to the branch. + pub end_anchor_token_position: TokenPosition, +} + +/// One `case` arm inside a `switch`. +/// +/// UnrealScript-style syntax allows each arm to have multiple labels and uses +/// statement lists as bodies, with fallthrough being possible unless control +/// flow terminates explicitly. +#[derive(Debug, PartialEq)] +pub struct SwitchCase<'src, 'arena> { + /// Case labels associated with this arm. + /// + /// Labels are stored as expressions because the language allows + /// expression-valued labels rather than only simple constants. + pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>, + + /// Statements belonging to the arm body. + pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, +} + +/// Stable arena reference to a `switch` case arm. +pub type SwitchCaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>; + +impl<'arena> Expression<'_, 'arena> { + /// Construct a binary expression and assign it a span from `left_hand_side` + /// through `right_hand_side`. + #[must_use] + pub fn new_binary( + arena: &'arena Arena, + left_hand_side: ArenaNode<'arena, Self>, + op: InfixOperator, + right_hand_side: ArenaNode<'arena, Self>, + ) -> ArenaNode<'arena, Self> { + let span = AstSpan::merge(left_hand_side.span(), right_hand_side.span()); + ArenaNode::new_in( + Self::Binary(left_hand_side, op, right_hand_side), + span, + arena, + ) + } + + /// Construct a prefix unary expression and assign it a span from the + /// operator token through the end of `right_hand_side`. 
+ #[must_use] + pub fn new_prefix( + arena: &'arena Arena, + operation_position: TokenPosition, + operation: PrefixOperator, + right_hand_side: ArenaNode<'arena, Self>, + ) -> ArenaNode<'arena, Self> { + let span = AstSpan::range(operation_position, right_hand_side.span().token_to); + ArenaNode::new_in(Self::PrefixUnary(operation, right_hand_side), span, arena) + } + + /// Construct a postfix unary expression and assign it a span from the start + /// of `left_hand_side` through the operator token. + #[must_use] + pub fn new_postfix( + arena: &'arena Arena, + left_hand_side: ArenaNode<'arena, Self>, + operation: PostfixOperator, + operation_position: TokenPosition, + ) -> ArenaNode<'arena, Self> { + let span = AstSpan::range(left_hand_side.span().token_from, operation_position); + ArenaNode::new_in(Self::PostfixUnary(left_hand_side, operation), span, arena) + } +} diff --git a/rottlib/src/ast/mod.rs b/rottlib/src/ast/mod.rs new file mode 100644 index 0000000..4fba79d --- /dev/null +++ b/rottlib/src/ast/mod.rs @@ -0,0 +1,343 @@ +// `;` are encoded in spans of statement nodes as very last token +// Need to do a proper check to figure out what should and shouldn't be a node +use crate::arena::ArenaVec; + +use super::lexer::TokenPosition; + +use crate::arena::{Arena, ArenaNode, ArenaString}; + +pub mod callables; +pub mod expressions; +pub mod operators; +pub mod types; + +pub use callables::*; +pub use expressions::*; +pub use operators::*; +pub use types::*; + +// Get rid of identifier field +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub struct IdentifierToken(pub TokenPosition); + +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub struct OperatorToken(pub TokenPosition); + +#[derive(Debug, Hash, PartialEq, Eq)] +pub struct QualifiedIdentifier<'arena> { + pub head: IdentifierToken, + pub tail: Option>, // None => single segment +} +pub type QualifiedIdentifierRef<'arena> = ArenaNode<'arena, QualifiedIdentifier<'arena>>; + +// All inclusive! 
+#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub struct AstSpan { + pub token_from: TokenPosition, + pub token_to: TokenPosition, +} + +impl AstSpan { + // -------- existing coord-based API (unchanged externally) -------- + + #[must_use] + pub const fn merge(left_span: &Self, right_span: &Self) -> Self { + Self { + // assumes both were constructed in the same style; good enough for the refactor + token_from: left_span.token_from, + token_to: right_span.token_to, + } + } + + // -------- NEW: 4 constructors based on TokenIndex -------- + + /// Single-token span from an index (coords are dummy for now). + #[inline] + #[must_use] + pub const fn new(single_index: TokenPosition) -> Self { + Self { + token_from: single_index, + token_to: single_index, + } + } + + /// Span from two indices (coords are dummy for now). + #[inline] + #[must_use] + pub const fn range(from: TokenPosition, to: TokenPosition) -> Self { + Self { + token_from: from, + token_to: to, + } + } + + /// Immutable extension by index (keeps coords as-is). + #[inline] + #[must_use] + pub fn extended(&self, right_most_index: TokenPosition) -> Self { + Self { + token_from: self.token_from, + token_to: std::cmp::max(self.token_to, right_most_index), + } + } + + /// In-place extension by index (coords unchanged). + #[inline] + pub fn extend_to(&mut self, right_most_index: TokenPosition) { + if right_most_index > self.token_to { + self.token_to = right_most_index; + } + } +} + +impl<'arena> QualifiedIdentifier<'arena> { + #[inline] + #[must_use] + pub const fn is_single(&self) -> bool { + self.tail.is_none() + } + + #[inline] + #[allow(clippy::len_without_is_empty)] // Suppress useless suggestion for `is_empty()` + #[must_use] + pub fn len(&self) -> usize { + 1 + self.tail.as_ref().map_or(0, |v| v.len()) + } + + #[inline] + #[must_use] + pub const fn head(&self) -> IdentifierToken { + self.head + } + + /// Iterates all identifier segments in order without allocating. 
+ pub fn iter(&self) -> impl Iterator + '_ { + core::iter::once(self.head).chain(self.tail.iter().flat_map(|v| v.iter().copied())) + } + + /// Cheap constructor from a single identifier. No Vec allocated. + pub fn from_ident(arena: &'arena Arena, id: IdentifierToken) -> QualifiedIdentifierRef<'arena> { + let span = AstSpan::new(id.0); + ArenaNode::new_in( + Self { + head: id, + tail: None, + }, + span, + arena, + ) + } + /// Cheap constructor from a single identifier. No Vec allocated. + pub fn from_position( + arena: &'arena Arena, + position: TokenPosition, + ) -> QualifiedIdentifierRef<'arena> { + let span = AstSpan::new(position); + ArenaNode::new_in( + Self { + head: IdentifierToken(position), + tail: None, + }, + span, + arena, + ) + } +} + +#[derive(Debug, PartialEq)] +pub enum Statement<'src, 'arena> { + // For the cases where user just used too many semi-colons `;;;;` + Empty, + Expression(ExpressionRef<'src, 'arena>), + // Just declarations without assignment: + // `local int i, j, k` + LocalVariableDeclaration { + type_spec: TypeSpecifierRef<'src, 'arena>, + declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>, // CHANGED + }, + Label(ArenaString<'arena>), + /// Nested function definitions inside blocks or states. 
+ Function(CallableDefinitionRef<'src, 'arena>), + // For injecting in place of parts that couldn't be parsed + Error, +} +pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>; + +#[derive(Debug)] +pub enum DeclarationLiteral<'src, 'arena> { + None, + Bool(bool), + Integer(i128), + Float(f64), + String(ArenaString<'arena>), + Identifier(&'src str), + TaggedName { + tag: IdentifierToken, + quoted: ArenaString<'arena>, + }, // NEW +} + +#[derive(Debug)] +pub struct DeclarationLiteralRef<'src, 'arena> { + pub literal: DeclarationLiteral<'src, 'arena>, + pub position: TokenPosition, +} + +impl IdentifierToken { + #[must_use] + pub const fn span(self) -> AstSpan { + AstSpan::new(self.0) + } +} + +pub enum ClassModifier<'arena> { + Final, + Native, + Abstract, + Transient, + Public, + Protected, + Private, + Static, + Config(Option), + NativeReplication, + ExportStructs, + SafeReplace, + + Const, + Deprecated, + NoExport, + Export, + + Localized, + Placeable, + NotPlaceable, + Instanced, + EditConst, + EditInline, + EditInlineNew, + NotEditInlineNew, + CollapseCategories, + DontCollapseCategories, + HideCategories(ArenaVec<'arena, IdentifierToken>), + ShowCategories(ArenaVec<'arena, IdentifierToken>), + Within(IdentifierToken), + DependsOn(IdentifierToken), + GlobalConfig, + PerObjectConfig, + DynamicRecompile, + HideDropdown, + ParseConfig, + CacheExempt, +} + +pub type ClassModifierRef<'arena> = ArenaNode<'arena, ClassModifier<'arena>>; + +pub struct ClassDeclaration<'arena> { + pub name: IdentifierToken, + pub parent: Option, + pub modifiers: Vec>, +} + +// --- in ast.rs --- + +#[derive(Debug)] +pub struct ClassVarDecl<'src, 'arena> { + /// var(<...>) e.g. var(Display, "Advanced") + /// Each item is an `ArenaNode`, so token locations are preserved. + pub paren_specs: Option>>, + + /// variable modifiers like public/protected/private/static/const/... + /// Each modifier is an `ArenaNode` capturing its span; order preserved. 
+ pub modifiers: ArenaVec<'arena, VarModifier>, + + pub type_spec: TypeSpecifierRef<'src, 'arena>, // Named/InlineEnum/InlineStruct + pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>, // a, b=expr + pub span: AstSpan, +} +pub type ClassVarDeclRef<'src, 'arena> = ArenaNode<'arena, ClassVarDecl<'src, 'arena>>; + +#[derive(Debug)] +pub struct ClassConstDecl<'src, 'arena> { + pub name: IdentifierToken, + pub value: DeclarationLiteralRef<'src, 'arena>, + pub span: AstSpan, +} +pub type ClassConstDeclRef<'src, 'arena> = ArenaNode<'arena, ClassConstDecl<'src, 'arena>>; + +pub enum ClassMember<'src, 'arena> +where + 'src: 'arena, +{ + Function(CallableDefinitionRef<'src, 'arena>), + TypeDefEnum(EnumDefRef<'src, 'arena>), + TypeDefStruct(StructDefRef<'src, 'arena>), + Var(ClassVarDeclRef<'src, 'arena>), + + Replication(ReplicationBlockRef<'src, 'arena>), + State(StateDeclRef<'src, 'arena>), + Const(ClassConstDeclRef<'src, 'arena>), + Exec(ExecDirectiveRef<'arena>), +} + +pub type ClassMemberRef<'src, 'arena> = ArenaNode<'arena, ClassMember<'src, 'arena>>; + +#[derive(Clone, Copy, Debug)] +pub enum Reliability { + Reliable, + Unreliable, +} + +#[derive(Debug)] +pub struct ReplicationRule<'src, 'arena> { + pub reliability: Reliability, // reliable|unreliable + pub condition: Option>, // if () or None + pub members: ArenaVec<'arena, IdentifierToken>, // a, b, Foo() + pub span: AstSpan, +} +pub type ReplicationRuleRef<'src, 'arena> = ArenaNode<'arena, ReplicationRule<'src, 'arena>>; + +#[derive(Debug)] +pub struct ReplicationBlock<'src, 'arena> { + pub rules: ArenaVec<'arena, ReplicationRuleRef<'src, 'arena>>, + pub span: AstSpan, +} +pub type ReplicationBlockRef<'src, 'arena> = ArenaNode<'arena, ReplicationBlock<'src, 'arena>>; + +// ---------- States ---------- + +#[derive(Clone, Copy, Debug)] +pub enum StateModifier { + Auto, // 'auto' + Simulated, // 'simulated' +} + +#[derive(Debug)] +pub struct StateDecl<'src, 'arena> { + pub name: IdentifierToken, + 
pub parent: Option, // 'extends BaseState' + pub modifiers: ArenaVec<'arena, StateModifier>, // auto, simulated + pub ignores: Option>, // 'ignores Foo, Bar;' + /// Body: ordinary statements plus nested function definitions (see `Statement::Function`). + pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, + pub span: AstSpan, +} +pub type StateDeclRef<'src, 'arena> = ArenaNode<'arena, StateDecl<'src, 'arena>>; + +// NEW: exec directive node +#[derive(Debug)] +pub struct ExecDirective<'arena> { + pub text: ArenaString<'arena>, // full line without trailing newline(s) + pub span: AstSpan, +} +pub type ExecDirectiveRef<'arena> = ArenaNode<'arena, ExecDirective<'arena>>; + +/// Keep your existing `ClassDeclaration` as the header. +/// Optionally: `pub type ClassHeader<'src, 'arena> = ClassDeclaration<'src, 'arena>;` +pub struct ClassDefinition<'src, 'arena> +where + 'src: 'arena, +{ + pub header: ClassDeclaration<'arena>, // or ClassHeader if you rename + pub members: ArenaVec<'arena, ClassMemberRef<'src, 'arena>>, +} diff --git a/rottlib/src/ast/operators.rs b/rottlib/src/ast/operators.rs new file mode 100644 index 0000000..9c24674 --- /dev/null +++ b/rottlib/src/ast/operators.rs @@ -0,0 +1,268 @@ +//! Operator AST nodes. +//! +//! This module defines the prefix, postfix, and infix operator kinds used by +//! expression AST nodes. +//! +//! The enums here represent only the *syntactic operator category* recorded in +//! the AST. They do not encode precedence, associativity, overload behavior, +//! or token spelling details beyond the normalized operator kind itself. +//! Those concerns are handled by the expression parser and precedence tables. + +use crate::lexer::{Keyword, Token, TokenPosition}; + +use core::convert::TryFrom; + +/// Prefix unary operators. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum PrefixOperator { + /// Logical negation: `!expr`. + Not, + /// Arithmetic negation: `-expr`. + Minus, + /// Unary plus: `+expr`. 
+ Plus, + /// Bitwise negation: `~expr`. + BitwiseNot, + /// Prefix increment: `++expr`. + Increment, + /// Prefix decrement: `--expr`. + Decrement, +} + +/// Postfix unary operators. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum PostfixOperator { + /// Postfix increment: `expr++`. + Increment, + /// Postfix decrement: `expr--`. + Decrement, +} + +/// Binary / infix operators. +/// +/// These operators appear between left-hand side and right-hand side operands. +/// This enum stores only the normalized AST-level operator kind. +/// +/// The parser assigns precedence and associativity separately. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum InfixOperator { + /// Simple assignment: `left_hand_side = right_hand_side`. + Assign, + /// Multiplicative assignment: `left_hand_side *= right_hand_side`. + MultiplyAssign, + /// Division assignment: `left_hand_side /= right_hand_side`. + DivideAssign, + /// Modulo assignment: `left_hand_side %= right_hand_side`. + ModuloAssign, + /// Additive assignment: `left_hand_side += right_hand_side`. + PlusAssign, + /// Subtractive assignment: `left_hand_side -= right_hand_side`. + MinusAssign, + /// String concatenation assignment: `left_hand_side $= right_hand_side`. + ConcatAssign, + /// Space-concatenation assignment: `left_hand_side @= right_hand_side`. + ConcatSpaceAssign, + + /// String concatenation without inserted whitespace: + /// `left_hand_side $ right_hand_side`. + Concat, + /// String concatenation with an inserted space: + /// `left_hand_side @ right_hand_side`. + ConcatSpace, + + /// Logical conjunction: `left_hand_side && right_hand_side`. + And, + /// Logical exclusive-or: `left_hand_side ^^ right_hand_side`. + Xor, + /// Logical disjunction: `left_hand_side || right_hand_side`. + Or, + + /// Bitwise AND: `left_hand_side & right_hand_side`. + BitwiseAnd, + /// Bitwise OR: `left_hand_side | right_hand_side`. + BitwiseOr, + /// Bitwise XOR: `left_hand_side ^ right_hand_side`. 
+ BitwiseXor, + + /// Inequality test: `left_hand_side != right_hand_side`. + NotEqual, + /// Equality test: `left_hand_side == right_hand_side`. + Equal, + /// Approximate equality test: `left_hand_side ~= right_hand_side`. + ApproximatelyEqual, + /// Less-than comparison: `left_hand_side < right_hand_side`. + Less, + /// Less-than-or-equal comparison: `left_hand_side <= right_hand_side`. + LessEqual, + /// Greater-than comparison: `left_hand_side > right_hand_side`. + Greater, + /// Greater-than-or-equal comparison: `left_hand_side >= right_hand_side`. + GreaterEqual, + /// UnrealScript-specific directional comparison: + /// `left_hand_side ClockwiseFrom right_hand_side`. + ClockwiseFrom, + + /// Left shift: `left_hand_side << right_hand_side`. + LeftShift, + /// Logical right shift: `left_hand_side >>> right_hand_side`. + LogicalRightShift, + /// Arithmetic / ordinary right shift: `left_hand_side >> right_hand_side`. + RightShift, + + /// Addition: `left_hand_side + right_hand_side`. + Plus, + /// Subtraction: `left_hand_side - right_hand_side`. + Minus, + + /// Remainder / modulo: `left_hand_side % right_hand_side`. + Modulo, + /// Multiplication: `left_hand_side * right_hand_side`. + Multiply, + /// Division: `left_hand_side / right_hand_side`. + Divide, + + /// Dot product: `left_hand_side Dot right_hand_side`. + /// + /// This is spelled as a keyword-level operator in source. + Dot, + /// Cross product: `left_hand_side Cross right_hand_side`. + /// + /// This is spelled as a keyword-level operator in source. + Cross, + + /// Exponentiation: `left_hand_side ** right_hand_side`. 
+ Exponentiation, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct PrefixOperatorName { + pub kind: PrefixOperator, + pub position: TokenPosition, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct InfixOperatorName { + pub kind: InfixOperator, + pub position: TokenPosition, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct PostfixOperatorName { + pub kind: PostfixOperator, + pub position: TokenPosition, +} + +impl TryFrom for PostfixOperator { + type Error = (); + + fn try_from(token: Token) -> Result { + use PostfixOperator::{Decrement, Increment}; + + match token { + Token::Increment => Ok(Increment), + Token::Decrement => Ok(Decrement), + _ => Err(()), + } + } +} + +impl TryFrom for PrefixOperator { + type Error = (); + + fn try_from(token: Token) -> Result { + use PrefixOperator::{BitwiseNot, Decrement, Increment, Minus, Not, Plus}; + + match token { + Token::Not => Ok(Not), + Token::Minus => Ok(Minus), + Token::Plus => Ok(Plus), + Token::BitwiseNot => Ok(BitwiseNot), + Token::Increment => Ok(Increment), + Token::Decrement => Ok(Decrement), + _ => Err(()), + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) struct InfixOperatorInfo { + pub operator: InfixOperator, + pub right_precedence_rank: u8, +} + +pub(crate) const fn infix_operator_info(token: Token) -> Option { + use InfixOperator::{ + And, ApproximatelyEqual, Assign, BitwiseAnd, BitwiseOr, BitwiseXor, ClockwiseFrom, Concat, + ConcatAssign, ConcatSpace, ConcatSpaceAssign, Cross, Divide, DivideAssign, Dot, Equal, + Exponentiation, Greater, GreaterEqual, LeftShift, Less, LessEqual, LogicalRightShift, + Minus, MinusAssign, Modulo, ModuloAssign, Multiply, MultiplyAssign, NotEqual, Or, Plus, + PlusAssign, RightShift, Xor, + }; + + let (precedence_rank, operator) = match token { + Token::Exponentiation => (12, Exponentiation), + + Token::Multiply => (16, Multiply), + Token::Divide => (16, Divide), + Token::Keyword(Keyword::Cross) => 
(16, Cross), + Token::Keyword(Keyword::Dot) => (16, Dot), + + Token::Modulo => (18, Modulo), + + Token::Plus => (20, Plus), + Token::Minus => (20, Minus), + + Token::LeftShift => (22, LeftShift), + Token::RightShift => (22, RightShift), + Token::LogicalRightShift => (22, LogicalRightShift), + + Token::Less => (24, Less), + Token::LessEqual => (24, LessEqual), + Token::Greater => (24, Greater), + Token::GreaterEqual => (24, GreaterEqual), + Token::Equal => (24, Equal), + Token::ApproximatelyEqual => (24, ApproximatelyEqual), + Token::Keyword(Keyword::ClockwiseFrom) => (24, ClockwiseFrom), + + Token::NotEqual => (26, NotEqual), + + Token::BitwiseAnd => (28, BitwiseAnd), + Token::BitwiseXor => (28, BitwiseXor), + Token::BitwiseOr => (28, BitwiseOr), + + Token::LogicalAnd => (30, And), + Token::LogicalXor => (30, Xor), + + Token::LogicalOr => (32, Or), + + Token::MultiplyAssign => (34, MultiplyAssign), + Token::DivideAssign => (34, DivideAssign), + Token::PlusAssign => (34, PlusAssign), + Token::MinusAssign => (34, MinusAssign), + Token::Assign => (34, Assign), + Token::ModuloAssign => (34, ModuloAssign), + + Token::Concat => (40, Concat), + Token::ConcatSpace => (40, ConcatSpace), + + Token::ConcatAssign => (44, ConcatAssign), + Token::ConcatSpaceAssign => (44, ConcatSpaceAssign), + + _ => return None, + }; + + Some(InfixOperatorInfo { + operator, + right_precedence_rank: precedence_rank, + }) +} + +impl TryFrom for InfixOperator { + type Error = (); + + fn try_from(token: Token) -> Result { + infix_operator_info(token) + .map(|info| info.operator) + .ok_or(()) + } +} diff --git a/rottlib/src/ast/types.rs b/rottlib/src/ast/types.rs new file mode 100644 index 0000000..6ec341a --- /dev/null +++ b/rottlib/src/ast/types.rs @@ -0,0 +1,277 @@ +//! Type-specifier and declaration AST nodes. +//! +//! This module defines syntactic forms used to represent type names, inline +//! type declarations, variable declarators, and declaration modifiers. 
+use super::{AstSpan, ExpressionRef, IdentifierToken, QualifiedIdentifierRef}; + +use crate::arena::{ArenaNode, ArenaString, ArenaVec}; +use crate::lexer::{Keyword, Token, TokenPosition}; + +use core::convert::TryFrom; + +/// Type syntax used in declarations, fields, and other type-annotated grammar +/// positions. +/// +/// This enum covers both named types and inline type-definition forms supported +/// by the language. +#[derive(Debug, PartialEq)] +pub enum TypeSpecifier<'src, 'arena> { + /// Named type reference such as `EDrawType` or `Pkg.Group.Type`. + Named(QualifiedIdentifierRef<'arena>), + /// Inline enum definition used directly in type position. + /// + /// Example: + /// `enum EMyKind { A, B, C }` + InlineEnum(EnumDefRef<'src, 'arena>), + /// Inline struct definition used directly in type position. + /// + /// Example: + /// `struct SMyData { var int X; }` + InlineStruct(StructDefRef<'src, 'arena>), + /// Generic array type: `array<...>`. + /// + /// The parser currently allows a sequence of variable-style modifiers to + /// appear before the inner type and preserves them here. + Array { + /// Modifiers parsed before the inner type inside `array<...>`. + element_modifiers: ArenaVec<'arena, VarModifier>, + /// Element / inner type. + element_type: TypeSpecifierRef<'src, 'arena>, + }, + /// `class` or `class`. + /// + /// `None` represents a bare `class` with no type argument. + Class(Option>), +} + +/// Stable arena reference to a type-specifier node. +pub type TypeSpecifierRef<'src, 'arena> = ArenaNode<'arena, TypeSpecifier<'src, 'arena>>; + +/// Enum definition used either inline in a type position or elsewhere in the +/// declaration grammar. +#[derive(Debug, PartialEq, Eq)] +pub struct EnumDefinition<'arena> { + /// Declared enum name. + pub name: IdentifierToken, + /// Enum variants in source order. + pub variants: ArenaVec<'arena, IdentifierToken>, +} + +/// Stable arena reference to an enum definition. 
+pub type EnumDefRef<'src, 'arena> = ArenaNode<'arena, EnumDefinition<'arena>>; + +/// Struct-level modifier kind. +/// +/// These are modifiers that apply to the struct declaration itself rather than +/// to an individual field. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum StructModifierKind { + Native, + Export, + NoExport, + Transient, + Deprecated, + Init, + Long, +} + +/// Struct declaration modifier together with its source token position. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct StructModifier { + /// Modifier kind. + pub kind: StructModifierKind, + /// Position of the modifier token in the source stream. + pub position: TokenPosition, +} + +impl StructModifier { + /// Span covering just this modifier token. + #[must_use] + pub const fn span(self) -> AstSpan { + AstSpan::new(self.position) + } + /// Construct a struct modifier from kind and token position. + #[must_use] + pub const fn new(kind: StructModifierKind, token: TokenPosition) -> Self { + Self { + kind, + position: token, + } + } +} + +/// Struct field declaration. +/// +/// A field stores the declared type together with one or more declarators +/// sharing that type, plus optional `var(...)` editor specifiers and ordinary +/// declaration modifiers. +#[derive(Debug, PartialEq)] +pub struct StructField<'src, 'arena> { + /// Field type. + pub type_specifier: TypeSpecifierRef<'src, 'arena>, + /// One or more declarators declared with the same type. + /// + /// Examples: + /// - `var int A;` + /// - `var int A, B[4], C = 10;` + pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>, + /// Optional `var(...)` editor specifiers attached to the field declaration. + /// + /// Example: + /// `var(Display, "Advanced/Hidden")` + pub editor_specifiers: Option>>, + /// Declaration modifiers attached to the field. + /// + /// These are preserved in source order. 
+ pub declaration_modifiers: ArenaVec<'arena, VarModifier>, +} + +/// Stable arena reference to a struct field declaration. +pub type StructFieldRef<'src, 'arena> = ArenaNode<'arena, StructField<'src, 'arena>>; + +/// Struct definition used either inline in a type position or elsewhere in the +/// declaration grammar. +#[derive(Debug, PartialEq)] +pub struct StructDefinition<'src, 'arena> { + /// Struct name, if present. + /// + /// Anonymous inline structs use `None`. + pub name: Option, + /// Optional base struct after `extends`. + pub base_type_name: Option>, + /// Modifiers attached to the struct declaration itself. + pub modifiers: ArenaVec<'arena, StructModifier>, + /// Struct fields in source order. + pub fields: ArenaVec<'arena, StructFieldRef<'src, 'arena>>, +} + +/// Stable arena reference to a struct definition. +pub type StructDefRef<'src, 'arena> = ArenaNode<'arena, StructDefinition<'src, 'arena>>; + +/// One declared variable name together with optional array size and initializer. +/// +/// This node represents one declarator inside a declaration that may contain +/// several comma-separated declarators sharing the same type. +#[derive(Debug, PartialEq)] +pub struct VariableDeclarator<'src, 'arena> { + /// Declared variable name. + pub name: IdentifierToken, + /// Optional initializer after `=`. + pub initializer: Option>, + /// Optional array-size expression from `[expr]`. + pub array_size: Option>, +} + +/// Stable arena reference to a variable declarator. +/// +/// The node span is expected to cover the entire declarator, not only the +/// identifier token. +pub type VariableDeclaratorRef<'src, 'arena> = ArenaNode<'arena, VariableDeclarator<'src, 'arena>>; + +/// One item inside `var(...)` editor specifiers. +#[derive(Debug, PartialEq, Eq)] +pub enum VarEditorSpecifier<'arena> { + /// Identifier-like editor specifier such as `Display` or `Advanced`. + Identifier(IdentifierToken), + /// String editor specifier such as `"Category/Sub"`. 
+ String(ArenaString<'arena>), +} + +/// Stable arena reference to an editor specifier. +pub type VarEditorSpecifierRef<'src, 'arena> = ArenaNode<'arena, VarEditorSpecifier<'arena>>; + +/// Field / variable declaration modifier kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum VarModifierKind { + Transient, + Public, + Protected, + Private, + Static, + Const, + Deprecated, + NoExport, + Export, + Config, + Localized, + GlobalConfig, + PerObjectConfig, + Input, + EdFindable, + EditConst, + EditConstArray, + EditInline, + EditInlineUse, + EditInlineNew, + EditInlineNotify, + NotEditInlineNew, + Automated, + Native, + Travel, + Cache, +} + +/// Variable-style declaration modifier together with its token position. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct VarModifier { + /// Modifier kind. + pub kind: VarModifierKind, + /// Position of the modifier token in the source stream. + pub position: TokenPosition, +} + +impl TryFrom for VarModifierKind { + type Error = (); + + fn try_from(keyword: Keyword) -> Result { + use VarModifierKind::{ + Automated, Cache, Config, Const, Deprecated, EdFindable, EditConst, EditConstArray, + EditInline, EditInlineNew, EditInlineNotify, EditInlineUse, Export, GlobalConfig, + Input, Localized, Native, NoExport, NotEditInlineNew, PerObjectConfig, Private, + Protected, Public, Static, Transient, Travel, + }; + + let kind = match keyword { + Keyword::Transient => Transient, + Keyword::Public => Public, + Keyword::Protected => Protected, + Keyword::Private => Private, + Keyword::Static => Static, + Keyword::Const => Const, + Keyword::Deprecated => Deprecated, + Keyword::NoExport => NoExport, + Keyword::Export => Export, + Keyword::Config => Config, + Keyword::Localized => Localized, + Keyword::GlobalConfig => GlobalConfig, + Keyword::PerObjectConfig => PerObjectConfig, + Keyword::EdFindable => EdFindable, + Keyword::EditConst => EditConst, + Keyword::EditConstArray => EditConstArray, + Keyword::EditInline 
=> EditInline, + Keyword::EditInlineUse => EditInlineUse, + Keyword::EditInlineNew => EditInlineNew, + Keyword::EditInlineNotify => EditInlineNotify, + Keyword::NotEditInlineNew => NotEditInlineNew, + Keyword::Automated => Automated, + Keyword::Native => Native, + Keyword::Input => Input, + Keyword::Travel => Travel, + Keyword::Cache => Cache, + _ => return Err(()), + }; + Ok(kind) + } +} + +impl TryFrom<(Token, TokenPosition)> for VarModifier { + type Error = (); + + fn try_from((token, position): (Token, TokenPosition)) -> Result { + let Token::Keyword(keyword) = token else { + return Err(()); + }; + let kind = VarModifierKind::try_from(keyword)?; + Ok(Self { kind, position }) + } +} diff --git a/rottlib/src/diagnostics/expression.rs b/rottlib/src/diagnostics/expression.rs new file mode 100644 index 0000000..519805d --- /dev/null +++ b/rottlib/src/diagnostics/expression.rs @@ -0,0 +1,190 @@ +use super::{Diagnostic, DiagnosticBuilder}; +use crate::ast::AstSpan; +use crate::lexer::TokenPosition; +use crate::parser::{ParseError, ParseErrorKind}; +use std::convert::From; + +fn diagnostic_parenthesized_expression_empty( + error: ParseError, + left_parenthesis_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("empty parenthesized expression") + .primary_label(error.blame_span, "expected an expression before this `)`") + .secondary_label( + AstSpan::new(left_parenthesis_position), + "parenthesized expression starts here", + ) + .help("Remove the parentheses or put an expression inside them.") + .build() +} + +fn diagnostic_class_type_missing_type_argument( + error: ParseError, + left_angle_bracket_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing type argument in `class<...>`") + .primary_label(error.blame_span, "expected a type name here") + .secondary_label( + AstSpan::new(left_angle_bracket_position), + "type argument list starts here", + ) + .help("Write a type name, for example `class`.") + .build() +} + +fn 
diagnostic_class_type_missing_closing_angle_bracket( + error: ParseError, + left_angle_bracket_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing closing `>` in `class<...>`") + .primary_label(error.blame_span, "expected `>` here") + .secondary_label( + AstSpan::new(left_angle_bracket_position), + "this `<` starts the type argument", + ) + .help("Add `>` to close the class type expression.") + .build() +} + +fn diagnostic_parenthesized_expression_missing_closing_parenthesis( + error: ParseError, + left_parenthesis_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing closing `)`") + .primary_label(error.blame_span, "expected `)` here") + .secondary_label( + AstSpan::new(left_parenthesis_position), + "this `(` starts the parenthesized expression", + ) + .help("Add `)` to close the expression.") + .build() +} + +fn diagnostic_expression_expected(error: ParseError) -> Diagnostic { + let mut builder = DiagnosticBuilder::error("expected expression") + .primary_label(error.blame_span, "this token cannot start an expression") + .help( + "Expressions can start with literals, identifiers, `(`, `{`, or expression keywords.", + ); + + if let Some(related_span) = error.related_span { + builder = builder.secondary_label(related_span, "expression context starts here"); + } + + builder.build() +} + +fn diagnostic_class_type_invalid_type_argument( + error: ParseError, + left_angle_bracket_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("invalid type argument in `class<...>`") + .primary_label(error.blame_span, "expected a qualified type name here") + .secondary_label( + AstSpan::new(left_angle_bracket_position), + "type argument list starts here", + ) + .note("Only a qualified type name is accepted between `<` and `>` here.") + .build() +} + +fn diagnostic_new_too_many_arguments( + error: ParseError, + left_parenthesis_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("too many 
arguments in `new(...)`") + .primary_label(error.blame_span, "unexpected extra argument") + .secondary_label( + AstSpan::new(left_parenthesis_position), + "this argument list accepts at most three arguments", + ) + .note("The three slots are `outer`, `name`, and `flags`.") + .help("Remove the extra argument.") + .build() +} + +fn diagnostic_new_missing_closing_parenthesis( + error: ParseError, + left_parenthesis_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing closing `)` in `new(...)`") + .primary_label(error.blame_span, "expected `)` here") + .secondary_label( + AstSpan::new(left_parenthesis_position), + "this argument list starts here", + ) + .help("Add `)` to close the argument list.") + .build() +} + +fn diagnostic_new_missing_class_specifier( + error: ParseError, + new_keyword_position: TokenPosition, +) -> Diagnostic { + let mut builder = DiagnosticBuilder::error("missing class specifier in `new` expression") + .primary_label( + error.blame_span, + "expected the class or expression to instantiate here", + ) + .secondary_label( + AstSpan::new(new_keyword_position), + "`new` expression starts here", + ) + .help("Add the class or expression to instantiate after `new` or `new(...)`."); + + if let Some(related_span) = error.related_span { + builder = builder.secondary_label(related_span, "optional `new(...)` arguments end here"); + } + + builder.build() +} + +impl From for Diagnostic { + fn from(error: ParseError) -> Self { + match error.kind { + ParseErrorKind::ParenthesizedExpressionEmpty { + left_parenthesis_position, + } => diagnostic_parenthesized_expression_empty(error, left_parenthesis_position), + + ParseErrorKind::ClassTypeMissingTypeArgument { + left_angle_bracket_position, + } => diagnostic_class_type_missing_type_argument(error, left_angle_bracket_position), + + ParseErrorKind::ClassTypeMissingClosingAngleBracket { + left_angle_bracket_position, + } => diagnostic_class_type_missing_closing_angle_bracket( + error, + 
left_angle_bracket_position, + ), + + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { + left_parenthesis_position, + } => diagnostic_parenthesized_expression_missing_closing_parenthesis( + error, + left_parenthesis_position, + ), + + ParseErrorKind::ExpressionExpected => diagnostic_expression_expected(error), + + ParseErrorKind::ClassTypeInvalidTypeArgument { + left_angle_bracket_position, + } => diagnostic_class_type_invalid_type_argument(error, left_angle_bracket_position), + + ParseErrorKind::NewTooManyArguments { + left_parenthesis_position, + } => diagnostic_new_too_many_arguments(error, left_parenthesis_position), + + ParseErrorKind::NewMissingClosingParenthesis { + left_parenthesis_position, + } => diagnostic_new_missing_closing_parenthesis(error, left_parenthesis_position), + + ParseErrorKind::NewMissingClassSpecifier { + new_keyword_position, + } => diagnostic_new_missing_class_specifier(error, new_keyword_position), + + _ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind)) + .primary_label(error.covered_span, "happened here") + .build(), + } + } +} diff --git a/rottlib/src/diagnostics.rs b/rottlib/src/diagnostics/mod.rs similarity index 96% rename from rottlib/src/diagnostics.rs rename to rottlib/src/diagnostics/mod.rs index 57106b8..2ce5107 100644 --- a/rottlib/src/diagnostics.rs +++ b/rottlib/src/diagnostics/mod.rs @@ -4,6 +4,9 @@ //! parsing or doing lightweight frontend checks. They are intentionally small, //! depend only on [`AstSpan`], and are easy to construct and store. +mod expression; +mod render; + use crate::ast::AstSpan; /// Classification of a diagnostic by its impact. @@ -110,43 +113,51 @@ impl Diagnostic { } /// Returns `true` iff severity is [`Severity::Error`]. + #[must_use] pub fn stops_compilation(&self) -> bool { self.severity == Severity::Error } /// Returns the diagnostic code if present. /// - /// See [DiagnosticBuilder::code] for code scheme. 
+ /// See [`DiagnosticBuilder::code`] for code scheme. + #[must_use] pub fn code(&self) -> Option<&str> { self.code.as_deref() } /// Returns the primary label, if any. - pub fn primary_label(&self) -> Option<&Label> { + #[must_use] + pub const fn primary_label(&self) -> Option<&Label> { self.primary_label.as_ref() } /// Returns the secondary labels in insertion order. + #[must_use] pub fn secondary_labels(&self) -> &[Label] { &self.secondary_labels } /// Returns the headline. + #[must_use] pub fn headline(&self) -> &str { &self.headline } /// Returns the severity. - pub fn severity(&self) -> Severity { + #[must_use] + pub const fn severity(&self) -> Severity { self.severity } /// Returns the notes. + #[must_use] pub fn notes(&self) -> &[String] { &self.notes } /// Returns the help message, if any. + #[must_use] pub fn help(&self) -> Option<&str> { self.help.as_deref() } diff --git a/rottlib/src/diagnostics/render.rs b/rottlib/src/diagnostics/render.rs new file mode 100644 index 0000000..b30f4a0 --- /dev/null +++ b/rottlib/src/diagnostics/render.rs @@ -0,0 +1,491 @@ +use crate::ast::AstSpan; +use crate::diagnostics::{self, Diagnostic, Severity}; +use crate::lexer::TokenizedFile; + +use core::convert::Into; +use crossterm::style::Stylize; +use crossterm::terminal::disable_raw_mode; +use std::cmp::max; +use std::collections::HashMap; +use std::ops::RangeInclusive; + +const INDENT: &str = " "; +const MAX_LINES_LIMIT: usize = 10; + +/* +error: expected one of `,`, `:`, or `}`, found `token_to` + --> rottlib/src/ast/mod.rs:80:13 + | +78 | Self { + | ---- while parsing this struct +79 | token_from: self.token_from,scd + | --- while parsing this struct field +80 | token_to: std::cmp::max(self.token_to, right_most_index), + | ^^^^^^^^ expected one of `,`, `:`, or `}` + */ + +/* + | +76 | / "asdasdas +77 | | asd1 +78 | | asd2 +79 | | asdasd" + | |___________________^ expected `()`, found `&str` + */ + +/* + 1. 
Get each span's range and total lines covered by spans as ranges; + 2. We need `+N` more lines for `N` labels; + 3. +*/ + +// These are abstract rendering events, not self-contained draw commands. +// They are emitted in increasing order of "significant lines" (range starts/ends). +// The actual source span for a label is recovered later from its LabelType. +#[derive(PartialEq, Eq, Clone, Copy)] +enum RendererCommands { + StartRange { + label_type: LabelType, + column: usize, + }, + FinishRange { + label_type: LabelType, + column: usize, + }, + SingleRange { + label_type: LabelType, + }, +} + +enum LineIndexType { + Normal(usize), + Missing, + Ellipsis, +} + +// Label ordering is semantic: primary first, then secondaries in diagnostic order. +// That order is also used to break visual ties when multiple labels would otherwise +// start or end on the same source line. +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +enum LabelType { + Primary, + Secondary(usize), +} + +struct RangeSet { + primary_range: Option>, + secondary_ranges: Vec>, +} + +impl RangeSet { + fn get(&self, index: usize) -> Option<&RangeInclusive> { + if self.primary_range.is_some() { + if index == 0 { + return self.primary_range.as_ref(); + } else { + self.secondary_ranges.get(index - 1) + } + } else { + self.secondary_ranges.get(index) + } + } + + fn len(&self) -> usize { + self.secondary_ranges.len() + if self.primary_range.is_some() { 1 } else { 0 } + } + + fn iter(&self) -> impl Iterator> { + self.primary_range + .iter() + .chain(self.secondary_ranges.iter()) + } + + fn iter_labeled(&self) -> impl Iterator)> { + self.primary_range + .iter() + .map(|range| (LabelType::Primary, range)) + .chain( + self.secondary_ranges + .iter() + .enumerate() + .map(|(index, range)| (LabelType::Secondary(index), range)), + ) + } + + fn get_first_bound_above(&self, line_number: Option) -> Option { + self.iter() + .filter_map(|range| { + let start = *range.start(); + let end = *range.end(); + + let start_ok = 
line_number.is_none_or(|n| start > n).then_some(start); + let end_ok = line_number.is_none_or(|n| end > n).then_some(end); + + match (start_ok, end_ok) { + (Some(a), Some(b)) => Some(a.min(b)), + (Some(a), None) => Some(a), + (None, Some(b)) => Some(b), + (None, None) => None, + } + }) + .min() + } +} + +// Converts labeled line ranges into an ordered stream of renderer events. +// +// Important invariants: +// +// 1. Commands are ordered by increasing significant line. +// A significant line is any line on which some label starts or ends. +// +// 2. If multiple labels would visually terminate on the same source line, +// the renderer treats them as ending on distinct phantom rows, ordered by +// diagnostic priority (primary/secondary order). This prevents intersections +// and means that same-line closings are intentionally linearized rather than +// treated as a geometric tie. +// +// 3. RendererCommands do not store source line numbers directly. +// Later rendering recovers the underlying span from LabelType and uses the +// event order to know when labels become active/inactive. +// +// 4. When a label starts on the same significant line where another label ends, +// starts are processed first. This is intentional: longer-lived/opening labels +// must occupy earlier columns so that shorter-lived/closing labels bend around +// them without intersecting. +fn make_renderer_commands(ranges: RangeSet) -> Vec<(usize, RendererCommands)> { + // Maps currently-open labels to the index of their StartRange command so that + // we can patch in the final column once the label closes. 
+ let mut open_ranges = HashMap::new(); + let mut commands = Vec::new(); + let mut current_line = None; + while let Some(next_significant_line) = ranges.get_first_bound_above(current_line) { + current_line = Some(next_significant_line); + // First process all new ranges because they'll live longer and have + // to have earlier columns + for (label, range) in ranges.iter_labeled() { + if *range.start() == next_significant_line { + if range.start() != range.end() { + commands.push(( + *range.start(), + RendererCommands::StartRange { + label_type: label, + column: 0, + }, + )); + open_ranges.insert(label, commands.len() - 1); + } else { + commands.push(( + *range.start(), + RendererCommands::SingleRange { label_type: label }, + )); + } + } + } + + // Closing pass. + // The assigned column is the number of ranges that remain open after removing + // this label. Because same-line visual ties are already linearized by label + // priority / phantom rows, processing labels in iter_labeled() order is + // intentional here. + for (label, range) in ranges.iter_labeled() { + if *range.end() == next_significant_line { + if let Some(index) = open_ranges.remove(&label) { + // Column meaning: + // 0 = outermost / earliest lane + // larger values = further inward lanes + // + // We assign the column at close time, not at open time, because the final lane + // depends on which other ranges outlive this one. + let column = open_ranges.len(); + if let Some((line_number, RendererCommands::StartRange { .. 
})) = + commands.get(index) + { + commands[index] = ( + *line_number, + RendererCommands::StartRange { + label_type: label, + column, + }, + ); + } + commands.push(( + *range.end(), + RendererCommands::FinishRange { + label_type: label, + column, + }, + )); + } + } + } + } + commands +} + +fn max_line_number_width(ranges: &RangeSet) -> usize { + let max_line = ranges.iter().map(|range| *range.end()).max().unwrap_or(0); + + if max_line == 0 { + 1 + } else { + max_line.ilog10() as usize + 1 + } +} + +fn span_to_range<'src>(span: AstSpan, file: &TokenizedFile<'src>) -> Option> { + let start_line = file.token_line(span.token_from)?; + let end_line = file.token_line(span.token_to)?; + + if start_line <= end_line { + Some(start_line..=end_line) + } else { + None + } +} + +fn make_ranges<'src>(file: &TokenizedFile<'src>, diagnostic: &Diagnostic) -> RangeSet { + let mut result = RangeSet { + primary_range: None, + secondary_ranges: Vec::new(), + }; + result.primary_range = diagnostic + .primary_label() + .and_then(|label| span_to_range(label.span, file)); + for secondary in diagnostic.secondary_labels() { + if let Some(range) = span_to_range(secondary.span, file) { + result.secondary_ranges.push(range); + } + } + result +} + +impl Diagnostic { + pub fn render<'src>(&self, file: &TokenizedFile<'src>, file_path: impl Into) { + self.render_header(); + println!("{INDENT}{}: {}", "in file".blue().bold(), file_path.into()); + self.render_lines(file); + } + /*StartRange { + label_type: LabelType, + column: usize, + }, + FinishRange { + label_type: LabelType, + }, + SingleRange { + label_type: LabelType, + }, */ + fn label_data(&self, label_type: LabelType) -> Option<(AstSpan, String)> { + match label_type { + LabelType::Primary => self + .primary_label() + .map(|label| (label.span, label.message.clone())), + LabelType::Secondary(id) => Some(( + self.secondary_labels()[id].span, + self.secondary_labels()[id].message.clone(), + )), + } + } + fn render_lines<'src>(&self, file: 
&TokenizedFile<'src>) { + let ranges = make_ranges(file, &self); + let max_line_number_width = max(max_line_number_width(&ranges), 3); + let commands = make_renderer_commands(ranges); + let mut max_column = 0; + for command in &commands { + if let (_, RendererCommands::StartRange { column, .. }) = command { + max_column = max(max_column, *column); + } + } + let mut vertical_stack = Vec::new(); + vertical_stack.resize(max_column + 1, None); + + let mut i = 0; + while i < commands.len() { + let mut current_line = commands[i].0; + let mut single_commands = Vec::new(); + let mut start_commands = Vec::new(); + let mut finish_commands = Vec::new(); + while i < commands.len() && current_line == commands[i].0 { + match commands[i].1 { + RendererCommands::SingleRange { label_type } => { + single_commands.push(label_type) + } + RendererCommands::StartRange { label_type, column } => { + start_commands.push((label_type, column)); + } + RendererCommands::FinishRange { label_type, column } => { + finish_commands.push((label_type, column)) + } + } + i += 1; + } + // !!!!!!!!!!!!!!!! + // First - update line drawing stack + for (label_type, column) in start_commands { + vertical_stack[column] = Some(label_type); + } + // Next - draw the line + self.draw_line(current_line, max_line_number_width, file, &vertical_stack); + for label_type in single_commands { + self.render_single_command( + label_type, + max_line_number_width, + file, + &vertical_stack, + ); + } + // Next - render finish commands (drop for now) + for (label_type, column) in finish_commands { + self.render_single_command( + label_type, + max_line_number_width, + file, + &vertical_stack, + ); + vertical_stack[column] = None; + } + // !!!!!!!!!!!!!!!! 
+ // Render some more lines + let mut countdown = 3; + current_line += 1; + while current_line < commands[i].0 { + if countdown == 0 { + if current_line + 1 == commands[i].0 { + self.draw_line(current_line, max_line_number_width, file, &vertical_stack); + } else { + println!( + "{}", + self.make_line_prefix( + LineIndexType::Ellipsis, + max_line_number_width, + &vertical_stack + ) + ); + } + break; + } else { + self.draw_line(current_line, max_line_number_width, file, &vertical_stack); + } + current_line += 1; + countdown -= 1; + } + } + } + + fn render_single_command<'src>( + &self, + label_type: LabelType, + max_line_number_width: usize, + file: &TokenizedFile<'src>, + vertical_stack: &[Option], + ) { + let Some((span, message)) = self.label_data(label_type) else { + return; + }; + let Some(visible) = file.span_visible_on_line(span) else { + return; + }; + + let mut builder = self.make_line_prefix( + LineIndexType::Missing, + max_line_number_width, + vertical_stack, + ); + + builder.push_str(&" ".repeat(visible.columns.start)); + + let underline_width = (visible.columns.end - visible.columns.start).max(1); + let mut underline_label = "^".repeat(underline_width); + underline_label.push_str(&format!(" {}", message)); + + match label_type { + LabelType::Primary => { + if self.severity == Severity::Error { + builder.push_str(&underline_label.red().bold().to_string()); + } else { + builder.push_str(&underline_label.yellow().bold().to_string()); + } + } + LabelType::Secondary(_) => { + builder.push_str(&underline_label.blue().bold().to_string()); + } + } + + println!("{builder}"); + } + + fn draw_line<'src>( + &self, + current_line: usize, + max_line_number_width: usize, + file: &TokenizedFile<'src>, + vertical_stack: &[Option], + ) { + println!( + "{}{}", + self.make_line_prefix( + LineIndexType::Normal(current_line), + max_line_number_width, + vertical_stack + ), + file.line_text(current_line).unwrap_or_default() + ); + } + + fn make_line_prefix<'src>( + &self, + 
current_line: LineIndexType, + max_line_number_width: usize, + vertical_stack: &[Option], + ) -> String { + let line_text = match current_line { + LineIndexType::Normal(current_line) => (current_line + 1).to_string(), + LineIndexType::Missing => "".to_string(), + LineIndexType::Ellipsis => "...".to_string(), + }; + let line_padding = " ".repeat(max_line_number_width - line_text.len()); + let mut builder = format!(" {}{} | ", line_padding, line_text) + .blue() + .bold() + .to_string(); + + for vertical_line in vertical_stack { + if let Some(label) = vertical_line { + let piece = match label { + LabelType::Primary => { + if self.severity == Severity::Error { + " |".red() + } else { + " |".yellow() + } + } + LabelType::Secondary(_) => " |".blue(), + } + .to_string(); + builder.push_str(&piece); + } else { + builder.push_str(" "); + } + } + builder + } + + fn render_header(&self) { + let severity_label = match self.severity { + Severity::Error => "error".red(), + Severity::Warning => "warning".yellow(), + }; + if let Some(ref code) = self.code { + println!( + "{}", + format!("{}[{}]: {}", severity_label, code, self.headline).bold() + ); + } else { + println!( + "{}", + format!("{}: {}", severity_label, self.headline).bold() + ); + } + } +} diff --git a/rottlib/src/lexer/debug_tools.rs b/rottlib/src/lexer/debug_tools.rs deleted file mode 100644 index 2ad9c53..0000000 --- a/rottlib/src/lexer/debug_tools.rs +++ /dev/null @@ -1,83 +0,0 @@ -//! Debug-only helpers for [`TokenizedFile`] -//! -//! This module is **compiled only if** -//! -//! * the current build profile has `debug_assertions` enabled, or -//! * the crate is built with the `debug` cargo feature. -//! -//! These checks have been moved to the parent module. - -/// A technical trait that adds debug helpers to the lexer. -pub trait DebugTools { - /// Pretty-prints the internal layout of the tokenised file - useful when - /// writing new passes or hunting lexer bugs. 
- /// - /// This method writes the layout directly to standard output. - /// - /// The format is unspecified, may change, and is not intended for - /// external tools. - /// - /// Each line in the printed layout starts with its 0-based number for - /// convenience. - fn dump_debug_layout(&self); - - /// Reconstructs the exact, lossless source text that was fed to - /// [`super::TokenizedFile::from_source`] from internal representation - - /// useful for manually verifying that the lexer works. - fn reconstruct_source(&self) -> String; -} - -impl<'src> DebugTools for super::TokenizedFile<'src> { - fn reconstruct_source(&self) -> String { - self.buffer.iter().map(|span| span.lexeme).collect() - } - - fn dump_debug_layout(&self) { - for (row_idx, line) in self.lines.iter().enumerate() { - println!("Line {}", row_idx + 1); - - match (line.continued_from, line.local_range()) { - // Stand-alone line (all tokens start here) - (None, Some(range)) => { - println!("\t[Standalone]"); - dump_spans(&self.buffer[range.clone()]); - } - - // Pure continuation - the only thing on this line is - // the remainder of a multi-line token that started earlier. - (Some(origin_row), None) => { - println!( - "\t[Continued from line {} - no new tokens here]", - origin_row + 1 - ); - } - - // Continuation **plus** some fresh tokens that begin here. - (Some(origin_row), Some(range)) => { - println!("\t[Continued from line {} + new tokens]", origin_row + 1); - dump_spans(&self.buffer[range.clone()]); - } - - // An empty physical line (should be rare, but let's be safe). - (None, None) => { - println!("\t[Empty line]"); - } - } - } - } -} - -/// Helper that prints every span in `spans` together with its UTF-16 -/// column boundaries. 
-fn dump_spans<'src>(spans: &[super::TokenPiece<'src>]) { - let mut col_utf16 = 0usize; - for span in spans { - let start = col_utf16; - let end = start + span.length_utf16; - println!( - "\t\t{:?} @ {}-{}: {:?}", - span.token, start, end, span.lexeme - ); - col_utf16 = end; - } -} diff --git a/rottlib/src/lexer/iterator.rs b/rottlib/src/lexer/iterator.rs deleted file mode 100644 index b4a0bda..0000000 --- a/rottlib/src/lexer/iterator.rs +++ /dev/null @@ -1,200 +0,0 @@ -//! Sub-module that adds an iterator to [`TokenizedFile`] which yields tokens in -//! the order they appear in the source code. -//! -//! ## Examples -//! -//! ```rust -//! let iter = TokenizedFile::from_str("0 / 0").tokens().without_whitespace(); -//! ``` -//! -//! ## Terminology: continued tokens -//! -//! Some [`super::Token`]s (e.g. [`super::Token::CppText`] or -//! [`super::Token::BlockComment`] can span multiple lines and are recorded on -//! every line on which they appear (usually as the first, and sometimes -//! the only, token). -//! In this module these are referred to as "continued" or -//! "carried-over" tokens. -//! Since our iterator needs to return each token only once, we take special -//! care to skip such continued tokens during iteration. - -use super::{TokenLocation, TokenPiece, TokenizedFile}; - -/// An immutable iterator over all tokens in a [`TokenizedFile`], preserving -/// their order of appearance in the original source file. -/// -/// After exhaustion it keeps returning [`None`]. -#[must_use] -#[derive(Clone, Debug)] -pub struct Tokens<'src> { - /// [`TokenLocation`] of the next token to be returned. - cursor: TokenLocation, - /// [`TokenizedFile`] whose tokens we're iterating over. - source_file: &'src TokenizedFile<'src>, - /// When `true`, whitespace tokens are skipped. - skip_whitespace: bool, -} - -// Because we can only return [`None`] after we've returned it once. 
-impl<'src> std::iter::FusedIterator for Tokens<'src> {} - -impl<'src> Tokens<'src> { - /// Makes the iterator skip all whitespace tokens. - #[must_use] - #[inline] - pub fn without_whitespace(mut self) -> Self { - self.skip_whitespace = true; - self - } - - // Returns the position of the next new token, skipping carried-over pieces - // and blank lines. - fn advance_position(&self, position: TokenLocation) -> TokenLocation { - let TokenLocation::Position { - mut line, - mut column, - } = position - else { - return TokenLocation::EndOfFile; - }; - if let Some(current_line) = self.source_file.lines.get(line) { - // `Line::len()` also counts a possible token that continued from - // the previous line. - if column + 1 < current_line.len() { - column += 1; - return TokenLocation::Position { line, column }; - } - } - // Current line is exhausted: walk downward until we find the first line - // that **owns local tokens**, because we only want *new* token, - // not continued from previous lines (they were already iterated over). - line += 1; - while let Some(next_line) = self.source_file.lines.get(line) { - if next_line.local_range().is_some() { - // Start at the first *local* token, - // skipping any carried-over one - column = if next_line.continued_from.is_some() { - 1 - } else { - 0 - }; - return TokenLocation::Position { line, column }; - } - line += 1; // keep skipping empty / pure-carried lines - } - // No more tokens. - TokenLocation::EndOfFile - } - - // Creates a new iterator. - fn new(source_file: &'src TokenizedFile) -> Tokens<'src> { - let mut new_iterator = Tokens { - source_file, - cursor: TokenLocation::Position { line: 0, column: 0 }, - skip_whitespace: false, - }; - // We need to land on the first existing token so [`Iterator::next`] - // can assume cursor is valid. 
- while new_iterator.cursor != TokenLocation::EndOfFile { - if new_iterator.source_file.get(new_iterator.cursor).is_some() { - break; - } - new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor); - } - new_iterator - } -} - -impl<'src> Iterator for Tokens<'src> { - type Item = (TokenLocation, TokenPiece<'src>); - - fn next(&mut self) -> Option { - // We only ever loop to discard whitespaces when the flag is on - while self.cursor != TokenLocation::EndOfFile { - let token_location = self.cursor; - let token_piece = *self.source_file.get(self.cursor)?; - self.cursor = self.advance_position(self.cursor); - - // Optional whitespace-skip - if !self.skip_whitespace || !token_piece.token.is_whitespace() { - return Some((token_location, token_piece)); - } - } - None - } -} - -impl<'src> TokenizedFile<'src> { - // Returns the final local token in `line_number` - // (used to resolve column 0 of a continued line). - fn last_piece_in_line(&self, line_number: usize) -> Option<&TokenPiece> { - self.lines - .get(line_number) - .and_then(|line| line.local_range()) - // `Line::local_range()` is guaranteed to return non-empty `Range`. - .and_then(|range| self.buffer.get(range.end - 1)) - } - - /// Returns [`TokenPiece`] at a given location if it exists. - /// - /// If the line specified by [`TokenLocation`] starts with a token that - /// continues from the previous line - column `0` refers to that token. - /// - /// Never panics, invalid position returns [`None`]. 
- /// - /// ## Examples - /// - /// ```rust - /// use super::{TokenizedFile, TokenLocation, Token}; - /// let file = TokenizedFile::from_str("0 / 0"); - /// assert_eq!( - /// file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token), - /// Some(Token::Divide), - /// ); - /// ``` - #[track_caller] - pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> { - let TokenLocation::Position { line, column } = position else { - return None; - }; - let line = self.lines.get(line)?; - let column = column; - if column >= line.len() { - return None; - } - if let Some(spanned_line_number) = line.continued_from - && column == 0 - { - self.last_piece_in_line(spanned_line_number) - } else { - // If we have a token that continued from the previous line, - // then, relative to `self.buffer`, our `column` is actually 1-based - // and we need to shift it back to being 0-based. - let token_position = - line.local_range.start + column - if line.continued_from.is_some() { 1 } else { 0 }; - self.buffer.get(token_position) - } - } - - /// Returns an iterator over all contained tokens in the order they appear - /// in the original source file. - /// - /// By default includes all tokens, including whitespace and comments. - /// - /// Returns the same iterator as [`TokenizedFile::into_iter`] - #[must_use] - #[inline] - pub fn tokens(&'src self) -> Tokens<'src> { - Tokens::new(self) - } -} - -impl<'src> IntoIterator for &'src TokenizedFile<'src> { - type Item = (TokenLocation, TokenPiece<'src>); - type IntoIter = Tokens<'src>; - - #[inline] - fn into_iter(self) -> Self::IntoIter { - self.tokens() - } -} diff --git a/rottlib/src/lexer/lexing.rs b/rottlib/src/lexer/lexing.rs deleted file mode 100644 index d1996aa..0000000 --- a/rottlib/src/lexer/lexing.rs +++ /dev/null @@ -1,526 +0,0 @@ -//! Lexer for UnrealScript that understands inline `cpptext { ... }` blocks. -//! -//! ## Notable details -//! -//! Lexer for UnrealScript that recognizes inline `cpptext { ... }` blocks. -//! 
-//! In UnrealScript, `cpptext` lets authors embed raw C++ between braces. -//! Because whitespace, newlines, or comments may appear between the -//! `cpptext` keyword and the opening `{`, the lexer must remember that -//! it has just seen `cpptext` - hence a state machine. -//! -//! ## Modes -//! -//! - **Normal** - ordinary UnrealScript tokens. -//! - **AwaitingCppBlock** - after `cpptext`, waiting for the next `{`. -//! -//! When that brace arrives, the lexer consumes the entire C++ block as -//! one token (`Token::Brace(BraceKind::CppBlock)`), tracking nested -//! braces, strings, and comments on the way. If the closing `}` is -//! missing, everything to EOF is treated as C++; downstream parsers must -//! handle that gracefully. - -use logos::Lexer; - -/// Which lexer mode we're in. See the module docs for the full story. -#[derive(Default, Clone, Copy, PartialEq, Eq)] -enum LexerMode { - /// Lexing regular UnrealScript. - #[default] - Normal, - /// Saw `cpptext`; waiting for the opening `{` of a C++ block. - AwaitingCppBlock, -} - -/// Extra per-lexer state. Currently just holds the [`Mode`]. -/// -/// This is a logos-specific implementation detail. -#[derive(Default)] -pub struct LexerState { - mode: LexerMode, -} - -/// Are these braces "real" UnrealScript braces, or the start/end of a C++ block? -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] -pub enum BraceKind { - Normal, - CppBlock, -} - -/// All UnrealScript tokens that our compiler distinguishes. 
-#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)] -#[logos(extras = LexerState)] -pub enum Token { - // # Compiler/directive keywords - #[regex(r"(?i)#exec[^\r\n]*(\r|\n|\r\n)")] - ExecDirective, - #[regex("(?i)cpptext", |lex| { lex.extras.mode = LexerMode::AwaitingCppBlock; })] - CppText, - - // # Declaration & structural keywords - #[regex("(?i)class")] - Class, - #[regex("(?i)struct")] - Struct, - #[regex("(?i)enum")] - Enum, - #[regex("(?i)state")] - State, - #[regex("(?i)function")] - Function, - #[regex("(?i)event")] - Event, - #[regex("(?i)delegate")] - Delegate, - #[regex("(?i)var")] - Var, - #[regex("(?i)local")] - Local, - - // # Inheritance, interface, dependencies - #[regex("(?i)extends")] - Extends, - #[regex("(?i)dependson")] - DependsOn, - - // # Access modifiers & properties - #[regex("(?i)private")] - Private, - #[regex("(?i)protected")] - Protected, - #[regex("(?i)public")] - Public, - #[regex("(?i)const")] - Const, - #[regex("(?i)static")] - Static, - #[regex("(?i)native")] - Native, - #[regex("(?i)abstract")] - Abstract, - #[regex("(?i)deprecated")] - Deprecated, - - // # UnrealScript metadata/specifiers - #[regex("(?i)default")] - Default, - #[regex("(?i)defaultproperties")] - DefaultProperties, - #[regex("(?i)optional")] - Optional, - #[regex("(?i)config")] - Config, - #[regex("(?i)perobjectconfig")] - PerObjectConfig, - #[regex("(?i)globalconfig")] - GlobalConfig, - #[regex("(?i)collapsecategories")] - CollapseCategories, - #[regex("(?i)dontcollapsecategories")] - DontCollapseCategories, - #[regex("(?i)hidecategories")] - HideCategories, - #[regex("(?i)localized")] - Localized, - #[regex("(?i)placeable")] - Placeable, - #[regex("(?i)notplaceable")] - NotPlaceable, - #[regex("(?i)editinlinenew")] - EditInlineNew, - #[regex("(?i)noteditinlinenew")] - NotEditInlineNew, - #[regex("(?i)dynamicrecompile")] - DynamicRecompile, - #[regex("(?i)transient")] - Transient, - #[regex("(?i)operator")] - Operator, - #[regex("(?i)simulated")] 
- Simulated, - #[regex("(?i)latent")] - Latent, - #[regex("(?i)iterator")] - Iterator, - #[regex("(?i)out")] - Out, - #[regex("(?i)skip")] - Skip, - #[regex("(?i)singular")] - Singular, - #[regex("(?i)coerce")] - Coerce, - #[regex("(?i)assert")] - Assert, - #[regex("(?i)ignores")] - Ignores, - #[regex("(?i)within")] - Within, - #[regex("(?i)noexport")] - NoExport, - - // # Replication-related - #[regex("(?i)reliable")] - Reliable, - #[regex("(?i)unreliable")] - Unreliable, - #[regex("(?i)replication")] - Replication, - #[regex("(?i)nativereplication")] - NativeReplication, - - // # Control-flow keywords - #[regex("(?i)goto")] - Goto, - #[regex("(?i)if")] - If, - #[regex("(?i)else")] - Else, - #[regex("(?i)switch")] - Switch, - #[regex("(?i)case")] - Case, - #[regex("(?i)for")] - For, - #[regex("(?i)foreach")] - ForEach, - #[regex("(?i)while")] - While, - #[regex("(?i)do")] - Do, - #[regex("(?i)until")] - Until, - #[regex("(?i)break")] - Break, - #[regex("(?i)continue")] - Continue, - #[regex("(?i)return")] - Return, - - // # Built-in types - #[regex("(?i)int")] - Int, - #[regex("(?i)float")] - Float, - #[regex("(?i)bool")] - Bool, - #[regex("(?i)byte")] - Byte, - #[regex("(?i)string")] - String, - #[regex("(?i)array")] - Array, - #[regex("(?i)name")] - Name, - - // # Literals & identifiers - #[regex(r"0[xX][0-9A-Fa-f]+|[0-9]+")] - IntegerLiteral, - #[regex(r"[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?")] - FloatLiteral, - #[regex(r#""([^"\\\r\n]|\\.)*""#)] - StringLiteral, - #[regex(r"'[a-zA-Z0-9_\. 
\-]*'")] - NameLiteral, - #[regex("(?i)true")] - True, - #[regex("(?i)false")] - False, - #[regex("(?i)none")] - None, - #[regex("(?i)self")] - SelfKeyword, - #[regex("(?i)new")] - New, - #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] - Identifier, - - // # Operations - // ## Exponentiation - #[token("**")] - Exponentiation, - // ## Unary - #[token("++")] - Increment, - #[token("--")] - Decrement, - #[token("!")] - Not, - #[token("~")] - BitwiseNot, - // ## Vector - #[regex("(?i)dot")] - Dot, - #[regex("(?i)cross")] - Cross, - // ## Multiplicative - #[token("*")] - Multiply, - #[token("/")] - Divide, - #[token("%")] - Modulo, - // ## Additive - #[token("+")] - Plus, - #[token("-")] - Minus, - // ## String manipulation - #[token("@")] - ConcatSpace, - #[token("$")] - Concat, - // ## Shifts - #[token("<<")] - LeftShift, - #[token(">>>")] - LogicalRightShift, - #[token(">>")] - RightShift, - // ## Relational - #[token("<")] - Less, - #[token("<=")] - LessEqual, - #[token(">")] - Greater, - #[token(">=")] - GreaterEqual, - #[token("==")] - Equal, - #[token("!=")] - NotEqual, - #[token("~=")] - ApproximatelyEqual, - #[regex("(?i)clockwisefrom")] - ClockwiseFrom, - // ## Bitwise - #[token("&")] - BitwiseAnd, - #[token("|")] - BitwiseOr, - #[token("^")] - BitwiseXor, - // ## Logical - #[token("&&")] - And, - #[token("^^")] - Xor, - #[token("||")] - Or, - // ## Assigments - #[token("=")] - Assign, - #[token("*=")] - MultiplyAssign, - #[token("/=")] - DivideAssign, - #[token("%=")] - ModuloAssign, - #[token("+=")] - PlusAssign, - #[token("-=")] - MinusAssign, - #[token("$=")] - ConcatAssign, - #[token("@=")] - ConcatSpaceAssign, - - // # Punctuation & delimiters - #[token("(")] - LeftParenthesis, - #[token(")")] - RightParenthesis, - #[token("{", handle_brace)] - Brace(BraceKind), - #[token("}")] - RightBrace, - #[token("[")] - LeftBracket, - #[token("]")] - RightBracket, - #[token(";")] - Semicolon, - #[token(",")] - Comma, - #[token(".")] - Period, - #[token(":")] - Colon, - 
#[token("#")] - Hash, - #[token("?")] - Question, - - // # Comments & whitespaces - #[regex(r"//[^\r\n]*")] - LineComment, - #[regex(r"/\*", handle_block_comment)] - BlockComment, - #[regex(r"\r\n|\n|\r")] - Newline, - #[regex(r"[ \t]+")] - Whitespace, - - // # Technical - Error, -} - -impl Token { - /// Returns `true` if this token is a newline (`Token::NewLine`). - pub fn is_newline(&self) -> bool { - matches!(self, Token::Newline) - } - - /// Returns `true` if this token is trivia whitespace - /// (`Token::Whitespace` or `Token::NewLine`). - /// - /// Note: comments are **not** considered whitespace. - pub fn is_whitespace(&self) -> bool { - matches!(&self, Token::Whitespace | Token::Newline) - } - - /// Returns `true` if this token may span multiple physical lines - /// (i.e. can contain newline characters). - pub fn can_span_lines(&self) -> bool { - matches!( - self, - Token::BlockComment | Token::Brace(BraceKind::CppBlock) | Token::Error - ) - } - - /// Returns `true` if this token can appear in type position - /// (either a built-in type keyword or an identifier). - pub fn is_valid_type_name_token(&self) -> bool { - matches!( - self, - Token::Int - | Token::Float - | Token::Bool - | Token::Byte - | Token::String - | Token::Array - | Token::Name - | Token::Identifier - ) - } -} - -/// Consume a /* ... */ block comment with arbitrary nesting -/// (like UnrealScript allows). -/// -/// Matches the whole comment (delimiters included) or [`None`] if the file ends -/// before every `/*` is closed. 
-fn handle_block_comment(lexer: &mut Lexer) -> Option<()> { - let mut comment_depth = 1; - while let Some(next_char) = lexer.remainder().chars().next() { - if lexer.remainder().starts_with("/*") { - comment_depth += 1; - lexer.bump(2); - continue; - } - if lexer.remainder().starts_with("*/") { - comment_depth -= 1; - lexer.bump(2); - if comment_depth == 0 { - return Some(()); - } - continue; - } - lexer.bump(next_char.len_utf8()); - } - // Unterminated comment - None -} - -/// Called for every `{`. -/// -/// This method either emits an opening brace or token for `cppblock`, -/// depending on lexer's current state. -fn handle_brace(lexer: &mut Lexer) -> Option { - match lexer.extras.mode { - LexerMode::Normal => Some(BraceKind::Normal), - - LexerMode::AwaitingCppBlock => { - lexer.extras.mode = LexerMode::Normal; - consume_cpp_block(lexer); - Some(BraceKind::CppBlock) - } - } -} - -/// Consumes a complete C++ block, handling: -/// - Nested `{...}` pairs -/// - String literals (`"..."` and `'...'`), including escaped quotes -/// - Line comments (`// ...\n`) -/// - Block comments (`/* ... */`) -/// -/// Leaves the lexer positioned immediately after the closing `}` of the block. -/// The opening `{` must have already been consumed by the caller. 
-fn consume_cpp_block(lexer: &mut Lexer) { - let mut depth = 1; - while let Some(ch) = lexer.remainder().chars().next() { - match ch { - '{' => { - depth += 1; - lexer.bump(1); - } - '}' => { - depth -= 1; - lexer.bump(1); - if depth == 0 { - break; - } - } - '/' if lexer.remainder().starts_with("/*") => { - lexer.bump(2); // consuming two-byte sequence `/*` - consume_c_comment(lexer) - } - '/' if lexer.remainder().starts_with("//") => { - lexer.bump(2); // consuming two-byte sequence `//` - while let Some(c) = lexer.remainder().chars().next() { - lexer.bump(c.len_utf8()); - if c == '\n' { - break; - } - } - } - '"' | '\'' => { - lexer.bump(1); // skip `'` or `"` - consume_string_literal(lexer, ch); - } - _ => lexer.bump(ch.len_utf8()), - } - } -} - -/// Consume over a C-style `/* ... */` comment (without nesting). -/// -/// Assumes that opener `/*` is already consumed. -fn consume_c_comment(lexer: &mut Lexer) { - while let Some(next_character) = lexer.remainder().chars().next() { - if lexer.remainder().starts_with("*/") { - lexer.bump(2); - break; - } else { - lexer.bump(next_character.len_utf8()); - } - } -} - -/// Consume a string literal from C++ code. -/// -/// Assumes that opening quotation mark is already consumed. -fn consume_string_literal(lexer: &mut Lexer, delimiter: char) { - while let Some(next_character) = lexer.remainder().chars().next() { - lexer.bump(next_character.len_utf8()); - if next_character == '\\' { - // Skip the escaped character - if let Some(next) = lexer.remainder().chars().next() { - lexer.bump(next.len_utf8()); - } - } else if next_character == delimiter { - return; - } - } -} diff --git a/rottlib/src/lexer/mod.rs b/rottlib/src/lexer/mod.rs index 6d0c6e7..916a6b1 100644 --- a/rottlib/src/lexer/mod.rs +++ b/rottlib/src/lexer/mod.rs @@ -2,7 +2,8 @@ //! //! Converts raw source text into a lossless, position-aware stream of lexical //! [`Token`]s, grouped *per physical line*, and returns it as -//! a [`TokenizedFile`]. +//! 
a [`TokenizedFile`]. A trailing newline terminates the last physical line +//! rather than introducing an additional empty line. //! //! Design goals: //! @@ -12,191 +13,184 @@ //! precompute lengths of each token in that encoding, making interfacing //! easier. //! -//! ## Iteration over tokens -//! -//! For simplicity we've moved out code for iterating over tokens of -//! [`TokenizedFile`] into a separate submodule [`iterator`]. -//! //! ## Opt-in debug helpers //! //! Extra diagnostics become available in **debug builds** or when the crate is //! compiled with `debug` feature enabled. They live in the [`debug_tools`] //! extension trait, implemented for [`TokenizedFile`]. -//! -//! ```rust -//! // bring the trait into scope -//! use lexer::DebugTools; -//! -//! let file = TokenizedFile::from_str("local int myValue;"); -//! file.debug_dump(); // pretty-print token layout -//! let text = file.to_source(); // reconstruct original text -//! ``` -mod debug_tools; -mod iterator; -mod lexing; +mod queries; +mod raw_lexer; +#[cfg(test)] +mod tests; +mod token; +use std::collections::HashMap; use std::ops::Range; use logos::Logos; -#[cfg(any(debug_assertions, feature = "debug"))] -pub use debug_tools::DebugTools; -pub use iterator::Tokens; -pub use lexing::{BraceKind, Token}; +use raw_lexer::RawToken; + +pub use raw_lexer::BraceKind; +pub use token::Keyword; +pub use token::Token; /// Empirically chosen starting size for token buffer (used during tokenization) /// that provides good performance. const DEFAULT_TOKEN_BUFFER_CAPACITY: usize = 20_000; -/// A slice tagged with its token kind plus two length counters. +// TODO: check this!!! +/// Visible fragment of a token on one physical line. /// -/// *No absolute coordinates* are stored - they are recomputed per line. -#[derive(Debug, Hash, Clone, Copy, PartialEq, Eq)] -pub struct TokenPiece<'src> { - /// Token, represented by this [`TokenPiece`]. 
+/// `columns` is an end-exclusive range inside the string returned by +/// [`TokenizedFile::line_text`] for that line. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct VisibleLineSpan { + pub line: usize, + pub columns: std::ops::Range, +} + +/// A token together with its source text and precomputed UTF-16 length. +/// +/// It does not store an absolute file position. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub struct TokenData<'src> { + /// Kind of token that was lexed. pub token: Token, /// Underlying text that was lexed as the corresponding token. pub lexeme: &'src str, /// Length of the token in UTF-16 code units for the needs of easy seeking /// using given LSP cursor coordinates (line + UTF-16 offset). /// Precomputed for convenience. - pub length_utf16: usize, + pub utf16_length: usize, } -/// Defines location of a token inside [`TokenizedFile`] in a form convenient -/// for communicating through LSP. -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum TokenLocation { - /// Actual position of some token in the file. - Position { - /// 0-based line number. - line: usize, - /// 0-based index of a token in the line, possibly including the token that - /// has continued from the previous line. - /// - /// Columns count tokens, not bytes or chars. - column: usize, - }, - /// Position af the end-of-file. - EndOfFile, -} +/// 0-based index of a token within the file-wide token buffer. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)] +pub struct TokenPosition(pub usize); -/// A tokenized, lossless representation of an UnrealScript source file. -#[derive(Debug)] +/// A tokenized, lossless representation of an `UnrealScript` source file. +#[derive(Clone, Debug, PartialEq, Eq)] pub struct TokenizedFile<'src> { /// Arena of every token span in this file. - buffer: Vec>, - /// Mapping that provides an easy and efficient access to tokens by - /// line number. 
+ buffer: Vec>, + /// Mapping from physical line number to the tokens that belong to it. lines: Vec, + /// Mapping token index to ranges of bytes that correspond to + /// visible characters (i.e. all non line terminators) in its lines. + /// + /// Records only exists for multiline tokens and ranges can be empty for + /// lines that only contain line break boundary. + multi_line_map: HashMap>, /// Simple flag for marking erroneous state. had_errors: bool, } -/// Mutable state that encapsulates data needed during the tokenization loop. +/// An immutable iterator over all tokens in a [`TokenizedFile`], preserving +/// their order of appearance in the original source file. /// -/// Access to stored tokens is provided through the [`iterator::Tokens`] -/// iterator. +/// After exhaustion it keeps returning [`None`]. +#[must_use] +#[derive(Clone, Debug)] +pub struct Tokens<'file, 'src> { + /// Position of the next token to be returned in the canonical file-wide + /// token arena. + cursor: TokenPosition, + /// [`TokenizedFile`] whose tokens we're iterating over. + source_file: &'file TokenizedFile<'src>, +} + +/// Type for referring to line numbers. +type LineNumber = usize; + +/// Type for specific tokens inside each [`Line`]. +type BufferIndex = usize; + +/// Type for describing sub-range of visible characters of a single line for +/// some token. +type VisibleByteRange = Range; + +/// Representation of a single physical line of the source file. +/// +/// Uses ranges instead of slices to avoid a self-referential relationship +/// with [`TokenizedFile`], which Rust forbids. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +struct Line { + /// Token that began on an earlier line (`None` for standalone lines). + continued_from: Option, + /// Contiguous tokens that started on this line (`start >= end` iff empty). + local_range: Range, +} + +/// Mutable state used while tokenizing a source file. 
+#[derive(Debug)] struct Tokenizer<'src> { - /// Arena that owns every [`TokenPiece`] produced for the file. - buffer: Vec>, + /// Arena that owns every [`TokenData`] produced for the file. + buffer: Vec>, /// Mapping from physical line number to the tokens that belong to it. lines: Vec, - /// The current 0-based physical line number. + /// Mapping token index to ranges of bytes that correspond to + /// visible characters in its lines. + multi_line_map: HashMap>, + /// The 0-based physical line number that is currently being scanned. line_number: usize, - /// Index in [`Tokenizer::buffer`] where the current *line* starts. - slice_start_index: usize, + /// Points to the first token (index in [`Tokenizer::buffer`]) not yet + /// committed to `lines`, e.g. where the current *line* starts. + uncommitted_start_index: usize, /// When a multi-line token is being scanned, stores the 0-based line /// on which it started; [`None`] otherwise. /// - /// `Some(line_idx)` iff the current line is within a multi-line token that - /// started on `line_idx`; it is consumed exactly once by + /// `Some(line_number)` iff the current line is within a multi-line token + /// that started on `line_number`; it is consumed exactly once by /// [`Self::commit_current_line`]. - multi_line_start: Option, - /// Set to [`true`] if the lexer reported any error tokens. + multi_line_start_line: Option, + /// Set to `true` if the lexer reported any error tokens. had_errors: bool, } impl<'src> TokenizedFile<'src> { - /// Tokenize `source` and return a fresh [`TokenizedFile`]. + /// Tokenizes `source` and returns a fresh [`TokenizedFile`]. /// - /// ## Examples - /// - /// ```rust - /// let source_text = "2 + 2 * 2".to_string(); - /// let tokenized_file = TokenizedFile::from_str(&source_text); - /// ``` + /// Its output is lossless and groups resulting tokens by physical lines. + /// Error spans are preserved as [`Token::Error`]. 
#[must_use] - pub fn from_str(source: &'src str) -> TokenizedFile<'src> { - let mut tokenizer = Self::builder(); - let mut lexer = Token::lexer(source); + pub fn tokenize(source: &'src str) -> Self { + let mut tokenizer = Tokenizer::new(); + let mut lexer = RawToken::lexer(source); while let Some(token_result) = lexer.next() { - // Add `Token:Error` manually, since Logos won't do it for us. - let token = token_result.unwrap_or_else(|_| { + // Add `Token::Error` manually, since Logos won't do it for us. + let token = token_result.unwrap_or_else(|()| { tokenizer.had_errors = true; - Token::Error + RawToken::Error }); - let token_piece = make_token_piece(token, lexer.slice()); + let token_piece = make_token_data(Token::from(token), lexer.slice()); tokenizer.process_token_piece(token_piece); } tokenizer.into_tokenized_file() } - /// Returns [`true`] if any erroneous tokens were produced during building - /// of this [`TokenizedFile`]. - /// - /// ## Examples - /// - /// ```rust - /// let tokenized_file = TokenizedFile::from_str("function test() {}"); - /// if tokenized_file.has_errors() { - /// println!("Error while parsing file."); - /// } - /// ``` - #[inline] - pub fn has_errors(&self) -> bool { + /// Returns `true` if tokenization produced any error tokens. + #[must_use] + pub const fn has_errors(&self) -> bool { self.had_errors } - /// Create an empty tokenizer state with tuned buffer capacity. - fn builder() -> Tokenizer<'src> { - Tokenizer { - buffer: Vec::with_capacity(DEFAULT_TOKEN_BUFFER_CAPACITY), - lines: Vec::new(), - line_number: 0, - slice_start_index: 0, - multi_line_start: None, - had_errors: false, - } + /// Returns an iterator over all contained tokens in the order they appear + /// in the original source file. + /// + /// Returns pairs of position and token data: `(TokenPosition, TokenData)`. + pub const fn iter(&self) -> Tokens<'_, 'src> { + Tokens::new(self) } } -/// Type for indexing lines in a [`TokenizedFile`]. 
-type LineIdx = usize; - -/// Type for specific tokens inside each [`Line`]. -type TokenIdx = usize; - -/// Representation of a single physical line of the source file. -/// -/// [`Range`] are used instead of slices to avoid creating -/// a self-referential struct (with [`TokenizedFile`]), which rust forbids. -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -struct Line { - /// Token that began on an earlier line (`None` for standalone lines). - continued_from: Option, - /// Contiguous tokens that started on this line (`start >= end` iff empty). - local_range: Range, -} - impl Line { /// Creates a standalone line that owns a contiguous slice in /// the [`TokenizedFile::buffer`] arena. - #[inline] - fn standalone(locals: Range) -> Line { - Line { + const fn standalone(locals: Range) -> Self { + Self { continued_from: None, local_range: locals, } @@ -204,9 +198,8 @@ impl Line { /// Creates a line that is part of a multi-line token started on /// another line, referencing the 0-based index of its origin. - #[inline] - fn spanned(carried: LineIdx) -> Line { - Line { + const fn continued(carried: LineNumber) -> Self { + Self { continued_from: Some(carried), local_range: 0..0, } @@ -214,9 +207,8 @@ impl Line { /// Creates a line that is part of a multi-line token started on /// another line and also contains additional tokens local to itself. - #[inline] - fn spanned_with_tokens(carried: LineIdx, locals: Range) -> Line { - Line { + const fn continued_with_tokens(carried: LineNumber, locals: Range) -> Self { + Self { continued_from: Some(carried), local_range: locals, } @@ -227,29 +219,31 @@ impl Line { /// /// [`None`] means there are no such tokens. Otherwise range is guaranteed /// to not be empty. - #[inline] - fn local_range(&self) -> Option> { + fn local_range(&self) -> Option> { if self.local_range.is_empty() { None } else { Some(self.local_range.clone()) } } - - /// Returns the number of tokens on this line. 
- /// - /// Counts both tokens that started on this line and tokens that continued - /// from previous one. - #[inline] - fn len(&self) -> usize { - (if self.continued_from.is_some() { 1 } else { 0 }) - + (self.local_range.end - self.local_range.start) - } } impl<'src> Tokenizer<'src> { + /// Returns an empty tokenizer state. + fn new() -> Self { + Self { + buffer: Vec::with_capacity(DEFAULT_TOKEN_BUFFER_CAPACITY), + lines: Vec::new(), + multi_line_map: HashMap::new(), + line_number: 0, + uncommitted_start_index: 0, + multi_line_start_line: None, + had_errors: false, + } + } + /// Handles a token span and dispatches to the appropriate handler. - fn process_token_piece(&mut self, token_piece: TokenPiece<'src>) { + fn process_token_piece(&mut self, token_piece: TokenData<'src>) { if token_piece.token.can_span_lines() { self.process_multi_line_token(token_piece); } else { @@ -259,7 +253,7 @@ impl<'src> Tokenizer<'src> { /// Handles simple tokens that *never* span multiple lines, allowing us to /// skip a lot of work. - fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) { + fn process_single_line_token(&mut self, token_piece: TokenData<'src>) { if token_piece.token.is_newline() { self.line_number += 1; self.buffer.push(token_piece); @@ -270,34 +264,40 @@ impl<'src> Tokenizer<'src> { } /// Handles tokens that might contain one or more newline characters. - fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) { + fn process_multi_line_token(&mut self, token_piece: TokenData<'src>) { let start_line = self.line_number; - let newline_count = count_line_breaks(token_piece.lexeme); + let line_break_map = split_visible_line_segments(token_piece.lexeme); + let newline_count = line_break_map.len().saturating_sub(1); - // Did this token end in a newline? - // This can happen if this is an `Error` token that ends the file. + // Needed for unterminated multi-line error tokens that reach EOF right + // after a line break. 
let ends_with_newline = token_piece.lexeme.ends_with('\n') || token_piece.lexeme.ends_with('\r'); + let multi_line_token_index = self.buffer.len(); self.buffer.push(token_piece); - // We only need to commit the line if this token actually ended the line + if !line_break_map.is_empty() { + self.multi_line_map + .insert(multi_line_token_index, line_break_map); + } + // A line is committed only once the token stream has actually crossed + // a physical line boundary. if newline_count > 0 { + // This clears `multi_line_start_line` self.commit_current_line(); - // We only need to insert one `Line::spanned(start_line)` per + // We only need to insert one `Line::continued(start_line)` per // *interior* line: // // standalone | local int i = /* Now we start long comment - // spanned | with three line breaks and *exactly* two - // spanned | inner lines that contain nothing but - // spanned_with_tokens | comment bytes! */ = 0; + // continued | with three line breaks and *exactly* two + // continued | inner lines that contain nothing but + // continued_with_tokens| comment bytes! */ = 0; let inner_lines_count = newline_count - 1; for _ in 0..inner_lines_count { - self.lines.push(Line::spanned(start_line)); + self.lines.push(Line::continued(start_line)); } - // This is called *after* `commit_current_line()` cleared previous - // stored value - self.multi_line_start = if ends_with_newline { - None // we're done at this point + self.multi_line_start_line = if ends_with_newline { + None } else { Some(start_line) }; @@ -309,32 +309,34 @@ impl<'src> Tokenizer<'src> { /// Commits the tokens of the current physical line into `self.lines`. fn commit_current_line(&mut self) { let slice_end = self.buffer.len(); - if slice_end > self.slice_start_index { - let slice = self.slice_start_index..slice_end; + // A trailing newline terminates the current physical line rather than + // creating an additional empty line entry. 
+ if slice_end > self.uncommitted_start_index { + let slice = self.uncommitted_start_index..slice_end; // If we were in the middle of a multi-line token, we // *always* consume `multi_line_start` here, ensuring that each call // to `commit_current_line()` only applies it once. // This guarantees no "bleed" between adjacent multi-line tokens. - if let Some(from) = self.multi_line_start.take() { - self.lines.push(Line::spanned_with_tokens(from, slice)); + if let Some(from) = self.multi_line_start_line.take() { + self.lines.push(Line::continued_with_tokens(from, slice)); } else { self.lines.push(Line::standalone(slice)); } - self.slice_start_index = slice_end; + self.uncommitted_start_index = slice_end; } } /// Finishes tokenization, converting accumulated data into /// [`TokenizedFile`]. fn into_tokenized_file(mut self) -> TokenizedFile<'src> { - // Flush trailing tokens for which `commit` wasn't auto triggered + // Commits the final line when the file does not end with a newline. self.commit_current_line(); // If we still have a `multi_line_start` // (i.e. a pure multi-line token with no local tokens on its last line), - // push a bare `Line::spanned` entry. - if let Some(from) = self.multi_line_start.take() { - self.lines.push(Line::spanned(from)); + // push a bare `Line::continued` entry. + if let Some(from) = self.multi_line_start_line.take() { + self.lines.push(Line::continued(from)); } self.buffer.shrink_to_fit(); @@ -343,40 +345,105 @@ impl<'src> Tokenizer<'src> { TokenizedFile { buffer: self.buffer, lines: self.lines, + multi_line_map: self.multi_line_map, had_errors: self.had_errors, } } } -fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> { +fn make_token_data(token: Token, text: &str) -> TokenData<'_> { let length_utf16 = text.encode_utf16().count(); - TokenPiece { + TokenData { lexeme: text, token, - length_utf16, + utf16_length: length_utf16, } } -/// Counts the number of newlines in given text. 
-fn count_line_breaks(text: &str) -> usize { - let mut bytes_iterator = text.as_bytes().iter().peekable(); - let mut newline_count = 0; - while let Some(&next_byte) = bytes_iterator.next() { +/// Returns byte ranges of visible text characters for each physical line +/// spanned by `text`. +/// +/// Returns an empty vector if `text` contains no line breaks. +fn split_visible_line_segments(text: &str) -> Vec> { + let bytes = text.as_bytes(); + let mut segments = Vec::new(); + + let mut segment_start = 0usize; + let mut saw_line_break = false; + let mut bytes_iterator = bytes.iter().enumerate().peekable(); + while let Some((next_byte_index, &next_byte)) = bytes_iterator.next() { // Logos' regex rule is "\r\n|\n|\r", so we agree with it on new line // character treatment match next_byte { b'\r' => { - newline_count += 1; - if let Some(&&b'\n') = bytes_iterator.peek() { - // skip the '\n' in a CRLF - bytes_iterator.next(); - } + saw_line_break = true; + let visible_end = next_byte_index; + let next_start = + if let Some((next_line_break_index, b'\n')) = bytes_iterator.peek().copied() { + bytes_iterator.next(); // consume '\n' of `\r\n` + next_line_break_index + 1 + } else { + next_byte_index + 1 + }; + segments.push(segment_start..visible_end); + segment_start = next_start; } b'\n' => { - newline_count += 1; + saw_line_break = true; + let visible_end = next_byte_index; + segments.push(segment_start..visible_end); + segment_start = next_byte_index + 1; } _ => (), } } - newline_count + // If the token contained at least one line break, include the visible + // segment of its final physical line as well. This may be empty, e.g. + // for text ending with '\n' or '\r\n'. + if saw_line_break { + segments.push(segment_start..bytes.len()); + } + segments +} + +// Because once `cursor` moves past the end of `buffer`, it can never become +// valid again. 
+impl std::iter::FusedIterator for Tokens<'_, '_> {} + +impl<'file, 'src> Tokens<'file, 'src> { + /// Advances the iterator cursor by one token. + const fn advance(&mut self) { + self.cursor.0 += 1; + } + + /// Creates a new iterator. + const fn new(source_file: &'file TokenizedFile<'src>) -> Self { + Self { + source_file, + cursor: TokenPosition(0), + } + } +} + +impl<'src> Iterator for Tokens<'_, 'src> { + type Item = (TokenPosition, TokenData<'src>); + + fn next(&mut self) -> Option { + if let Some(&token_piece) = self.source_file.buffer.get(self.cursor.0) { + let position = self.cursor; + self.advance(); + Some((position, token_piece)) + } else { + None + } + } +} + +impl<'file, 'src> IntoIterator for &'file TokenizedFile<'src> { + type Item = (TokenPosition, TokenData<'src>); + type IntoIter = Tokens<'file, 'src>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } } diff --git a/rottlib/src/lexer/queries.rs b/rottlib/src/lexer/queries.rs new file mode 100644 index 0000000..74f2da7 --- /dev/null +++ b/rottlib/src/lexer/queries.rs @@ -0,0 +1,291 @@ +//! # Query helpers +//! +//! Read-only convenience APIs for inspecting a [`TokenizedFile`] without +//! exposing its internal representation. + +use crate::lexer::{Line, TokenData, TokenPosition, TokenizedFile, VisibleLineSpan}; + +impl<'src> TokenizedFile<'src> { + /// Returns the number of physical lines stored in this file. + /// + /// Empty line after the trailing newline sequence isn't counted as a line + /// by this method. + #[must_use] + pub const fn line_count(&self) -> usize { + self.lines.len() + } + + /// Returns an iterator over tokens that *start* on physical line + /// `line_number`. + /// + /// The yielded items are `(TokenPosition, TokenData)` pairs, matching the + /// canonical file-wide token arena. + /// + /// If the line ends with a newline token, that newline token is included. 
+ /// + /// If the line begins with a carried fragment of a multi-line token that + /// started on an earlier line, that fragment is **not** yielded here. + /// Use [`TokenizedFile::line_text`] to reconstruct the visible content of + /// the full line. + /// + /// If `line_number` is out of bounds, the returned iterator is empty. + #[must_use] + pub fn line_tokens( + &self, + line_number: usize, + ) -> std::vec::IntoIter<(TokenPosition, TokenData<'src>)> { + let Some(line) = self.lines.get(line_number) else { + return Vec::new().into_iter(); + }; + let Some(local_range) = line.local_range() else { + return Vec::new().into_iter(); + }; + let mut out = Vec::with_capacity(local_range.len()); + for buffer_index in local_range { + // Invariant: + // `Line::local_range()` is always constructed from contiguous + // slices of `self.buffer` during tokenization, so every index in + // this range must be valid for `self.buffer`. + let token_data = self.buffer[buffer_index]; + out.push((TokenPosition(buffer_index), token_data)); + } + out.into_iter() + } + + /// Returns the token stored at `position`, if that position is valid. + /// + /// This is a direct lookup into the file-wide token buffer. + #[must_use] + pub fn token_at(&self, position: TokenPosition) -> Option> { + self.buffer.get(position.0).copied() + } + + /// Reconstructs the visible text of physical line `line_index`. + /// + /// The returned string does **not** include a trailing line terminator. + /// + /// Unlike [`TokenizedFile::line_tokens`], this method includes the visible + /// fragment of a multi-line token carried from an earlier line. + /// + /// Returns [`None`] iff `line_index >= self.line_count()`. 
+ #[must_use] + pub fn line_text(&self, line_index: usize) -> Option { + let line = self.lines.get(line_index)?; + let mut out = String::new(); + + if let Some(piece) = self.carried_piece_for_line(line_index) { + out.push_str(piece); + } + let Some(range) = line.local_range() else { + return Some(out); + }; + for buffer_index in range.clone() { + let token_piece = self.buffer[buffer_index]; + if token_piece.token.is_newline() { + // Must be last token + debug_assert_eq!(buffer_index + 1, range.end); + break; + } + if token_piece.token.can_span_lines() + && let Some(first_segment) = self + .multi_line_map + .get(&buffer_index) + .and_then(|segments| segments.first()) + { + out.push_str(&token_piece.lexeme[first_segment.clone()]); + // Must be last token + debug_assert_eq!(buffer_index + 1, range.end); + break; + } + out.push_str(token_piece.lexeme); + } + Some(out) + } + + /// Returns the 0-based physical line on which the token at `position` + /// starts. + /// + /// For multi-line tokens, this is the line where the token begins, not + /// every physical line it spans. + /// + /// Returns `None` if `position` is out of bounds. + #[must_use] + pub fn token_line(&self, position: TokenPosition) -> Option { + // Reject invalid token positions early. + self.buffer.get(position.0)?; + + let line_index = self + .lines + .partition_point(|line| self.line_search_upper_bound(line) <= position.0); + + (line_index < self.lines.len()).then_some(line_index) + } + + /// Returns the exclusive upper token index bound for binary-searching + /// lines by token position. + /// + /// In other words: every token that "belongs" to this line in start-line + /// terms has index `< returned_value`. + fn line_search_upper_bound(&self, line: &Line) -> usize { + if let Some(local_range) = line.local_range() { + local_range.end + } else { + // Pure continuation line: it contains only the carried fragment of + // a multi-line token that started earlier. 
+ // + // That token is always the last local token on the origin line, so + // its token index + 1 acts as the exclusive upper bound. + let origin_line = line + .continued_from + .expect("empty line entry must be a continuation line"); + self.carried_token_index(origin_line) + .expect("continuation line must point to a valid origin token") + + 1 + } + } + + /// If `line_index` begins with a fragment of a multi-line token that + /// started earlier, returns the visible slice of that token for this line. + fn carried_piece_for_line(&self, line_index: usize) -> Option<&'src str> { + // Find carried, multiline token + let origin_line = self.lines.get(line_index)?.continued_from?; + let carried_token_index = self.carried_token_index(origin_line)?; + // Find right part of the multiline token's lexeme + let segments = self.multi_line_map.get(&carried_token_index)?; + let segment_index = line_index.checked_sub(origin_line)?; + let boundary = segments.get(segment_index)?; + self.buffer + .get(carried_token_index)? + .lexeme + .get(boundary.clone()) + } + + /// Recovers the token index of the multi-line token that started on + /// `origin_line` and is carried into later lines. + /// + /// In the current representation, this is always the last local token that + /// started on the origin line. + fn carried_token_index(&self, origin_line: usize) -> Option { + let range = self.lines.get(origin_line)?.local_range()?; + let token_index = range.end.checked_sub(1)?; + + debug_assert!(self.buffer[token_index].token.can_span_lines()); + Some(token_index) + } + + /// Returns the visible per-line spans occupied by the token at `position`. + /// + /// Coordinates are expressed in visible character columns inside + /// `line_text(line)`, with an exclusive end bound. + /// + /// Newline-only tokens have no visible text, so they return an empty vector. + /// + /// Returns `None` if `position` is invalid. 
+ #[must_use] + pub fn token_visible_spans(&self, position: TokenPosition) -> Option> { + let token_piece = self.buffer.get(position.0).copied()?; + let start_line = self.token_line(position)?; + let start_column = self.token_start_visible_column(position)?; + + if token_piece.token.is_newline() { + return Some(Vec::new()); + } + + // True multi-line token: reuse already computed visible byte segments, + // then convert them into visible character columns. + if let Some(segments) = self.multi_line_map.get(&position.0) { + let mut out = Vec::with_capacity(segments.len()); + + for (segment_index, byte_range) in segments.iter().enumerate() { + let visible_text = &token_piece.lexeme[byte_range.clone()]; + let width = visible_text.chars().count(); + + // Empty visible fragment: skip it. + // This matters for things like a token ending with '\n'. + if width == 0 { + continue; + } + + let line = start_line + segment_index; + + // A trailing newline does not create an extra stored physical line. + if line >= self.line_count() { + break; + } + + let column_start = if segment_index == 0 { start_column } else { 0 }; + out.push(VisibleLineSpan { + line, + columns: column_start..(column_start + width), + }); + } + + return Some(out); + } + + // Single-line token, including "can_span_lines" tokens that happen not + // to contain a line break. + let width = token_piece.lexeme.chars().count(); + Some(vec![VisibleLineSpan { + line: start_line, + columns: start_column..(start_column + width), + }]) + } + + /// Returns the visible start column of the token at `position` inside + /// `line_text(token_line(position))`. + /// + /// Column is measured in visible characters, excluding line terminators. 
+ fn token_start_visible_column(&self, position: TokenPosition) -> Option { + let line_index = self.token_line(position)?; + let line = self.lines.get(line_index)?; + + let mut column = self + .carried_piece_for_line(line_index) + .map_or(0, |text| text.chars().count()); + + let local_range = line.local_range()?; + for buffer_index in local_range { + if buffer_index == position.0 { + return Some(column); + } + + let token_piece = self.buffer.get(buffer_index)?; + + if token_piece.token.is_newline() { + break; + } + + if token_piece.token.can_span_lines() && self.multi_line_map.contains_key(&buffer_index) + { + //debug_assert_eq!(buffer_index + 1, local_range.end); + return None; + } + + column += token_piece.lexeme.chars().count(); + } + + None + } + + #[must_use] + pub fn span_visible_on_line(&self, span: crate::ast::AstSpan) -> Option { + let start = self + .token_visible_spans(span.token_from)? + .into_iter() + .next()?; + let end = self + .token_visible_spans(span.token_to)? + .into_iter() + .last()?; + + if start.line != end.line { + return None; + } + + Some(VisibleLineSpan { + line: start.line, + columns: start.columns.start..end.columns.end, + }) + } +} diff --git a/rottlib/src/lexer/raw_lexer.rs b/rottlib/src/lexer/raw_lexer.rs new file mode 100644 index 0000000..892f1d7 --- /dev/null +++ b/rottlib/src/lexer/raw_lexer.rs @@ -0,0 +1,632 @@ +//! Lexer for `UnrealScript` that understands inline `cpptext { ... }` blocks. +//! +//! ## Notable details +//! +//! Lexer for `UnrealScript` that recognizes inline `cpptext { ... }` blocks. +//! +//! In `UnrealScript`, `cpptext` lets authors embed raw C++ between braces.\ +//! Because whitespace, newlines, or comments may appear between the +//! `cpptext` keyword and the opening `{`, the lexer must remember that +//! it has just seen `cpptext` - hence a state machine. +//! +//! ## Modes +//! +//! - **Normal** - ordinary `UnrealScript` `RawTokens`. +//! 
- **`AwaitingCppBlock`** - after `cpptext`, waiting for the next `{`. +//! +//! When that brace arrives, the lexer consumes the entire C++ block as +//! one `RawToken` (`RawToken::Brace(BraceKind::CppBlock)`), tracking nested +//! braces, strings, and comments on the way. If the closing `}` is +//! missing, everything to EOF is treated as C++; downstream parsers must +//! handle that gracefully. + +use logos::Lexer; + +/// Which lexer mode we're in. See the module docs for the full story. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Default)] +enum LexerMode { + /// Lexing regular `UnrealScript`. + #[default] + Normal, + /// Saw `cpptext`; waiting for the opening `{` of a C++ block. + AwaitingCppBlock, +} + +/// Extra per-lexer state. Currently just holds the [`LexerMode`]. +/// +/// This is a logos-specific implementation detail. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub struct LexerState { + mode: LexerMode, +} + +/// Distinguishes an ordinary `{` token from one that starts +/// an embedded C++ block. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum BraceKind { + /// An ordinary `UnrealScript` `{`. + Normal, + /// A `{` that starts an embedded C++ block and consumes through its + /// matching `}`. + CppBlock, +} + +/// Tokens produced by the `UnrealScript` lexer. +/// +/// Includes both syntactic tokens and trivia such as whitespace, newlines, +/// and comments. 
+#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)] +#[logos(extras = LexerState)] +pub enum RawToken { + // # Compiler/directive keywords + #[regex(r"(?i)#exec[^\r\n]*(?:\r\n|\n|\r)?")] + ExecDirective, + #[regex("(?i)cpptext", |lex| { + if is_next_nontrivia_left_brace(lex) { + lex.extras.mode = LexerMode::AwaitingCppBlock; + } else { + lex.extras.mode = LexerMode::Normal; + } + })] + CppText, + + #[regex("(?i)cppstruct", |lex| { + if is_next_nontrivia_left_brace(lex) { + lex.extras.mode = LexerMode::AwaitingCppBlock; + } else { + lex.extras.mode = LexerMode::Normal; + } + })] + CppStruct, + // # Declaration & structural keywords + //#[regex("(?i)class")] + #[token("class", ignore(case))] + Class, + #[token("struct", ignore(case))] + Struct, + #[token("enum", ignore(case))] + Enum, + #[token("state", ignore(case))] + State, + #[token("auto", ignore(case))] + Auto, + #[token("function", ignore(case))] + Function, + #[token("event", ignore(case))] + Event, + #[token("delegate", ignore(case))] + Delegate, + #[token("var", ignore(case))] + Var, + #[token("local", ignore(case))] + Local, + + // # Inheritance, interface, dependencies + #[token("extends", ignore(case))] + Extends, + #[token("dependson", ignore(case))] + DependsOn, + + // # Access modifiers & properties + #[token("private", ignore(case))] + Private, + #[token("protected", ignore(case))] + Protected, + #[token("public", ignore(case))] + Public, + #[token("const", ignore(case))] + Const, + #[token("static", ignore(case))] + Static, + #[token("native", ignore(case))] + Native, + #[token("abstract", ignore(case))] + Abstract, + #[token("deprecated", ignore(case))] + Deprecated, + #[token("safereplace", ignore(case))] + SafeReplace, + #[token("exportstructs", ignore(case))] + ExportStructs, + #[token("input", ignore(case))] + Input, + + // # UnrealScript metadata/specifiers + #[token("final", ignore(case))] + Final, + #[token("default", ignore(case))] + Default, + #[token("defaultproperties", 
ignore(case))] + DefaultProperties, + #[token("object", ignore(case))] + Object, + #[token("begin", ignore(case))] + Begin, + #[token("end", ignore(case))] + End, + #[token("optional", ignore(case))] + Optional, + #[token("config", ignore(case))] + Config, + #[token("perobjectconfig", ignore(case))] + PerObjectConfig, + #[token("globalconfig", ignore(case))] + GlobalConfig, + #[token("collapsecategories", ignore(case))] + CollapseCategories, + #[token("dontcollapsecategories", ignore(case))] + DontCollapseCategories, + #[token("hidecategories", ignore(case))] + HideCategories, + #[token("showcategories", ignore(case))] + ShowCategories, + #[token("localized", ignore(case))] + Localized, + #[token("placeable", ignore(case))] + Placeable, + #[token("notplaceable", ignore(case))] + NotPlaceable, + #[token("instanced", ignore(case))] + Instanced, + #[token("editconst", ignore(case))] + EditConst, + #[token("editconstarray", ignore(case))] + EditConstArray, + #[token("editinline", ignore(case))] + EditInline, + #[token("editinlineuse", ignore(case))] + EditInlineUse, + #[token("editinlinenew", ignore(case))] + EditInlineNew, + #[token("noteditinlinenew", ignore(case))] + NotEditInlineNew, + #[token("edfindable", ignore(case))] + EdFindable, + #[token("editinlinenotify", ignore(case))] + EditInlineNotify, + #[token("parseconfig", ignore(case))] + ParseConfig, + #[token("automated", ignore(case))] + Automated, + #[token("dynamicrecompile", ignore(case))] + DynamicRecompile, + #[token("transient", ignore(case))] + Transient, + #[token("long", ignore(case))] + Long, + #[token("operator", ignore(case))] + Operator, + #[token("preoperator", ignore(case))] + PreOperator, + #[token("postoperator", ignore(case))] + PostOperator, + #[token("simulated", ignore(case))] + Simulated, + #[token("exec", ignore(case))] + Exec, + #[token("latent", ignore(case))] + Latent, + #[token("iterator", ignore(case))] + Iterator, + #[token("out", ignore(case))] + Out, + #[token("skip", 
ignore(case))] + Skip, + #[token("singular", ignore(case))] + Singular, + #[token("coerce", ignore(case))] + Coerce, + #[token("assert", ignore(case))] + Assert, + #[token("ignores", ignore(case))] + Ignores, + #[token("within", ignore(case))] + Within, + #[token("init", ignore(case))] + Init, + #[token("export", ignore(case))] + Export, + #[token("noexport", ignore(case))] + NoExport, + #[token("hidedropdown", ignore(case))] + HideDropdown, + #[token("travel", ignore(case))] + Travel, + #[token("cache", ignore(case))] + Cache, + #[token("cacheexempt", ignore(case))] + CacheExempt, + + // # Replication-related + #[token("reliable", ignore(case))] + Reliable, + #[token("unreliable", ignore(case))] + Unreliable, + #[token("replication", ignore(case))] + Replication, + #[token("nativereplication", ignore(case))] + NativeReplication, + + // # Control-flow keywords + #[token("goto", ignore(case))] + Goto, + #[token("if", ignore(case))] + If, + #[token("else", ignore(case))] + Else, + #[token("switch", ignore(case))] + Switch, + #[token("case", ignore(case))] + Case, + #[token("for", ignore(case))] + For, + #[token("foreach", ignore(case))] + ForEach, + #[token("while", ignore(case))] + While, + #[token("do", ignore(case))] + Do, + #[token("until", ignore(case))] + Until, + #[token("break", ignore(case))] + Break, + #[token("continue", ignore(case))] + Continue, + #[token("return", ignore(case))] + Return, + + // # Built-in types + #[token("int", ignore(case))] + Int, + #[token("float", ignore(case))] + Float, + #[token("bool", ignore(case))] + Bool, + #[token("byte", ignore(case))] + Byte, + #[token("string", ignore(case))] + String, + #[token("array", ignore(case))] + Array, + #[token("name", ignore(case))] + Name, + + // FloatLiteral must come before IntegerLiteral and '.' + // to have higher priority. + // It also recognizes things like: `1.foo``, `1.foo.bar`, `1.2.3`. + // It has to. Because UnrealScript is a pile of-... 
wonderful language, + // where everything is possible. + #[regex(r"[0-9]+(?:\.(?:[0-9]+|[A-Za-z_][A-Za-z0-9_]*))+[fF]?")] + #[regex(r"(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[eE][+-]?[0-9]+)?[fF]?")] + #[regex(r"[0-9]+[eE][+-]?[0-9]+[fF]?")] + FloatLiteral, + + #[regex(r"0b[01](?:_?[01])*")] + #[regex(r"0o[0-7](?:_?[0-7])*")] + #[regex(r"0x[0-9A-Fa-f](?:_?[0-9A-Fa-f])*")] + #[regex(r"[0-9][0-9]*")] + IntegerLiteral, + + #[regex(r#""([^"\\\r\n]|\\.)*""#)] + StringLiteral, + #[regex(r"'[a-zA-Z0-9_\. \-]*'")] + NameLiteral, + #[token("true", ignore(case))] + True, + #[token("false", ignore(case))] + False, + #[token("none", ignore(case))] + None, + #[token("self", ignore(case))] + SelfValue, + #[token("new", ignore(case))] + New, + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")] + Identifier, + + // # Operations + // ## Exponentiation + #[token("**")] + Exponentiation, + // ## Unary + #[token("++")] + Increment, + #[token("--")] + Decrement, + #[token("!")] + Not, + #[token("~")] + BitwiseNot, + // ## Vector + #[token("dot", ignore(case))] + Dot, + #[token("cross", ignore(case))] + Cross, + // ## Multiplicative + #[token("*")] + Multiply, + #[token("/")] + Divide, + #[token("%")] + Modulo, + // ## Additive + #[token("+")] + Plus, + #[token("-")] + Minus, + // ## String manipulation + #[token("@")] + ConcatSpace, + #[token("$")] + Concat, + // ## Shifts + #[token("<<")] + LeftShift, + #[token(">>>")] + LogicalRightShift, + #[token(">>")] + RightShift, + // ## Relational + #[token("<")] + Less, + #[token("<=")] + LessEqual, + #[token(">")] + Greater, + #[token(">=")] + GreaterEqual, + #[token("==")] + Equal, + #[token("!=")] + NotEqual, + #[token("~=")] + ApproximatelyEqual, + #[token("clockwisefrom", ignore(case))] + ClockwiseFrom, + // ## Bitwise + #[token("&")] + BitwiseAnd, + #[token("|")] + BitwiseOr, + #[token("^")] + BitwiseXor, + // ## Logical + #[token("&&")] + LogicalAnd, + #[token("^^")] + LogicalXor, + #[token("||")] + LogicalOr, + // ## Assignments + #[token("=")] + 
Assign, + #[token("*=")] + MultiplyAssign, + #[token("/=")] + DivideAssign, + #[token("%=")] + ModuloAssign, + #[token("+=")] + PlusAssign, + #[token("-=")] + MinusAssign, + #[token("$=")] + ConcatAssign, + #[token("@=")] + ConcatSpaceAssign, + + // # Punctuation & delimiters + #[token("(")] + LeftParenthesis, + #[token(")")] + RightParenthesis, + #[token("{", process_left_brace)] + Brace(BraceKind), + #[token("}")] + RightBrace, + #[token("[")] + LeftBracket, + #[token("]")] + RightBracket, + #[token(";")] + Semicolon, + #[token(",")] + Comma, + #[token(".")] + Period, + #[token(":")] + Colon, + #[token("#")] + Hash, + #[token("?")] + Question, + + // # Comments & whitespaces + #[regex(r"//[^\r\n]*")] + LineComment, + #[regex(r"/\*", handle_block_comment)] + BlockComment, + #[regex(r"\r\n|\n|\r")] + Newline, + #[regex(r"[ \t]+")] + Whitespace, + + // # Technical + Error, +} + +/// Consumes an `UnrealScript` `/* ... */` block comment, including nested comments. +/// +/// Matches the entire comment, including its delimiters. +/// If the comment is unterminated, consumes to the end of input. +fn handle_block_comment(lexer: &mut Lexer) { + let mut comment_depth = 1; + while let Some(next_character) = lexer.remainder().chars().next() { + if lexer.remainder().starts_with("/*") { + comment_depth += 1; + lexer.bump(2); + continue; + } + if lexer.remainder().starts_with("*/") { + comment_depth -= 1; + lexer.bump(2); + if comment_depth == 0 { + break; + } + continue; + } + lexer.bump(next_character.len_utf8()); + } +} + +/// Processes `{` according to the current lexer mode. +/// +/// Returns [`BraceKind::Normal`] for ordinary `UnrealScript` braces. +/// After `cpptext` or `cppstruct`, consumes the embedded C++ block and returns +/// [`BraceKind::CppBlock`]. 
+fn process_left_brace(lexer: &mut Lexer) -> BraceKind { + match lexer.extras.mode { + LexerMode::Normal => BraceKind::Normal, + LexerMode::AwaitingCppBlock => { + lexer.extras.mode = LexerMode::Normal; + consume_cpp_block(lexer); + BraceKind::CppBlock + } + } +} + +/// Consumes a complete C++ block, handling: +/// - Nested `{...}` pairs +/// - String literals (`"..."` and `'...'`), including escaped quotes +/// - Line comments (`// ...\n`) +/// - Block comments (`/* ... */`) +/// +/// Leaves the lexer positioned immediately after the closing `}` of the block. +/// The opening `{` must have already been consumed by the caller. +/// +/// We target UE2-era cpp blocks, so no need for anything fancy. +fn consume_cpp_block(lexer: &mut Lexer) { + let mut brace_depth = 1; + while let Some(next_character) = lexer.remainder().chars().next() { + match next_character { + '{' => { + brace_depth += 1; + lexer.bump(1); + } + '}' => { + brace_depth -= 1; + lexer.bump(1); + if brace_depth == 0 { + break; + } + } + '/' if lexer.remainder().starts_with("/*") => { + lexer.bump(2); // consuming two-byte sequence `/*` + consume_c_style_block_comment(lexer); + } + '/' if lexer.remainder().starts_with("//") => { + lexer.bump(2); // consuming two-byte sequence `//` + while let Some(next_character) = lexer.remainder().chars().next() { + lexer.bump(next_character.len_utf8()); + if next_character == '\n' || next_character == '\r' { + break; + } + } + } + '"' | '\'' => { + lexer.bump(1); // skip `'` or `"` + consume_quoted_cpp_literal(lexer, next_character); + } + _ => lexer.bump(next_character.len_utf8()), + } + } +} + +/// Consumes a non-nesting C-style `/* ... */` comment. +/// +/// Assumes that the opening `/*` has already been consumed. 
+fn consume_c_style_block_comment(lexer: &mut Lexer) { + while let Some(next_character) = lexer.remainder().chars().next() { + if lexer.remainder().starts_with("*/") { + lexer.bump(2); + break; + } + lexer.bump(next_character.len_utf8()); + } +} + +/// Consumes a quoted C++ string or character literal. +/// +/// Assumes that the opening delimiter has already been consumed. +fn consume_quoted_cpp_literal(lexer: &mut Lexer, delimiter: char) { + while let Some(next_character) = lexer.remainder().chars().next() { + lexer.bump(next_character.len_utf8()); + if next_character == '\\' { + // Skip the escaped character + if let Some(escaped_character) = lexer.remainder().chars().next() { + lexer.bump(escaped_character.len_utf8()); + } + } else if next_character == delimiter { + return; + } + } +} + +/// Peek ahead from the current lexer position, skipping "trivia", and report +/// whether the next significant character is `{`. +/// +/// Trivia here means: +/// - Spaces and tabs +/// - Newlines (`\r`, `\n`, or `\r\n`) +/// - Line comments (`// ...`) +/// - Block comments (`/* ... */`), including nested ones +/// +/// This is used after lexing tokens like `cpptext` or `cppstruct`, where +/// `UnrealScript` allows arbitrary trivia between the keyword and the opening +/// brace of the embedded C++ block. +/// +/// Returns `true` if the next non-trivia character is `{`, otherwise `false`. +/// If the input ends while skipping trivia, returns `false`. 
+fn is_next_nontrivia_left_brace(lexer: &Lexer) -> bool { + let mut remaining = lexer.remainder(); + + while let Some(next_character) = remaining.chars().next() { + match next_character { + ' ' | '\t' | '\r' | '\n' => { + remaining = &remaining[next_character.len_utf8()..]; + } + '/' if remaining.starts_with("//") => { + remaining = &remaining[2..]; + while let Some(comment_character) = remaining.chars().next() { + remaining = &remaining[comment_character.len_utf8()..]; + if comment_character == '\n' || comment_character == '\r' { + break; + } + } + } + '/' if remaining.starts_with("/*") => { + remaining = &remaining[2..]; + let mut comment_depth = 1; + while comment_depth > 0 { + if remaining.starts_with("/*") { + comment_depth += 1; + remaining = &remaining[2..]; + continue; + } + if remaining.starts_with("*/") { + comment_depth -= 1; + remaining = &remaining[2..]; + continue; + } + let Some(comment_character) = remaining.chars().next() else { + return false; + }; + remaining = &remaining[comment_character.len_utf8()..]; + } + } + _ => return next_character == '{', + } + } + + false +} diff --git a/rottlib/src/lexer/tests.rs b/rottlib/src/lexer/tests.rs new file mode 100644 index 0000000..971e1d1 --- /dev/null +++ b/rottlib/src/lexer/tests.rs @@ -0,0 +1,338 @@ +use super::{Keyword, Token, TokenPosition, TokenizedFile, split_visible_line_segments}; + +fn reconstruct_source(file: &TokenizedFile<'_>) -> String { + file.buffer.iter().map(|piece| piece.lexeme).collect() +} + +fn token_kinds_and_lexemes<'src>(file: &TokenizedFile<'src>) -> Vec<(Token, &'src str)> { + file.buffer + .iter() + .map(|piece| (piece.token, piece.lexeme)) + .collect() +} + +#[test] +fn split_visible_line_segments_returns_empty_for_single_line_text() { + assert!(split_visible_line_segments("abcdef").is_empty()); + assert!(split_visible_line_segments("").is_empty()); +} + +#[test] +fn split_visible_line_segments_handles_mixed_line_endings() { + let text = "ab\r\ncd\ref\n"; + let segments = 
split_visible_line_segments(text); + + assert_eq!(segments, vec![0..2, 4..6, 7..9, 10..10]); + + let visible: Vec<&str> = segments.iter().map(|range| &text[range.clone()]).collect(); + assert_eq!(visible, vec!["ab", "cd", "ef", ""]); +} + +#[test] +fn tokenization_is_lossless_for_mixed_input() { + let source = concat!( + "class Foo extends Bar;\r\n", + "var string S;\n", + "/* block comment */\r", + "defaultproperties {}\n", + "X = 1.25e+2;\n", + ); + + let file = TokenizedFile::tokenize(source); + + assert_eq!(reconstruct_source(&file), source); +} + +#[test] +fn trailing_newline_does_not_create_extra_empty_line() { + let source = "a\n"; + let file = TokenizedFile::tokenize(source); + + assert_eq!(file.lines.len(), 1); + assert_eq!(file.lines[0].continued_from, None); + assert_eq!(file.lines[0].local_range(), Some(0..2)); + + assert_eq!( + token_kinds_and_lexemes(&file), + vec![(Token::Identifier, "a"), (Token::Newline, "\n")] + ); +} + +#[test] +fn final_line_without_trailing_newline_is_committed() { + let source = "a\nb"; + let file = TokenizedFile::tokenize(source); + + assert_eq!(file.lines.len(), 2); + + assert_eq!(file.lines[0].continued_from, None); + assert_eq!(file.lines[0].local_range(), Some(0..2)); + + assert_eq!(file.lines[1].continued_from, None); + assert_eq!(file.lines[1].local_range(), Some(2..3)); + + assert_eq!( + token_kinds_and_lexemes(&file), + vec![ + (Token::Identifier, "a"), + (Token::Newline, "\n"), + (Token::Identifier, "b"), + ] + ); +} + +#[test] +fn multiline_block_comment_creates_continuation_line_with_local_tokens() { + let source = "a/*x\ny*/b"; + let file = TokenizedFile::tokenize(source); + + assert_eq!( + token_kinds_and_lexemes(&file), + vec![ + (Token::Identifier, "a"), + (Token::BlockComment, "/*x\ny*/"), + (Token::Identifier, "b"), + ] + ); + + assert_eq!(file.lines.len(), 2); + + assert_eq!(file.lines[0].continued_from, None); + assert_eq!(file.lines[0].local_range(), Some(0..2)); + + 
assert_eq!(file.lines[1].continued_from, Some(0)); + assert_eq!(file.lines[1].local_range(), Some(2..3)); + + let block_comment_index = 1; + assert_eq!( + file.multi_line_map.get(&block_comment_index), + Some(&vec![0..3, 4..7]) + ); +} + +#[test] +fn pure_multiline_token_finishes_with_bare_continuation_line() { + let source = "/*a\nb*/"; + let file = TokenizedFile::tokenize(source); + + assert_eq!( + token_kinds_and_lexemes(&file), + vec![(Token::BlockComment, "/*a\nb*/")] + ); + + assert_eq!(file.lines.len(), 2); + + assert_eq!(file.lines[0].continued_from, None); + assert_eq!(file.lines[0].local_range(), Some(0..1)); + + assert_eq!(file.lines[1].continued_from, Some(0)); + assert_eq!(file.lines[1].local_range(), None); + + assert_eq!(file.multi_line_map.get(&0), Some(&vec![0..3, 4..7])); +} + +#[test] +fn nested_block_comments_are_consumed_as_one_token() { + let source = "/* outer /* inner */ still outer */"; + let file = TokenizedFile::tokenize(source); + + assert!(!file.has_errors()); + assert_eq!(file.buffer.len(), 1); + assert_eq!(file.buffer[0].token, Token::BlockComment); + assert_eq!(file.buffer[0].lexeme, source); +} + +#[test] +fn cpptext_with_trivia_before_brace_produces_cpp_block_token() { + let source = "cpptext /* gap */\n{ int x; if (y) { z(); } }"; + let file = TokenizedFile::tokenize(source); + + assert_eq!( + token_kinds_and_lexemes(&file), + vec![ + (Token::Keyword(Keyword::CppText), "cpptext"), + (Token::Whitespace, " "), + (Token::BlockComment, "/* gap */"), + (Token::Newline, "\n"), + (Token::CppBlock, "{ int x; if (y) { z(); } }"), + ] + ); + + assert_eq!(file.lines.len(), 2); + + assert_eq!(file.lines[0].continued_from, None); + assert_eq!(file.lines[0].local_range(), Some(0..4)); + + assert_eq!(file.lines[1].continued_from, None); + assert_eq!(file.lines[1].local_range(), Some(4..5)); +} + +#[test] +fn cpptext_without_following_brace_does_not_start_cpp_block_mode() { + let source = "cpptext Foo { bar }"; + let file = 
TokenizedFile::tokenize(source); + + let tokens = token_kinds_and_lexemes(&file); + + assert!(!tokens.iter().any(|(token, _)| *token == Token::CppBlock)); + assert!( + tokens + .iter() + .any(|(token, lexeme)| *token == Token::Keyword(Keyword::CppText) + && *lexeme == "cpptext") + ); + assert!( + tokens + .iter() + .any(|(token, lexeme)| *token == Token::LeftBrace && *lexeme == "{") + ); + assert!( + tokens + .iter() + .any(|(token, lexeme)| *token == Token::RightBrace && *lexeme == "}") + ); +} + +#[test] +fn utf16_length_is_precomputed_per_token() { + let source = "\"😀\""; + let file = TokenizedFile::tokenize(source); + + assert_eq!(file.buffer.len(), 1); + assert_eq!(file.buffer[0].token, Token::StringLiteral); + assert_eq!(file.buffer[0].utf16_length, source.encode_utf16().count()); + assert_eq!(file.buffer[0].utf16_length, 4); +} + +#[test] +fn lexer_reports_error_tokens() { + let source = "`"; + let file = TokenizedFile::tokenize(source); + + assert!(file.has_errors()); + assert_eq!(reconstruct_source(&file), source); + assert_eq!(file.buffer.len(), 1); + assert_eq!(file.buffer[0].token, Token::Error); + assert_eq!(file.buffer[0].lexeme, "`"); +} + +#[test] +fn token_predicates_match_current_rules() { + assert!(Token::Identifier.is_valid_identifier_name()); + assert!(Token::Keyword(Keyword::Int).is_valid_identifier_name()); + assert!(Token::Keyword(Keyword::Int).is_valid_type_name()); + assert!(Token::Keyword(Keyword::Delegate).is_valid_type_name()); + + assert!(Token::Keyword(Keyword::Exec).is_valid_function_modifier()); + assert!(Token::Keyword(Keyword::Operator).is_valid_function_modifier()); + assert!(Token::Keyword(Keyword::Config).is_valid_function_modifier()); + + assert!(!Token::Plus.is_valid_identifier_name()); + assert!(!Token::Plus.is_valid_type_name()); + assert!(!Token::Keyword(Keyword::If).is_valid_function_modifier()); +} + +#[test] +fn tokens_iterator_yields_positions_in_buffer_order() { + let source = "a + b"; + let file = 
TokenizedFile::tokenize(source); + + let collected: Vec<_> = file.iter().collect(); + + assert_eq!(collected.len(), file.buffer.len()); + + for (expected_index, (position, token_data)) in collected.into_iter().enumerate() { + assert_eq!(position.0, expected_index); + assert_eq!(token_data, file.buffer[expected_index]); + } +} + +fn line_token_kinds_and_lexemes<'src>( + file: &TokenizedFile<'src>, + line_number: usize, +) -> Vec<(usize, Token, &'src str)> { + file.line_tokens(line_number) + .map(|(position, token_data)| (position.0, token_data.token, token_data.lexeme)) + .collect() +} + +#[test] +fn line_count_counts_physical_lines_without_trailing_empty_line() { + assert_eq!(TokenizedFile::tokenize("").line_count(), 0); + assert_eq!(TokenizedFile::tokenize("a").line_count(), 1); + assert_eq!(TokenizedFile::tokenize("a\n").line_count(), 1); + assert_eq!(TokenizedFile::tokenize("a\nb\n").line_count(), 2); +} + +#[test] +fn line_tokens_return_only_tokens_that_start_on_that_line() { + let source = "a/*x\ny*/b\nc"; + let file = TokenizedFile::tokenize(source); + + assert_eq!( + line_token_kinds_and_lexemes(&file, 0), + vec![ + (0, Token::Identifier, "a"), + (1, Token::BlockComment, "/*x\ny*/"), + ] + ); + + // Important: the carried fragment "y*/" is NOT yielded here. 
+ assert_eq!( + line_token_kinds_and_lexemes(&file, 1), + vec![(2, Token::Identifier, "b"), (3, Token::Newline, "\n"),] + ); + + assert_eq!( + line_token_kinds_and_lexemes(&file, 2), + vec![(4, Token::Identifier, "c")] + ); +} + +#[test] +fn line_tokens_are_empty_for_continuation_only_or_out_of_bounds_lines() { + let file = TokenizedFile::tokenize("/*a\nb*/"); + + assert_eq!(file.line_tokens(1).count(), 0); + assert_eq!(file.line_tokens(999).count(), 0); +} + +#[test] +fn token_at_returns_token_for_valid_position_and_none_for_invalid_one() { + let file = TokenizedFile::tokenize("a + b"); + + assert_eq!(file.token_at(TokenPosition(0)), Some(file.buffer[0])); + assert_eq!( + file.token_at(TokenPosition(1)).map(|t| t.token), + Some(Token::Whitespace) + ); + assert_eq!( + file.token_at(TokenPosition(2)).map(|t| t.token), + Some(Token::Plus) + ); + assert_eq!(file.token_at(TokenPosition(file.buffer.len())), None); +} + +#[test] +fn line_text_omits_line_terminators_and_handles_empty_lines() { + let file = TokenizedFile::tokenize("left\n\nright"); + + assert_eq!(file.line_text(0).as_deref(), Some("left")); + assert_eq!(file.line_text(1).as_deref(), Some("")); + assert_eq!(file.line_text(2).as_deref(), Some("right")); + assert_eq!(file.line_text(999), None); +} + +#[test] +fn line_text_includes_carried_fragment_on_continued_line() { + let file = TokenizedFile::tokenize("a/*x\ny*/b"); + + assert_eq!(file.line_text(1).as_deref(), Some("y*/b")); +} + +#[test] +fn line_text_on_origin_line_of_multiline_token_uses_only_visible_part() { + let file = TokenizedFile::tokenize("a/*x\ny*/b"); + + assert_eq!(file.line_text(0).as_deref(), Some("a/*x")); +} diff --git a/rottlib/src/lexer/token.rs b/rottlib/src/lexer/token.rs new file mode 100644 index 0000000..5e167d6 --- /dev/null +++ b/rottlib/src/lexer/token.rs @@ -0,0 +1,560 @@ +//! Token definitions for Fermented `UnrealScript`. +//! +//! These are the tokens consumed by the parser and derived from [`RawToken`]s. 
+ +use super::{BraceKind, raw_lexer::RawToken}; + +/// Tokens consumed by the Fermented `UnrealScript` parser. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum Token { + ExecDirective, + Keyword(Keyword), + // Primaries + FloatLiteral, + IntegerLiteral, + StringLiteral, + NameLiteral, + Identifier, + // Operations + Exponentiation, + Increment, + Decrement, + Not, + BitwiseNot, + Multiply, + Divide, + Modulo, + Plus, + Minus, + ConcatSpace, + Concat, + LeftShift, + LogicalRightShift, + RightShift, + Less, + LessEqual, + Greater, + GreaterEqual, + Equal, + NotEqual, + ApproximatelyEqual, + BitwiseAnd, + BitwiseOr, + BitwiseXor, + LogicalAnd, + LogicalXor, + LogicalOr, + Assign, + MultiplyAssign, + DivideAssign, + ModuloAssign, + PlusAssign, + MinusAssign, + ConcatAssign, + ConcatSpaceAssign, + // Delimiters + LeftParenthesis, + RightParenthesis, + LeftBrace, + CppBlock, + RightBrace, + LeftBracket, + RightBracket, + Semicolon, + Comma, + Period, + Colon, + Hash, + Question, + // Trivia + LineComment, + BlockComment, + Newline, + Whitespace, + // Technical - for representing a very wrong sequence of characters + Error, +} + +impl From for Token { + #![allow(clippy::too_many_lines)] + fn from(token: RawToken) -> Self { + match token { + // Non-trivial conversions + RawToken::Brace(BraceKind::Normal) => Self::LeftBrace, + RawToken::Brace(BraceKind::CppBlock) => Self::CppBlock, + // Keyword conversions + RawToken::CppText => Self::Keyword(Keyword::CppText), + RawToken::CppStruct => Self::Keyword(Keyword::CppStruct), + RawToken::Class => Self::Keyword(Keyword::Class), + RawToken::Struct => Self::Keyword(Keyword::Struct), + RawToken::Enum => Self::Keyword(Keyword::Enum), + RawToken::State => Self::Keyword(Keyword::State), + RawToken::Auto => Self::Keyword(Keyword::Auto), + RawToken::Function => Self::Keyword(Keyword::Function), + RawToken::Event => Self::Keyword(Keyword::Event), + RawToken::Delegate => Self::Keyword(Keyword::Delegate), + RawToken::Var => 
Self::Keyword(Keyword::Var), + RawToken::Local => Self::Keyword(Keyword::Local), + RawToken::Extends => Self::Keyword(Keyword::Extends), + RawToken::DependsOn => Self::Keyword(Keyword::DependsOn), + RawToken::Private => Self::Keyword(Keyword::Private), + RawToken::Protected => Self::Keyword(Keyword::Protected), + RawToken::Public => Self::Keyword(Keyword::Public), + RawToken::Const => Self::Keyword(Keyword::Const), + RawToken::Static => Self::Keyword(Keyword::Static), + RawToken::Native => Self::Keyword(Keyword::Native), + RawToken::Abstract => Self::Keyword(Keyword::Abstract), + RawToken::Deprecated => Self::Keyword(Keyword::Deprecated), + RawToken::SafeReplace => Self::Keyword(Keyword::SafeReplace), + RawToken::ExportStructs => Self::Keyword(Keyword::ExportStructs), + RawToken::Input => Self::Keyword(Keyword::Input), + RawToken::Final => Self::Keyword(Keyword::Final), + RawToken::Default => Self::Keyword(Keyword::Default), + RawToken::DefaultProperties => Self::Keyword(Keyword::DefaultProperties), + RawToken::Object => Self::Keyword(Keyword::Object), + RawToken::Begin => Self::Keyword(Keyword::Begin), + RawToken::End => Self::Keyword(Keyword::End), + RawToken::Optional => Self::Keyword(Keyword::Optional), + RawToken::Config => Self::Keyword(Keyword::Config), + RawToken::PerObjectConfig => Self::Keyword(Keyword::PerObjectConfig), + RawToken::GlobalConfig => Self::Keyword(Keyword::GlobalConfig), + RawToken::CollapseCategories => Self::Keyword(Keyword::CollapseCategories), + RawToken::DontCollapseCategories => Self::Keyword(Keyword::DontCollapseCategories), + RawToken::HideCategories => Self::Keyword(Keyword::HideCategories), + RawToken::ShowCategories => Self::Keyword(Keyword::ShowCategories), + RawToken::Localized => Self::Keyword(Keyword::Localized), + RawToken::Placeable => Self::Keyword(Keyword::Placeable), + RawToken::NotPlaceable => Self::Keyword(Keyword::NotPlaceable), + RawToken::Instanced => Self::Keyword(Keyword::Instanced), + RawToken::EditConst => 
Self::Keyword(Keyword::EditConst), + RawToken::EditConstArray => Self::Keyword(Keyword::EditConstArray), + RawToken::EditInline => Self::Keyword(Keyword::EditInline), + RawToken::EditInlineUse => Self::Keyword(Keyword::EditInlineUse), + RawToken::EditInlineNew => Self::Keyword(Keyword::EditInlineNew), + RawToken::NotEditInlineNew => Self::Keyword(Keyword::NotEditInlineNew), + RawToken::EdFindable => Self::Keyword(Keyword::EdFindable), + RawToken::EditInlineNotify => Self::Keyword(Keyword::EditInlineNotify), + RawToken::ParseConfig => Self::Keyword(Keyword::ParseConfig), + RawToken::Automated => Self::Keyword(Keyword::Automated), + RawToken::DynamicRecompile => Self::Keyword(Keyword::DynamicRecompile), + RawToken::Transient => Self::Keyword(Keyword::Transient), + RawToken::Long => Self::Keyword(Keyword::Long), + RawToken::Operator => Self::Keyword(Keyword::Operator), + RawToken::PreOperator => Self::Keyword(Keyword::PreOperator), + RawToken::PostOperator => Self::Keyword(Keyword::PostOperator), + RawToken::Simulated => Self::Keyword(Keyword::Simulated), + RawToken::Exec => Self::Keyword(Keyword::Exec), + RawToken::Latent => Self::Keyword(Keyword::Latent), + RawToken::Iterator => Self::Keyword(Keyword::Iterator), + RawToken::Out => Self::Keyword(Keyword::Out), + RawToken::Skip => Self::Keyword(Keyword::Skip), + RawToken::Singular => Self::Keyword(Keyword::Singular), + RawToken::Coerce => Self::Keyword(Keyword::Coerce), + RawToken::Assert => Self::Keyword(Keyword::Assert), + RawToken::Ignores => Self::Keyword(Keyword::Ignores), + RawToken::Within => Self::Keyword(Keyword::Within), + RawToken::Init => Self::Keyword(Keyword::Init), + RawToken::Export => Self::Keyword(Keyword::Export), + RawToken::NoExport => Self::Keyword(Keyword::NoExport), + RawToken::HideDropdown => Self::Keyword(Keyword::HideDropdown), + RawToken::Travel => Self::Keyword(Keyword::Travel), + RawToken::Cache => Self::Keyword(Keyword::Cache), + RawToken::CacheExempt => 
Self::Keyword(Keyword::CacheExempt), + RawToken::Reliable => Self::Keyword(Keyword::Reliable), + RawToken::Unreliable => Self::Keyword(Keyword::Unreliable), + RawToken::Replication => Self::Keyword(Keyword::Replication), + RawToken::NativeReplication => Self::Keyword(Keyword::NativeReplication), + RawToken::Goto => Self::Keyword(Keyword::Goto), + RawToken::If => Self::Keyword(Keyword::If), + RawToken::Else => Self::Keyword(Keyword::Else), + RawToken::Switch => Self::Keyword(Keyword::Switch), + RawToken::Case => Self::Keyword(Keyword::Case), + RawToken::For => Self::Keyword(Keyword::For), + RawToken::ForEach => Self::Keyword(Keyword::ForEach), + RawToken::While => Self::Keyword(Keyword::While), + RawToken::Do => Self::Keyword(Keyword::Do), + RawToken::Until => Self::Keyword(Keyword::Until), + RawToken::Break => Self::Keyword(Keyword::Break), + RawToken::Continue => Self::Keyword(Keyword::Continue), + RawToken::Return => Self::Keyword(Keyword::Return), + RawToken::Int => Self::Keyword(Keyword::Int), + RawToken::Float => Self::Keyword(Keyword::Float), + RawToken::Bool => Self::Keyword(Keyword::Bool), + RawToken::Byte => Self::Keyword(Keyword::Byte), + RawToken::String => Self::Keyword(Keyword::String), + RawToken::Array => Self::Keyword(Keyword::Array), + RawToken::Name => Self::Keyword(Keyword::Name), + RawToken::True => Self::Keyword(Keyword::True), + RawToken::False => Self::Keyword(Keyword::False), + RawToken::None => Self::Keyword(Keyword::None), + RawToken::SelfValue => Self::Keyword(Keyword::SelfValue), + RawToken::New => Self::Keyword(Keyword::New), + RawToken::Dot => Self::Keyword(Keyword::Dot), + RawToken::Cross => Self::Keyword(Keyword::Cross), + RawToken::ClockwiseFrom => Self::Keyword(Keyword::ClockwiseFrom), + // Trivial 1-to-1 conversions. 
+ RawToken::ExecDirective => Self::ExecDirective, + RawToken::FloatLiteral => Self::FloatLiteral, + RawToken::IntegerLiteral => Self::IntegerLiteral, + RawToken::StringLiteral => Self::StringLiteral, + RawToken::NameLiteral => Self::NameLiteral, + RawToken::Identifier => Self::Identifier, + RawToken::Exponentiation => Self::Exponentiation, + RawToken::Increment => Self::Increment, + RawToken::Decrement => Self::Decrement, + RawToken::Not => Self::Not, + RawToken::BitwiseNot => Self::BitwiseNot, + RawToken::Multiply => Self::Multiply, + RawToken::Divide => Self::Divide, + RawToken::Modulo => Self::Modulo, + RawToken::Plus => Self::Plus, + RawToken::Minus => Self::Minus, + RawToken::ConcatSpace => Self::ConcatSpace, + RawToken::Concat => Self::Concat, + RawToken::LeftShift => Self::LeftShift, + RawToken::LogicalRightShift => Self::LogicalRightShift, + RawToken::RightShift => Self::RightShift, + RawToken::Less => Self::Less, + RawToken::LessEqual => Self::LessEqual, + RawToken::Greater => Self::Greater, + RawToken::GreaterEqual => Self::GreaterEqual, + RawToken::Equal => Self::Equal, + RawToken::NotEqual => Self::NotEqual, + RawToken::ApproximatelyEqual => Self::ApproximatelyEqual, + RawToken::BitwiseAnd => Self::BitwiseAnd, + RawToken::BitwiseOr => Self::BitwiseOr, + RawToken::BitwiseXor => Self::BitwiseXor, + RawToken::LogicalAnd => Self::LogicalAnd, + RawToken::LogicalXor => Self::LogicalXor, + RawToken::LogicalOr => Self::LogicalOr, + RawToken::Assign => Self::Assign, + RawToken::MultiplyAssign => Self::MultiplyAssign, + RawToken::DivideAssign => Self::DivideAssign, + RawToken::ModuloAssign => Self::ModuloAssign, + RawToken::PlusAssign => Self::PlusAssign, + RawToken::MinusAssign => Self::MinusAssign, + RawToken::ConcatAssign => Self::ConcatAssign, + RawToken::ConcatSpaceAssign => Self::ConcatSpaceAssign, + RawToken::LeftParenthesis => Self::LeftParenthesis, + RawToken::RightParenthesis => Self::RightParenthesis, + RawToken::RightBrace => Self::RightBrace, + 
RawToken::LeftBracket => Self::LeftBracket, + RawToken::RightBracket => Self::RightBracket, + RawToken::Semicolon => Self::Semicolon, + RawToken::Comma => Self::Comma, + RawToken::Period => Self::Period, + RawToken::Colon => Self::Colon, + RawToken::Hash => Self::Hash, + RawToken::Question => Self::Question, + RawToken::LineComment => Self::LineComment, + RawToken::BlockComment => Self::BlockComment, + RawToken::Newline => Self::Newline, + RawToken::Whitespace => Self::Whitespace, + RawToken::Error => Self::Error, + } + } +} + +impl Token { + /// Returns `true` if this token is a newline. + #[must_use] + pub const fn is_newline(&self) -> bool { + matches!(self, Self::Newline) + } + + /// Returns `true` if this token is trivia whitespace. + /// + /// Note: comments are **not** considered whitespace. + #[must_use] + pub const fn is_whitespace(&self) -> bool { + matches!(self, Self::Whitespace | Self::Newline) + } + + /// Returns `true` if this token may span multiple physical lines + /// (i.e. can contain newline characters). + #[must_use] + pub const fn can_span_lines(&self) -> bool { + matches!(self, Self::BlockComment | Self::CppBlock | Self::Error) + } + + /// Returns `true` if this token can appear in type position + /// (either a built-in type keyword or an identifier). + #[must_use] + pub fn is_valid_type_name(&self) -> bool { + let Self::Keyword(keyword) = self else { + return *self == Self::Identifier; + }; + keyword.is_valid_type_name() + } + + /// Returns `true` if this token can be used as an identifier. + /// + /// This includes [`Token::Identifier`] and certain keywords that + /// `UnrealScript` also accepts in identifier position. + #[must_use] + pub fn is_valid_identifier_name(&self) -> bool { + if *self == Self::Identifier { + return true; + } + if let Self::Keyword(keyword) = self { + return keyword.is_valid_identifier_name(); + } + false + } + + /// Returns `true` if this token can be used as function's modifier. 
+ #[must_use] + pub const fn is_valid_function_modifier(&self) -> bool { + let Self::Keyword(keyword) = self else { + return false; + }; + matches!( + keyword, + Keyword::Final + | Keyword::Native + | Keyword::Abstract + | Keyword::Transient + | Keyword::Public + | Keyword::Protected + | Keyword::Private + | Keyword::Static + | Keyword::Const + | Keyword::Deprecated + | Keyword::NoExport + | Keyword::Export + | Keyword::Simulated + | Keyword::Latent + | Keyword::Iterator + | Keyword::Singular + | Keyword::Reliable + | Keyword::Unreliable + | Keyword::NativeReplication + | Keyword::PreOperator + | Keyword::Operator + | Keyword::PostOperator + | Keyword::Config + | Keyword::Exec + ) + } +} + +/// Reserved words of Fermented `UnrealScript`. +/// +/// These are represented in [`Token`] as [`Token::Keyword`]. +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +pub enum Keyword { + // C++ blocks + CppText, + CppStruct, + // Top-level declaration + Class, + Struct, + Enum, + State, + Auto, + Function, + Event, + Delegate, + Var, + Local, + // Class modifiers + Extends, + DependsOn, + // Access modifiers + Private, + Protected, + Public, + Const, + // Meta data / specifiers + Static, + Native, + Abstract, + Deprecated, + SafeReplace, + ExportStructs, + Input, + Final, + Default, + DefaultProperties, + Object, + Begin, + End, + Optional, + Config, + PerObjectConfig, + GlobalConfig, + CollapseCategories, + DontCollapseCategories, + HideCategories, + ShowCategories, + Localized, + Placeable, + NotPlaceable, + Instanced, + EditConst, + EditConstArray, + EditInline, + EditInlineUse, + EditInlineNew, + NotEditInlineNew, + EdFindable, + EditInlineNotify, + ParseConfig, + Automated, + DynamicRecompile, + Transient, + Long, + Operator, + PreOperator, + PostOperator, + Simulated, + Exec, + Latent, + Iterator, + Out, + Skip, + Singular, + Coerce, + Assert, + Ignores, + Within, + Init, + Export, + NoExport, + HideDropdown, + Travel, + Cache, + CacheExempt, + // Replication + 
Reliable, + Unreliable, + Replication, + NativeReplication, + // Control flow + Goto, + If, + Else, + Switch, + Case, + For, + ForEach, + While, + Do, + Until, + Break, + Continue, + Return, + // Built-in types + Int, + Float, + Bool, + Byte, + String, + Array, + Name, + // Literals + True, + False, + None, + SelfValue, + New, + // Vector math operators + Dot, + Cross, + ClockwiseFrom, +} + +impl Keyword { + /// Returns `true` if this keyword can be used as an identifier. + #[must_use] + pub const fn is_valid_identifier_name(self) -> bool { + matches!( + self, + // Built-in type words usable as identifiers + Self::Name + | Self::String + | Self::Byte + | Self::Int + | Self::Bool + | Self::Float + | Self::Array + | Self::Delegate + // Context keywords we've directly checked + | Self::Class + | Self::SelfValue + | Self::Default + | Self::Static + | Self::Simulated + | Self::Native + | Self::Latent + | Self::Iterator + | Self::Singular + | Self::Reliable + | Self::Unreliable + | Self::Transient + | Self::Const + | Self::Abstract + | Self::New + | Self::Extends + | Self::Within + | Self::Config + | Self::Out + | Self::Optional + | Self::Local + | Self::Var + | Self::DefaultProperties + | Self::PerObjectConfig + | Self::Object + | Self::Enum + | Self::End + | Self::Event + | Self::Switch + | Self::Goto + | Self::Cross + | Self::CppText + | Self::CppStruct + | Self::HideCategories + | Self::Auto + | Self::For + | Self::Skip + | Self::Placeable + | Self::NotPlaceable + | Self::Instanced + | Self::Function + | Self::State + | Self::Init + | Self::Export + | Self::NoExport + | Self::Dot + | Self::ClockwiseFrom + | Self::Assert + | Self::ExportStructs + | Self::SafeReplace + | Self::Input + | Self::Travel + | Self::Cache + | Self::CacheExempt + | Self::Long + | Self::Continue + ) + } + + /// Returns `true` if this keyword can appear in type position. 
+ #[must_use] + pub const fn is_valid_type_name(self) -> bool { + matches!( + self, + Self::Int + | Self::Float + | Self::Bool + | Self::Byte + | Self::String + | Self::Array + | Self::Name + | Self::Object + | Self::Function + | Self::State + | Self::Delegate + ) + } +} diff --git a/rottlib/src/parser/cursor.rs b/rottlib/src/parser/cursor.rs index 5182839..8719452 100644 --- a/rottlib/src/parser/cursor.rs +++ b/rottlib/src/parser/cursor.rs @@ -1,48 +1,45 @@ //! Cursor utilities for a token stream. //! -//! Provides memoized lookahead over significant tokens and attaches -//! trivia to [`TriviaComponent`]. Significant tokens exclude whitespace and -//! comments; see [`crate::parser::TriviaKind`]. +//! Provides memoized lookahead over significant tokens and records trivia in +//! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments; +//! see [`parser::TriviaKind`]. -use crate::lexer::{Token, TokenLocation}; -use crate::parser::trivia::TriviaComponent; +use std::collections::VecDeque; + +use crate::{ + ast::AstSpan, + lexer::{self, Keyword, Token, TokenPosition}, + parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder}, +}; /// Cursor over a token stream with memoized lookahead and trivia attachment. #[derive(Clone, Debug)] -pub(crate) struct CursorComponent<'src> { - /// Underlying token stream. - tokens: crate::lexer::Tokens<'src>, - /// Significant-token lookahead buffer. - lookahead_buffer: std::collections::VecDeque<(TokenLocation, crate::lexer::TokenPiece<'src>)>, - /// Location of the last consumed token. - previous_location: Option, - /// Location of the last significant token. - /// - /// Used to associate following trivia with the correct token. - last_significant_location: Option, - /// Scratch space for [`CursorComponent::buffer_next_significant_token`], - /// used to avoid reallocations. 
- trivia_buffer: Vec>, +pub(crate) struct Cursor<'file, 'src> { + tokens: lexer::Tokens<'file, 'src>, + lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>, + last_consumed_position: Option, } -impl<'src> CursorComponent<'src> { - /// Create a [`CursorComponent`] over the tokens of `file`. - pub(crate) fn new(tokenized_file: &'src crate::lexer::TokenizedFile<'src>) -> Self { +impl<'file, 'src> Cursor<'file, 'src> { + /// Creates a [`Cursor`] over `tokenized_file`. + pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self { Self { - tokens: tokenized_file.tokens(), - lookahead_buffer: std::collections::VecDeque::new(), - previous_location: None, - last_significant_location: None, - trivia_buffer: Vec::new(), + tokens: tokenized_file.iter(), + lookahead_buffer: VecDeque::new(), + last_consumed_position: None, } } - /// Ensure the lookahead buffer contains at least `lookahead + 1` - /// significant tokens. + /// Ensures that the lookahead buffer contains at least `lookahead + 1` + /// significant tokens, if available. /// - /// May consume trivia from the underlying stream. - /// Does not consume significant tokens. - fn ensure_min_lookahead(&mut self, lookahead: usize, trivia: &mut TriviaComponent<'src>) { + /// May consume trivia from the underlying stream without consuming + /// significant tokens. + fn ensure_lookahead_available( + &mut self, + lookahead: usize, + trivia: &mut TriviaIndexBuilder<'src>, + ) { while self.lookahead_buffer.len() <= lookahead { if !self.buffer_next_significant_token(trivia) { break; @@ -50,181 +47,320 @@ impl<'src> CursorComponent<'src> { } } - /// Scan to the next significant token, recording intervening trivia. + /// Buffers the next significant token and records any preceding trivia. /// - /// Returns `true` if a significant token was buffered, - /// `false` on end of file. 
- fn buffer_next_significant_token(&mut self, trivia: &mut TriviaComponent<'src>) -> bool { - self.trivia_buffer.clear(); - while let Some((token_location, token_piece)) = self.tokens.next() { - if let Ok(trivia_kind) = crate::parser::TriviaKind::try_from(token_piece.token) { - self.trivia_buffer.push(crate::parser::TriviaToken { + /// Returns `true` if a significant token was buffered, or `false` if the + /// stream is exhausted. + fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool { + for (token_position, token_data) in self.tokens.by_ref() { + if let Ok(trivia_kind) = parser::TriviaKind::try_from(token_data.token) { + trivia.record_trivia(parser::TriviaToken { kind: trivia_kind, - text: token_piece.lexeme, - location: token_location, + text: token_data.lexeme, + position: token_position, }); } else { - // Attach trivia found after the previous significant token - if !self.trivia_buffer.is_empty() { - trivia.record_between_locations( - self.last_significant_location, - token_location, - &mut self.trivia_buffer, - ); - } + trivia.record_significant_token(token_position); self.lookahead_buffer - .push_back((token_location, token_piece)); - self.last_significant_location = Some(token_location); + .push_back((token_position, token_data)); return true; } } - // Reached end-of-file: attach trailing trivia - if !self.trivia_buffer.is_empty() { - trivia.record_between_locations( - self.last_significant_location, - TokenLocation::EndOfFile, - &mut self.trivia_buffer, - ); - } false } } -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Returns the next token without consuming it. +impl<'src, 'arena> Parser<'src, 'arena> { + fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> { + self.cursor.ensure_lookahead_available(0, &mut self.trivia); + self.cursor.lookahead_buffer.front() + } + + /// Returns the next significant token without consuming it. 
+ /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_token(&mut self) -> Option { - self.peek_entry().map(|(_, token_piece)| token_piece.token) + self.peek_buffered_token() + .map(|(_, token_data)| token_data.token) } - /// Returns the next token, its lexeme, and its location - /// without consuming it. + /// Returns the next keyword without consuming it. + /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. + /// + /// Returns [`None`] if no tokens remain or if the next token is not + /// a keyword. + #[must_use] + pub(crate) fn peek_keyword(&mut self) -> Option { + match self.peek_token() { + Some(Token::Keyword(keyword)) => Some(keyword), + _ => None, + } + } + + /// Returns the position of the next significant token without consuming it. + /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] - pub(crate) fn peek_token_lexeme_and_location( - &mut self, - ) -> Option<(Token, &'src str, TokenLocation)> { - self.peek_entry().map(|(token_location, token_piece)| { - (token_piece.token, token_piece.lexeme, *token_location) - }) + pub(crate) fn peek_position(&mut self) -> Option { + self.peek_buffered_token() + .map(|(token_position, _)| *token_position) } - /// Returns the next token and its lexeme without consuming it. + /// Returns the next significant token and its lexeme without consuming it. + /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. /// /// Returns [`None`] if no tokens remain. 
#[must_use] pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> { - self.peek_entry() - .map(|(_, token_piece)| (token_piece.token, token_piece.lexeme)) + self.peek_buffered_token() + .map(|(_, token_data)| (token_data.token, token_data.lexeme)) } - /// Returns the next token and its location without consuming it. + /// Returns the next significant token and its position without consuming + /// it. + /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] - pub(crate) fn peek_token_and_location(&mut self) -> Option<(Token, TokenLocation)> { - self.peek_entry() - .map(|(token_location, token_piece)| (token_piece.token, *token_location)) + pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> { + self.peek_buffered_token() + .map(|(token_position, token_data)| (token_data.token, *token_position)) } - /// Returns the location of the next token, or [`TokenLocation::EndOfFile`] - /// if none remain. - #[must_use] - pub(crate) fn peek_location(&mut self) -> TokenLocation { - self.peek_entry() - .map(|(token_location, _)| *token_location) - .unwrap_or(TokenLocation::EndOfFile) - } - - /// Returns the location of the last token that was actually consumed - /// by [`crate::parser::Parser::advance`]. + /// Returns the next keyword and its position without consuming it. /// - /// Returns [`None`] if no tokens have been consumed yet. + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. + /// + /// Returns [`None`] if next token isn't keyword or no tokens remain. 
#[must_use] - pub(crate) fn last_consumed_location(&self) -> Option { - self.cursor.previous_location + pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> { + let Some((Token::Keyword(keyword), keyword_position)) = self.peek_token_and_position() + else { + return None; + }; + Some((keyword, keyword_position)) } - /// Returns the most recent location the parser is "at". + /// Returns the next significant token, its lexeme, and its position + /// without consuming them. /// - /// If at least one token has been consumed, this is the location of the - /// last consumed token. Otherwise it falls back to the location of the - /// first significant token in the stream (or [`TokenLocation::EndOfFile`] - /// if the stream is empty). + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. + /// + /// Returns [`None`] if no tokens remain. #[must_use] - pub(crate) fn last_visited_location(&mut self) -> TokenLocation { - // Only has to `unwrap` before *any* characters were consumed - self.last_consumed_location() - .unwrap_or_else(|| self.peek_location()) + pub(crate) fn peek_token_lexeme_and_position( + &mut self, + ) -> Option<(Token, &'src str, TokenPosition)> { + self.peek_buffered_token() + .map(|(token_position, token_data)| { + (token_data.token, token_data.lexeme, *token_position) + }) } - /// Peeks the token at `lookahead` (`0` is the next token) - /// without consuming. + /// Returns the next significant token at `lookahead` without consuming it. /// - /// Returns `None` if the stream ends before that position. + /// `lookahead` counts significant tokens, with `0` referring to the next + /// significant token. + /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. + /// + /// Returns [`None`] if no tokens remain. 
#[must_use] pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option { self.cursor - .ensure_min_lookahead(lookahead, &mut self.trivia); + .ensure_lookahead_available(lookahead, &mut self.trivia); self.cursor .lookahead_buffer .get(lookahead) - .map(|(_, token_piece)| token_piece.token) + .map(|(_, token_data)| token_data.token) + } + + /// Returns the keyword at `lookahead` without consuming it. + /// + /// `lookahead` counts significant tokens, with `0` referring to the next + /// significant token. + /// + /// May buffer additional tokens and record skipped trivia, but does not + /// consume any significant token. + /// + /// Returns [`None`] if the token at that position is not a keyword or if + /// the stream ends before that position. + #[must_use] + pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option { + match self.peek_token_at(lookahead) { + Some(Token::Keyword(keyword)) => Some(keyword), + _ => None, + } + } + + /// Returns the position of the next significant token without consuming it. + /// + /// Generates an error with `error_kind` if no tokens remain. + pub(crate) fn require_position( + &mut self, + error_kind: parser::ParseErrorKind, + ) -> ParseResult<'src, 'arena, TokenPosition> { + self.peek_position() + .ok_or_else(|| self.make_error_here(error_kind)) + } + + /// Returns the next significant token and its position without consuming + /// it. + /// + /// Generates an error with `error_kind` if no tokens remain. + pub(crate) fn require_token_and_position( + &mut self, + error_kind: parser::ParseErrorKind, + ) -> ParseResult<'src, 'arena, (Token, TokenPosition)> { + self.peek_token_and_position() + .ok_or_else(|| self.make_error_here(error_kind)) + } + + /// Returns the next significant token, its lexeme, and its position + /// without consuming them. + /// + /// Generates an error with `error_kind` if no tokens remain. 
+ pub(crate) fn require_token_lexeme_and_position( + &mut self, + error_kind: parser::ParseErrorKind, + ) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> { + self.peek_token_lexeme_and_position() + .ok_or_else(|| self.make_error_here(error_kind)) } /// Advances by one significant token. /// - /// Trivia is internally handled and recorded. - /// Does nothing at the end-of-file. - pub(crate) fn advance(&mut self) { - self.cursor.ensure_min_lookahead(0, &mut self.trivia); - if let Some((location, _)) = self.cursor.lookahead_buffer.pop_front() { - self.cursor.previous_location = Some(location); + /// Records any skipped trivia and returns the consumed token position. + /// Returns [`None`] if no significant tokens remain. + pub(crate) fn advance(&mut self) -> Option { + self.cursor.ensure_lookahead_available(0, &mut self.trivia); + if let Some((token_position, _)) = self.cursor.lookahead_buffer.pop_front() { + self.cursor.last_consumed_position = Some(token_position); + Some(token_position) + } else { + None } } - /// If the next token equals `token`, consumes it and returns `true`. + /// If the next significant token equals `token`, consumes it and + /// returns `true`. /// /// Otherwise leaves the cursor unchanged and returns `false`. - /// Trivia is recorded automatically. + #[must_use] pub(crate) fn eat(&mut self, token: Token) -> bool { - let correct_token = self.peek_token() == Some(token); - if correct_token { + if self.peek_token() == Some(token) { self.advance(); + true + } else { + false } - correct_token } - /// Centralized peek used by public peekers. - fn peek_entry(&mut self) -> Option<&(TokenLocation, crate::lexer::TokenPiece<'src>)> { - self.cursor.ensure_min_lookahead(0, &mut self.trivia); - self.cursor.lookahead_buffer.front() - } - - /// Expects `expected` at the current position. + /// If the next significant token corresponds to the given keyword, + /// consumes it and returns `true`. 
/// - /// On match consumes the token and returns its [`TokenLocation`]. - /// Otherwise returns a [`crate::parser::ParseError`] of - /// the given [`crate::parser::ParseErrorKind`] that carries the current - /// span for diagnostics. + /// Otherwise leaves the cursor unchanged and returns `false`. + #[must_use] + pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool { + self.eat(Token::Keyword(keyword)) + } + + /// Expects `expected` token as the next significant one. + /// + /// On match consumes the token and returns its [`TokenPosition`]. + /// Otherwise returns an error of `error_kind` anchored at + /// the current token, or at the last consumed token if the stream is + /// exhausted. That error also gets set a blame span that contains exactly + /// that anchor point. pub(crate) fn expect( &mut self, expected: Token, - error_kind: crate::parser::ParseErrorKind, - ) -> crate::parser::ParseResult<'src, 'arena, TokenLocation> { - let token_position = self.peek_location(); - // `Token` only includes type information, so comparison is valid + error_kind: parser::ParseErrorKind, + ) -> ParseResult<'src, 'arena, TokenPosition> { + // Anchors EOF diagnostics at the last consumed token + // when no current token exists. + let anchor = self + .peek_position() + .unwrap_or_else(|| self.last_consumed_position_or_start()); + // `Token` equality is enough here because lexeme and position + // are stored separately. if self.peek_token() == Some(expected) { self.advance(); - Ok(token_position) + Ok(anchor) } else { - Err(crate::parser::ParseError { - kind: error_kind, - source_span: crate::ast::AstSpan::new(token_position), - }) + Err(self + .make_error_at(error_kind, anchor) + .blame(AstSpan::new(anchor))) + } + } + + /// Expects `expected` keyword as the next significant token. + /// + /// On match consumes the keyword and returns its [`TokenPosition`]. 
+ /// Otherwise returns an error of `error_kind` anchored at the current + /// token, or at the last consumed token if the stream is exhausted. + pub(crate) fn expect_keyword( + &mut self, + expected: Keyword, + error_kind: parser::ParseErrorKind, + ) -> ParseResult<'src, 'arena, TokenPosition> { + self.expect(Token::Keyword(expected), error_kind) + } + + /// Returns position of the last significant token that was actually + /// consumed by [`parser::Parser::advance`]. + /// + /// Returns [`None`] if no tokens have been consumed yet. + #[must_use] + pub(crate) const fn last_consumed_position(&self) -> Option { + self.cursor.last_consumed_position + } + + /// Returns the position of the last significant token consumed by + /// [`parser::Parser::advance`], or the start of the stream if no token has + /// been consumed yet. + /// + /// Useful when diagnostics need a stable anchor even at the beginning of + /// input. + #[must_use] + pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition { + self.cursor + .last_consumed_position + .unwrap_or(TokenPosition(0)) + } + + /// Ensures that parsing has advanced past `old_position`. + /// + /// This is intended as a safeguard against infinite-loop bugs while + /// recovering from invalid input. In debug builds it asserts that progress + /// was made; in release builds it consumes one significant token when + /// the parser stalls. + #[track_caller] + pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) { + if let Some(peeked_position) = self.peek_position() { + debug_assert!( + peeked_position > old_position, + "parser made no forward progress" + ); + if peeked_position <= old_position { + self.advance(); + } } } } diff --git a/rottlib/src/parser/errors.rs b/rottlib/src/parser/errors.rs index c58c0e4..c33bfaa 100644 --- a/rottlib/src/parser/errors.rs +++ b/rottlib/src/parser/errors.rs @@ -1,6 +1,6 @@ //! Submodule with parsing related errors. 
-use crate::ast::AstSpan; +use crate::{ast::AstSpan, lexer::TokenPosition}; /// Internal parse error kinds. /// @@ -14,13 +14,89 @@ use crate::ast::AstSpan; /// `UnexpectedToken`, `MultipleDefaults`, etc.). #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum ParseErrorKind { + // ================== New errors that are 100% used! ================== + // headline: empty parenthesized expression + // primary label on ): expected an expression before this \)'` + // secondary label on (: parenthesized expression starts here + // Remove the parentheses or put an expression inside them. + ParenthesizedExpressionEmpty { + left_parenthesis_position: TokenPosition, + }, + // headline: missing type argument in \class<...>`` + // primary label on > or insertion site: expected a type name here + // secondary label on < or on class: type argument list starts here + // help: Write a type name, for example \class`.` + ClassTypeMissingTypeArgument { + left_angle_bracket_position: TokenPosition, + }, + // headline: missing closing \>` in `class<...>`` + // primary label on offending following token or EOF: expected \>` before this token` or at EOF: expected \>` here` + // secondary label on <: this \<` starts the type argument` + // help: Add \>` to close the class type expression.` + ClassTypeMissingClosingAngleBracket { + left_angle_bracket_position: TokenPosition, + }, + // headline: missing closing \)'` + // primary label on the point where ) was expected: expected \)' here` or, if you have a real token there, expected \)' before this token` + // secondary label on the opening (: this \(` starts the parenthesized expression` + // help: Add \)' to close the expression.` + ParenthesizedExpressionMissingClosingParenthesis { + left_parenthesis_position: TokenPosition, + }, + // headline: expected expression + // primary label: this token cannot start an expression + // optional help: Expressions can start with literals, identifiers, \(`, `{`, or expression keywords.` + 
ExpressionExpected, + // headline: invalid type argument in \class<...>`` + // primary label on the bad token inside the angle brackets: expected a qualified type name here + // secondary label on class or <: while parsing this class type expression + // note: Only a type name is accepted between \<` and `>` here.` + ClassTypeInvalidTypeArgument { + left_angle_bracket_position: TokenPosition, + }, + // headline: too many arguments in \new(...)`` + // primary label on the fourth argument, or on the comma before it if that is easier: unexpected extra argument + // secondary label on the opening (: this argument list accepts at most three arguments + // note: The three slots are \outer`, `name`, and `flags`.` + // help: Remove the extra argument. + NewTooManyArguments { + left_parenthesis_position: TokenPosition, + }, + // headline: missing closing \)' in `new(...)`` + // primary label: expected \)' here` + // secondary label on the opening (: this argument list starts here + // help: Add \)' to close the argument list.` + NewMissingClosingParenthesis { + left_parenthesis_position: TokenPosition, + }, + // missing class specifier in \new` expression` + // Primary label on the first token where a class specifier should have started: expected a class specifier here + // Secondary label on new: \new` expression starts here` If there was an argument list, an additional secondary on ( is also reasonable: optional \new(...)` arguments end here` + // Help: Add the class or expression to instantiate after \new` or `new(...)`.` + NewMissingClassSpecifier { + new_keyword_position: TokenPosition, + }, + // ================== Old errors to be thrown away! ================== /// Expression inside `(...)` could not be parsed and no closing `)` /// was found. - ExpressionMissingClosingParenthesis, + FunctionCallMissingClosingParenthesis, /// A `do` block was not followed by a matching `until`. DoMissingUntil, /// Found an unexpected token while parsing an expression. 
ExpressionUnexpectedToken, + DeclEmptyVariableDeclarations, + DeclNoSeparatorBetweenVariableDeclarations, + DeclExpectedRightBracketAfterArraySize, + DeclExpectedCommaAfterVariableDeclarator, + TypeSpecExpectedType, + TypeSpecInvalidNamedTypeName, + + TypeSpecArrayMissingOpeningAngle, + TypeSpecArrayMissingInnerType, + TypeSpecArrayMissingClosingAngle, + + TypeSpecClassMissingInnerType, + TypeSpecClassMissingClosingAngle, /// A `for` loop is missing its opening `(`. ForMissingOpeningParenthesis, /// The first `;` in `for (init; cond; step)` is missing. @@ -33,6 +109,7 @@ pub enum ParseErrorKind { BlockMissingSemicolonAfterExpression, /// A statement inside a block is not terminated with `;`. BlockMissingSemicolonAfterStatement, + BlockMissingClosingBrace, /// `switch` has no body (missing matching braces). SwitchMissingBody, /// The first top-level item in a `switch` body is not a `case`. @@ -43,6 +120,7 @@ pub enum ParseErrorKind { SwitchDuplicateDefault, /// Found `case` arms after a `default` branch. SwitchCasesAfterDefault, + SwitchMissingClosingBrace, /// A `goto` was not followed by a label. GotoMissingLabel, /// Unexpected end of input while parsing. @@ -75,6 +153,184 @@ pub enum ParseErrorKind { /// Expected one of: integer, float, string, `true`, `false`, `none` /// or an identifier. DeclarationLiteralUnexpectedToken, + /// A class name was expected, but the current token is not an identifier. + /// + /// Emitted when parsing `class Foo` and the token after `class` is not an + /// identifier (so its string value cannot be extracted). + ClassNameNotIdentifier, + /// A parent class name after `extends` was expected, but the token is not + /// an identifier. + /// + /// Emitted when parsing `class Foo extends Bar` and the token after + /// `extends` is not an identifier. + ClassParentNameNotIdentifier, + /// A class declaration was not terminated with `;`. 
+ /// + /// Emitted when the parser reaches the end of a class definition but + /// does not encounter the required semicolon. + ClassMissingSemicolon, + /// An identifier was expected inside optional parentheses, but the token + /// is not an identifier. + /// + /// Emitted by helpers that parse either `()` or bare ``. + ParenthesisedIdentifierNameNotIdentifier, + /// A `(` was seen before an identifier, but the matching `)` was not found. + /// + /// Emitted when parsing a parenthesised identifier like `(Foo)`. + ParenthesisedIdentifierMissingClosingParenthesis, + /// `HideCategories` is missing the opening `(` before the category list. + /// + /// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`. + HideCategoriesMissingOpeningParenthesis, + /// `HideCategories` is missing the closing `)` after the category list. + HideCategoriesMissingClosingParenthesis, + /// `HideCategories` is missing the opening `(` before the category list. + /// + /// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`. + ShowCategoriesMissingOpeningParenthesis, + /// `HideCategories` is missing the closing `)` after the category list. + ShowCategoriesMissingClosingParenthesis, + /// `Within` must be followed by a class or package name identifier. + /// + /// Example: `Within(MyOuterClass)`. + WithinNameNotIdentifier, + /// `operator` modifier is missing the opening `(` before + /// the precedence rank. + /// + /// Expected syntax: `operator()`. + OperatorMissingOpeningParenthesis, + /// `operator(<...>)` must contain an integer literal precedence rank. + /// + /// Emitted when the token inside parentheses is not an integer literal. + OperatorPrecedenceNotIntegerLiteral, + /// `operator(` is missing the closing `)`. 
+ OperatorMissingClosingParenthesis, + ParamInvalidTypeName, + ParamMissingIdentifier, + FunctionReturnTypeNotTypeName, + FunctionNameNotIdentifier, + FunctionParamsMissingOpeningParenthesis, + FunctionParamsMissingClosingParenthesis, + ClassUnexpectedItem, + EnumMissingLeftBrace, + EnumBadVariant, + StructFieldMissingName, + StructFieldMissingSemicolon, + StructMissingRightBrace, + // Named enum/struct typedefs + EnumMissingKeyword, // class member: expected `enum` + EnumExpectedNameOrBrace, // after `enum`, expected identifier + EnumNoClosingBrace, + EnumEmptyVariants, + EnumNoSeparatorBetweenVariants, + EnumMissingLBrace, + StructMissingKeyword, // class member: expected `struct` + StructExpectedNameOrBrace, // after `struct`, expected identifier + StructExpectedExtendsOrBrace, + StructMissingLeftBrace, + StructExpectedBaseName, + StructBodyUnexpectedItem, + CppDirectiveMissingCppBlock, + + // var(...) field decls + VarMissingKeyword, // class member: expected `var` + VarSpecsMissingOpeningParenthesis, // after `var`, expected '(' + VarSpecNotIdentifier, // inside var(...), expected identifier + VarSpecsMissingClosingParenthesis, // var(...) 
missing ')' + + // Generic decl end + DeclMissingSemicolon, // class-level declaration missing `;` + // --- Replication --- + ReplicationMissingReliability, + ReplicationIfMissingOpeningParenthesis, + ReplicationIfMissingClosingParenthesis, + ReplicationMemberNotIdentifier, + ReplicationMemberMissingClosingParenthesis, + ReplicationRuleMissingSemicolon, + ReplicationMissingKeyword, + ReplicationMissingLBrace, + ReplicationMissingRBrace, + + // --- DefaultProperties --- + DefaultPropPathExpectedIdentifier, + DefaultPropIndexNotIntegerLiteral, + DefaultPropIndexMissingClosingParenthesis, + DefaultPropAssignMissingEq, + DefaultPropsMissingKeyword, + DefaultPropsMissingLBrace, + DefaultPropsMissingRBrace, + + // --- Begin/End Object headers --- + ObjectBeginMissingKeyword, + ObjectMissingKeyword, + ObjectHeaderKeyNotIdentifier, + ObjectHeaderMissingEq, + + // --- State / ignores --- + IgnoresItemNotIdentifier, + IgnoresMissingSemicolon, + StateMissingKeyword, + StateNameNotIdentifier, + StateParentNameNotIdentifier, + StateMissingLBrace, + StateMissingRBrace, + + ClassMissingKeyword, + TypeMissingLT, + TypeMissingGT, + StateParensMissingRParen, + BadTypeInClassTypeDeclaration, + IdentifierExpected, + + // --- Generic list diagnostics (comma-separated, closed by `)`) --- + /// Saw `)` immediately after `(`, or closed the list without any items. + /// Use when a construct requires at least one item: e.g. `HideCategories(...)`. + ListEmpty, + + /// Parser was positioned where an item was required but found neither an + /// item nor a terminator. Typical triggers: + /// - Leading comma: `(, Foo)` + /// - Double comma: `(Foo,, Bar)` + /// - Garbage in place of an item: `(@@, Foo)` + /// + /// Recovery: skip to next comma or `)`. + ListMissingIdentifierBeforeSeparator, + + /// Parser was positioned where an item was required but found neither an + /// item nor a terminator. 
Typical triggers: + /// - Leading comma: `(, Foo)` + /// - Double comma: `(Foo,, Bar)` + /// - Garbage in place of an item: `(@@, Foo)` + /// + /// Recovery: skip to next comma or `)`. + ListInvalidIdentifier, + + /// Two items without a comma (or some token after an item where a comma + /// was required). Typical triggers: + /// - Adjacent identifiers: `(Foo Bar)` + /// - Token after an item where only `,` or `)` are valid. + /// + /// Recovery: behave as if a comma were present; continue with the next item. + ListMissingSeparator, + + /// Comma directly before `)`: `(Foo, )`. + /// Treat as a soft error or warning, depending on your policy. + ListTrailingSeparator, + FunctionArgumentMissingComma, + // Expression was required, but none started + MissingExpression, + MissingBranchBody, + CallableExpectedHeader, + CallableExpectedKind, + CallableOperatorInvalidPrecedence, + CallableMissingBodyOrSemicolon, + CallableNameNotIdentifier, + CallablePrefixOperatorInvalidSymbol, + CallableInfixOperatorInvalidSymbol, + CallablePostfixOperatorInvalidSymbol, + CallableParamsMissingOpeningParenthesis, + CallableParamsMissingClosingParenthesis, + NativeModifierIdNotIntegerLiteral, } /// Enumerates all specific kinds of parsing errors that the parser can emit. @@ -83,18 +339,32 @@ pub enum ParseErrorKind { pub struct ParseError { /// The specific kind of parse error that occurred. pub kind: ParseErrorKind, + pub anchor: TokenPosition, + /// Where the user should look first. + pub blame_span: AstSpan, /// The source span in which the error was detected. 
- pub source_span: AstSpan, + pub covered_span: AstSpan, + pub related_span: Option, } pub type ParseResult<'src, 'arena, T> = Result; -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - #[must_use] - pub(crate) fn make_error_here(&mut self, error_kind: ParseErrorKind) -> ParseError { +impl crate::parser::Parser<'_, '_> { + pub(crate) fn make_error_here(&self, error_kind: ParseErrorKind) -> ParseError { + self.make_error_at(error_kind, self.last_consumed_position_or_start()) + } + + pub(crate) fn make_error_at( + &self, + error_kind: ParseErrorKind, + position: TokenPosition, + ) -> ParseError { ParseError { kind: error_kind, - source_span: AstSpan::new(self.peek_location()), + anchor: position, + blame_span: AstSpan::new(position), + covered_span: AstSpan::new(position), + related_span: None, } } } diff --git a/rottlib/src/parser/grammar/block.rs b/rottlib/src/parser/grammar/block.rs deleted file mode 100644 index d79e856..0000000 --- a/rottlib/src/parser/grammar/block.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::ast::Expression; -use crate::lexer::Token; -use crate::parser::ParseErrorKind; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parses a block `{ ... }` after `{`. - /// - /// Consumes tokens until the matching `}` and returns - /// an [`Expression::Block`] spanning from the opening `{` to - /// the closing `}`. - /// Returns a best-effort block on premature end-of-file. 
- #[must_use] - pub(crate) fn parse_block_cont( - &mut self, - block_start_location: crate::lexer::TokenLocation, - ) -> crate::ast::ExpressionRef<'src, 'arena> { - let mut statements = self.arena.vec(); - let mut tail = None; - loop { - let Some((token, token_location)) = self.peek_token_and_location() else { - self.report_error_here(ParseErrorKind::UnexpectedEndOfFile); - return self.arena.alloc( - Expression::Block { statements, tail }, - crate::ast::AstSpan { - from: block_start_location, - to: self.peek_location(), - }, - ); - }; - if let Token::RightBrace = token { - self.advance(); // '}' - let block_span = crate::ast::AstSpan { - from: block_start_location, - to: token_location, - }; - return self - .arena - .alloc(Expression::Block { statements, tail }, block_span); - } - // We know that at this point: - // 1. There is still a token and it is not end-of-file; - // 2. It isn't end of the block. - // So having a tail statement there is a problem! - if let Some(tail_expression) = tail { - self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression); - let tail_span = *tail_expression.span(); - let node = self.arena.alloc( - crate::ast::Statement::Expression(tail_expression), - tail_span, - ); - statements.push(node); - } - tail = self.parse_block_item(&mut statements); - // Ensure forward progress under errors to avoid infinite loops. 
- if self.peek_location() <= token_location { - self.advance(); - } - } - } -} diff --git a/rottlib/src/parser/grammar/class.rs b/rottlib/src/parser/grammar/class.rs new file mode 100644 index 0000000..1bc5133 --- /dev/null +++ b/rottlib/src/parser/grammar/class.rs @@ -0,0 +1,959 @@ +// rottlib/src/parser/grammar/class.rs + +#![allow(clippy::all, clippy::pedantic, clippy::nursery)] + +use crate::ast::{ + AstSpan, BlockBody, ClassConstDecl, ClassConstDeclRef, ClassDeclaration, ClassDefinition, + ClassMember, ClassModifier, ClassModifierRef, ClassVarDecl, ClassVarDeclRef, + DeclarationLiteral, DeclarationLiteralRef, ExecDirective, ExecDirectiveRef, ExpressionRef, + IdentifierToken, Reliability, ReplicationBlock, ReplicationBlockRef, ReplicationRule, + ReplicationRuleRef, StateDecl, StateDeclRef, StateModifier, VariableDeclarator, + VariableDeclaratorRef, +}; +use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt, SyncLevel}; + +impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { + #[inline] + pub fn ensure_progress_or_break(&mut self, before: TokenPosition) -> bool { + match self.peek_position() { + Some(position) if position > before => true, + _ => self.advance().is_some(), + } + } + + fn parse_exec_directive(&mut self) -> ParseResult<'src, 'arena, ExecDirectiveRef<'arena>> { + let (token, lexeme, start_position) = + self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?; + debug_assert!(matches!(token, Token::ExecDirective)); + + let trimmed = lexeme.trim_end_matches(['\r', '\n']); + self.advance(); + + let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + Ok(self.arena.alloc_node( + ExecDirective { + text: self.arena.string(trimmed), + span, + }, + span, + )) + } + + fn parse_parenthesised_identifier(&mut self) -> ParseResult<'src, 'arena, IdentifierToken> { + let has_opening_parenthesis = self.eat(Token::LeftParenthesis); + let identifier = 
+ self.parse_identifier(ParseErrorKind::ParenthesisedIdentifierNameNotIdentifier)?; + if has_opening_parenthesis { + self.expect( + Token::RightParenthesis, + ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + } + Ok(identifier) + } + + #[inline] + fn lookahead_state_after_mods(&mut self) -> bool { + let mut lookahead = 0; + loop { + match self.peek_keyword_at(lookahead) { + Some(Keyword::Auto | Keyword::Simulated) => { + lookahead += 1; + } + Some(Keyword::State) => return true, + _ => return false, + } + } + } + + pub fn parse_array_len_expr( + &mut self, + ) -> ParseResult<'src, 'arena, Option>> { + if !self.eat(Token::LeftBracket) { + return Ok(None); + } + + let expression = self.parse_expression(); + + self.expect( + Token::RightBracket, + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { + left_parenthesis_position: self.last_consumed_position_or_start(), + }, + ) + .sync_error_at(self, SyncLevel::CloseBracket)?; + + Ok(Some(expression)) + } + + pub fn parse_class_declaration_modifier( + &mut self, + ) -> ParseResult<'src, 'arena, Option>> { + use ClassModifier::{ + Abstract, CacheExempt, CollapseCategories, Config, Const, DependsOn, Deprecated, + DontCollapseCategories, DynamicRecompile, EditConst, EditInline, EditInlineNew, Export, + ExportStructs, Final, GlobalConfig, HideCategories, HideDropdown, Instanced, Localized, + Native, NativeReplication, NoExport, NotEditInlineNew, NotPlaceable, ParseConfig, + PerObjectConfig, Placeable, Private, Protected, Public, SafeReplace, ShowCategories, + Static, Transient, Within, + }; + + let Some((token, modifier_position)) = self.peek_token_and_position() else { + return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); + }; + + let mut consumed_inside_match = false; + let mut span = AstSpan::new(modifier_position); + + let modifier = match token { + Token::Keyword(Keyword::Final) => Final, + 
Token::Keyword(Keyword::Native) => Native, + Token::Keyword(Keyword::Abstract) => Abstract, + Token::Keyword(Keyword::Transient) => Transient, + Token::Keyword(Keyword::Public) => Public, + Token::Keyword(Keyword::Protected) => Protected, + Token::Keyword(Keyword::Private) => Private, + Token::Keyword(Keyword::Static) => Static, + Token::Keyword(Keyword::Const) => Const, + Token::Keyword(Keyword::Deprecated) => Deprecated, + Token::Keyword(Keyword::NoExport) => NoExport, + Token::Keyword(Keyword::Export) => Export, + + Token::Keyword(Keyword::Config) => { + self.advance(); + consumed_inside_match = true; + let value = if self.peek_token() == Some(Token::LeftParenthesis) { + Some(self.parse_parenthesised_identifier()?) + } else { + None + }; + Config(value) + } + + Token::Keyword(Keyword::Localized) => Localized, + Token::Keyword(Keyword::Placeable) => Placeable, + Token::Keyword(Keyword::NotPlaceable) => NotPlaceable, + Token::Keyword(Keyword::Instanced) => Instanced, + Token::Keyword(Keyword::EditConst) => EditConst, + Token::Keyword(Keyword::EditInline) => EditInline, + Token::Keyword(Keyword::EditInlineNew) => EditInlineNew, + Token::Keyword(Keyword::NotEditInlineNew) => NotEditInlineNew, + Token::Keyword(Keyword::CollapseCategories) => CollapseCategories, + Token::Keyword(Keyword::DontCollapseCategories) => DontCollapseCategories, + Token::Keyword(Keyword::GlobalConfig) => GlobalConfig, + Token::Keyword(Keyword::PerObjectConfig) => PerObjectConfig, + Token::Keyword(Keyword::DynamicRecompile) => DynamicRecompile, + Token::Keyword(Keyword::CacheExempt) => CacheExempt, + Token::Keyword(Keyword::HideDropdown) => HideDropdown, + Token::Keyword(Keyword::ParseConfig) => ParseConfig, + Token::Keyword(Keyword::NativeReplication) => NativeReplication, + Token::Keyword(Keyword::ExportStructs) => ExportStructs, + Token::Keyword(Keyword::SafeReplace) => SafeReplace, + + Token::Keyword(Keyword::HideCategories) => { + self.advance(); + consumed_inside_match = true; + 
self.expect( + Token::LeftParenthesis, + ParseErrorKind::HideCategoriesMissingOpeningParenthesis, + )?; + let categories = self.parse_identifier_list(); + self.expect( + Token::RightParenthesis, + ParseErrorKind::HideCategoriesMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis)?; + HideCategories(categories) + } + Token::Keyword(Keyword::ShowCategories) => { + self.advance(); + consumed_inside_match = true; + self.expect( + Token::LeftParenthesis, + ParseErrorKind::ShowCategoriesMissingOpeningParenthesis, + )?; + let categories = self.parse_identifier_list(); + self.expect( + Token::RightParenthesis, + ParseErrorKind::ShowCategoriesMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis)?; + ShowCategories(categories) + } + Token::Keyword(Keyword::Within) => { + self.advance(); + consumed_inside_match = true; + Within(self.parse_identifier(ParseErrorKind::WithinNameNotIdentifier)?) + } + Token::Keyword(Keyword::DependsOn) => { + self.advance(); + consumed_inside_match = true; + DependsOn(self.parse_parenthesised_identifier()?) 
+ } + + _ => return Ok(None), + }; + + if !consumed_inside_match { + self.advance(); + } + span.extend_to(self.last_consumed_position_or_start()); + Ok(Some(self.arena.alloc_node(modifier, span))) + } + + pub(crate) fn parse_class_header_cont( + &mut self, + ) -> ParseResult<'src, 'arena, ClassDeclaration<'arena>> + where + 'src: 'arena, + { + let class_name = self.parse_identifier(ParseErrorKind::ClassNameNotIdentifier)?; + + let parent_class_name = if self.eat_keyword(Keyword::Extends) { + let qualified_parent = + self.parse_qualified_identifier(ParseErrorKind::ClassParentNameNotIdentifier)?; + Some(qualified_parent) + } else { + None + }; + + let mut modifiers = Vec::new(); + loop { + match self.parse_class_declaration_modifier() { + Ok(Some(next_modifier)) => modifiers.push(next_modifier), + Ok(None) => break, + Err(error) => { + self.report_error(error); + break; + } + } + } + + self.expect(Token::Semicolon, ParseErrorKind::ClassMissingSemicolon)?; + Ok(ClassDeclaration { + name: class_name, + parent: parent_class_name.map(|identifier| identifier.head()), + modifiers, + }) + } + + fn parse_class_var_decl(&mut self) -> ParseResult<'src, 'arena, ClassVarDeclRef<'src, 'arena>> { + let start_position = self.expect( + Token::Keyword(Keyword::Var), + ParseErrorKind::VarMissingKeyword, + )?; + + let paren_specs = self.parse_var_editor_specifier_list(); + let modifiers = self.parse_var_declaration_modifiers(); + let type_spec = self.parse_type_specifier()?; + let declarators = self.parse_class_var_declarators(); + + self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?; + + let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + Ok(self.arena.alloc_node( + ClassVarDecl { + paren_specs, + modifiers, + type_spec, + declarators, + span, + }, + span, + )) + } + + fn parse_replication_rule( + &mut self, + ) -> ParseResult<'src, 'arena, ReplicationRuleRef<'src, 'arena>> { + let start_position = 
self.require_position(ParseErrorKind::UnexpectedEndOfFile)?; + + let reliability = match self.peek_token() { + Some(Token::Keyword(Keyword::Reliable)) => { + self.advance(); + Reliability::Reliable + } + Some(Token::Keyword(Keyword::Unreliable)) => { + self.advance(); + Reliability::Unreliable + } + _ => return Err(self.make_error_here(ParseErrorKind::ReplicationMissingReliability)), + }; + + let condition = if self.eat_keyword(Keyword::If) { + self.expect( + Token::LeftParenthesis, + ParseErrorKind::ReplicationIfMissingOpeningParenthesis, + )?; + let expression = self.parse_expression(); + self.expect( + Token::RightParenthesis, + ParseErrorKind::ReplicationIfMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis)?; + Some(expression) + } else { + None + }; + + let mut members = self.arena.vec(); + loop { + let identifier = + self.parse_identifier(ParseErrorKind::ReplicationMemberNotIdentifier)?; + members.push(identifier); + + if self.eat(Token::LeftParenthesis) { + self.expect( + Token::RightParenthesis, + ParseErrorKind::ReplicationMemberMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + } + + if !self.eat(Token::Comma) { + break; + } + } + + self.expect( + Token::Semicolon, + ParseErrorKind::ReplicationRuleMissingSemicolon, + )?; + let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + Ok(self.arena.alloc_node( + ReplicationRule { + reliability, + condition, + members, + span, + }, + span, + )) + } + + fn parse_replication_block( + &mut self, + ) -> ParseResult<'src, 'arena, ReplicationBlockRef<'src, 'arena>> { + let start_position = self.expect( + Token::Keyword(Keyword::Replication), + ParseErrorKind::ReplicationMissingKeyword, + )?; + self.expect(Token::LeftBrace, ParseErrorKind::ReplicationMissingLBrace)?; + + let mut rules = self.arena.vec(); + while !matches!(self.peek_token(), Some(Token::RightBrace)) { + let loop_start = self + 
.peek_position() + .unwrap_or_else(|| self.last_consumed_position_or_start()); + + if self.peek_token().is_none() { + return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); + } + + match self.parse_replication_rule() { + Ok(rule) => rules.push(rule), + Err(error) => { + self.report_error(error); + self.recover_until(SyncLevel::Statement); + let _ = self.eat(Token::Semicolon); + if !self.ensure_progress_or_break(loop_start) { + break; + } + continue; + } + } + + if !self.ensure_progress_or_break(loop_start) { + break; + } + } + + self.expect(Token::RightBrace, ParseErrorKind::ReplicationMissingRBrace)?; + let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + Ok(self + .arena + .alloc_node(ReplicationBlock { rules, span }, span)) + } + + fn parse_ignores_clause( + &mut self, + ) -> ParseResult<'src, 'arena, Option>> { + if !self.eat_keyword(Keyword::Ignores) { + return Ok(None); + } + + let mut identifiers = self.arena.vec(); + loop { + let identifier = self.parse_identifier(ParseErrorKind::IgnoresItemNotIdentifier)?; + identifiers.push(identifier); + if !self.eat(Token::Comma) { + break; + } + } + + self.expect(Token::Semicolon, ParseErrorKind::IgnoresMissingSemicolon)?; + Ok(Some(identifiers)) + } + + fn parse_state_decl(&mut self) -> ParseResult<'src, 'arena, StateDeclRef<'src, 'arena>> { + let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?; + + let mut modifiers = self.arena.vec(); + loop { + match self.peek_keyword() { + Some(Keyword::Auto) => { + self.advance(); + modifiers.push(StateModifier::Auto); + } + Some(Keyword::Simulated) => { + self.advance(); + modifiers.push(StateModifier::Simulated); + } + _ => break, + } + } + + self.expect( + Token::Keyword(Keyword::State), + ParseErrorKind::StateMissingKeyword, + )?; + + loop { + match self.peek_keyword() { + Some(Keyword::Auto) => { + self.advance(); + modifiers.push(StateModifier::Auto); + } + Some(Keyword::Simulated) => { + 
self.advance(); + modifiers.push(StateModifier::Simulated); + } + _ => break, + } + } + + if self.eat(Token::LeftParenthesis) { + self.expect( + Token::RightParenthesis, + ParseErrorKind::StateParensMissingRParen, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + } + + let name = self.parse_identifier(ParseErrorKind::StateNameNotIdentifier)?; + let parent = if self.eat_keyword(Keyword::Extends) { + Some(self.parse_identifier(ParseErrorKind::StateParentNameNotIdentifier)?) + } else { + None + }; + + let opening_brace_position = + self.expect(Token::LeftBrace, ParseErrorKind::StateMissingLBrace)?; + let ignores = self.parse_ignores_clause()?; + let BlockBody { + statements: body, + span: inner_span, + } = self.parse_braced_block_statements_tail(opening_brace_position); + + let span = AstSpan::range(start_position, inner_span.token_to); + Ok(self.arena.alloc_node( + StateDecl { + name, + parent, + modifiers, + ignores, + body, + span, + }, + span, + )) + } + + pub(crate) fn parse_class_definition_cont( + &mut self, + ) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> { + let header = self.parse_class_header_cont()?; + let mut members = self.arena.vec(); + + loop { + let Some((token, member_start)) = self.peek_token_and_position() else { + break; + }; + + match token { + Token::Keyword(Keyword::DefaultProperties) => break, + + _ if self.lookahead_state_after_mods() => { + let state = self + .parse_state_decl() + .widen_error_span_from(member_start) + .unwrap_or_fallback(self); + let span = *state.span(); + members.push(self.arena.alloc_node(ClassMember::State(state), span)); + } + + _ if self.is_callable_header_ahead() => { + let callable = self.parse_callable_definition(); + let span = *callable.span(); + members.push(self.arena.alloc_node(ClassMember::Function(callable), span)); + } + + Token::Keyword(Keyword::Const) => { + let constant = self + .parse_class_const_decl() + .widen_error_span_from(member_start) + 
.unwrap_or_fallback(self); + let span = *constant.span(); + members.push(self.arena.alloc_node(ClassMember::Const(constant), span)); + } + + Token::Keyword(Keyword::Enum) + if !matches!(self.peek_token_at(1), Some(Token::LeftBrace)) => + { + self.advance(); + let enum_definition = self.parse_enum_definition_tail(member_start); + let span = *enum_definition.span(); + members.push( + self.arena + .alloc_node(ClassMember::TypeDefEnum(enum_definition), span), + ); + let _ = self.eat(Token::Semicolon); + } + + Token::Keyword(Keyword::Struct) => { + self.advance(); + let struct_definition = self.parse_struct_definition_tail(member_start); + let span = *struct_definition.span(); + members.push( + self.arena + .alloc_node(ClassMember::TypeDefStruct(struct_definition), span), + ); + let _ = self.eat(Token::Semicolon); + } + + Token::Keyword(Keyword::Var) => { + let variable_declaration = self + .parse_class_var_decl() + .widen_error_span_from(member_start) + .unwrap_or_fallback(self); + let span = *variable_declaration.span(); + members.push( + self.arena + .alloc_node(ClassMember::Var(variable_declaration), span), + ); + } + + Token::Keyword(Keyword::Replication) => { + let replication = self + .parse_replication_block() + .widen_error_span_from(member_start) + .unwrap_or_fallback(self); + let span = *replication.span(); + members.push( + self.arena + .alloc_node(ClassMember::Replication(replication), span), + ); + } + + Token::ExecDirective => { + let directive = self + .parse_exec_directive() + .widen_error_span_from(member_start) + .unwrap_or_fallback(self); + let span = *directive.span(); + members.push(self.arena.alloc_node(ClassMember::Exec(directive), span)); + } + + Token::Keyword(Keyword::CppText | Keyword::CppStruct) => { + self.advance(); + if !self.eat(Token::CppBlock) { + self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock); + } + } + + Token::Keyword(Keyword::Class) => break, + + Token::Semicolon => { + self.advance(); + } + + _ => { + 
self.report_error_here(ParseErrorKind::ClassUnexpectedItem); + while let Some(next_token) = self.peek_token() { + match next_token { + Token::Keyword( + Keyword::Function + | Keyword::Event + | Keyword::Enum + | Keyword::Struct + | Keyword::Var + | Keyword::Replication + | Keyword::State + | Keyword::Class + | Keyword::DefaultProperties, + ) => break, + _ => { + self.advance(); + } + } + } + } + } + + if !self.ensure_progress_or_break(member_start) { + break; + } + } + + Ok(ClassDefinition { header, members }) + } + + pub fn parse_source_file( + &mut self, + ) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> { + loop { + match self.peek_token() { + Some(Token::Semicolon) => { + self.advance(); + } + Some(Token::ExecDirective) => { + if let Err(error) = self.parse_exec_directive() { + self.report_error(error); + } + } + //Some(Token::Keyword(Keyword::Class)) | None => break, + _ => break, + } + } + + self.expect( + Token::Keyword(Keyword::Class), + ParseErrorKind::ClassMissingKeyword, + )?; + let class_definition = self.parse_class_definition_cont()?; + + if matches!( + self.peek_token(), + Some(Token::Keyword(Keyword::DefaultProperties)) + ) { + return Ok(class_definition); + } + + loop { + match self.peek_token() { + Some(Token::Semicolon) => { + self.advance(); + } + Some(_) => { + self.report_error_here(ParseErrorKind::ClassUnexpectedItem); + while self.peek_token().is_some() { + self.advance(); + } + break; + } + None => break, + } + } + + Ok(class_definition) + } + + fn decode_signed_integer_literal(&self, s: &str) -> ParseResult<'src, 'arena, i128> { + let (negative, body) = if let Some(rest) = s.strip_prefix('-') { + (true, rest) + } else if let Some(rest) = s.strip_prefix('+') { + (false, rest) + } else { + (false, s) + }; + + let magnitude: u128 = self.decode_unsigned_integer_magnitude(body)?; + + if negative { + const MIN_MAGNITUDE: u128 = 1u128 << 127; + if magnitude == MIN_MAGNITUDE { + Ok(i128::MIN) + } else { + let magnitude_as_i128 = 
i128::try_from(magnitude) + .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))?; + Ok(-magnitude_as_i128) + } + } else { + i128::try_from(magnitude) + .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) + } + } + + fn decode_unsigned_integer_magnitude(&self, body: &str) -> ParseResult<'src, 'arena, u128> { + use ParseErrorKind::InvalidNumericLiteral; + + if body.is_empty() { + return Err(self.make_error_here(InvalidNumericLiteral)); + } + + let (base, digits) = + if let Some(rest) = body.strip_prefix("0x").or_else(|| body.strip_prefix("0X")) { + (16u128, rest) + } else if let Some(rest) = body.strip_prefix("0b").or_else(|| body.strip_prefix("0B")) { + (2u128, rest) + } else if let Some(rest) = body.strip_prefix("0o").or_else(|| body.strip_prefix("0O")) { + (8u128, rest) + } else { + (10u128, body) + }; + + if digits.is_empty() { + return Err(self.make_error_here(InvalidNumericLiteral)); + } + + let mut accumulator: u128 = 0; + for character in digits.chars() { + if character == '_' { + continue; + } + let digit_value = match character { + '0'..='9' => u128::from(character as u32 - '0' as u32), + 'a'..='f' => u128::from(10 + (character as u32 - 'a' as u32)), + 'A'..='F' => u128::from(10 + (character as u32 - 'A' as u32)), + _ => return Err(self.make_error_here(InvalidNumericLiteral)), + }; + if digit_value >= base { + return Err(self.make_error_here(InvalidNumericLiteral)); + } + accumulator = accumulator + .checked_mul(base) + .and_then(|value| value.checked_add(digit_value)) + .ok_or_else(|| self.make_error_here(InvalidNumericLiteral))?; + } + + Ok(accumulator) + } + + fn parse_declaration_literal_class( + &mut self, + ) -> ParseResult<'src, 'arena, DeclarationLiteralRef<'src, 'arena>> { + let (token, lexeme, token_position) = + self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?; + + let literal = match token { + Token::Plus | Token::Minus => { + let is_negative = matches!(token, Token::Minus); 
+ self.advance(); + + let (next_token, next_lexeme, _) = + self.require_token_lexeme_and_position(ParseErrorKind::InvalidNumericLiteral)?; + + match next_token { + Token::IntegerLiteral => { + let value = if is_negative { + self.decode_signed_integer_literal(&format!("-{next_lexeme}"))? + } else { + self.decode_signed_integer_literal(next_lexeme)? + }; + self.advance(); + DeclarationLiteral::Integer(value) + } + Token::FloatLiteral => { + let mut signed_lexeme = String::with_capacity(1 + next_lexeme.len()); + signed_lexeme.push(if is_negative { '-' } else { '+' }); + signed_lexeme.push_str(next_lexeme); + let value = self.decode_float_literal(&signed_lexeme)?; + self.advance(); + DeclarationLiteral::Float(value) + } + _ => { + return Err( + self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken) + ); + } + } + } + Token::IntegerLiteral => { + let value = self.decode_signed_integer_literal(lexeme)?; + self.advance(); + DeclarationLiteral::Integer(value) + } + Token::FloatLiteral => { + let value = self.decode_float_literal(lexeme)?; + self.advance(); + DeclarationLiteral::Float(value) + } + Token::StringLiteral => { + let value = self.unescape_string_literal(lexeme); + self.advance(); + DeclarationLiteral::String(value) + } + Token::Keyword(Keyword::True) => { + self.advance(); + DeclarationLiteral::Bool(true) + } + Token::Keyword(Keyword::False) => { + self.advance(); + DeclarationLiteral::Bool(false) + } + Token::Keyword(Keyword::None) => { + self.advance(); + DeclarationLiteral::None + } + Token::NameLiteral => { + let inner = &lexeme[1..lexeme.len() - 1]; + let value = self.arena.string(inner); + self.advance(); + DeclarationLiteral::String(value) + } + Token::Keyword(Keyword::Class) => { + self.advance(); + let (next_token, next_lexeme, _) = self.require_token_lexeme_and_position( + ParseErrorKind::DeclarationLiteralUnexpectedToken, + )?; + if !matches!(next_token, Token::NameLiteral) { + return Err( + 
self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken) + ); + } + let inner = &next_lexeme[1..next_lexeme.len() - 1]; + let quoted_name = self.arena.string(inner); + self.advance(); + DeclarationLiteral::TaggedName { + tag: IdentifierToken(token_position), + quoted: quoted_name, + } + } + _ if token.is_valid_identifier_name() => { + self.advance(); + DeclarationLiteral::Identifier(lexeme) + } + _ => return Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken)), + }; + + Ok(DeclarationLiteralRef { + literal, + position: token_position, + }) + } + + fn parse_class_const_decl( + &mut self, + ) -> ParseResult<'src, 'arena, ClassConstDeclRef<'src, 'arena>> { + let start_position = self.expect( + Token::Keyword(Keyword::Const), + ParseErrorKind::ClassUnexpectedItem, + )?; + + let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?; + self.expect(Token::Assign, ParseErrorKind::TypeSpecInvalidNamedTypeName)?; + let value = self.parse_declaration_literal_class()?; + + self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?; + let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + + Ok(self + .arena + .alloc_node(ClassConstDecl { name, value, span }, span)) + } + + fn parse_class_var_declarators( + &mut self, + ) -> crate::arena::ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> { + let mut declarators = self.arena.vec(); + + loop { + match self.peek_token_and_position() { + Some((next_token, declarator_start)) if next_token.is_valid_identifier_name() => { + let identifier = self + .parse_identifier(ParseErrorKind::DeclBadVariableIdentifier) + .unwrap_or(IdentifierToken(declarator_start)); + + let array_size = match self.parse_array_len_expr() { + Ok(value) => value, + Err(error) => { + self.report_error(error); + self.recover_until(SyncLevel::CloseBracket); + let _ = self.eat(Token::RightBracket); + None + } + }; + + let span = AstSpan::range(identifier.0, 
self.last_consumed_position_or_start()); + declarators.push(self.arena.alloc_node( + VariableDeclarator { + name: identifier, + initializer: None, + array_size, + }, + span, + )); + + if self.eat(Token::Comma) { + if self.peek_token() == Some(Token::Semicolon) { + break; + } + continue; + } + break; + } + Some((_, _)) if declarators.is_empty() => { + self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier); + self.recover_until(SyncLevel::Statement); + let _ = self.eat(Token::Semicolon); + break; + } + _ => break, + } + } + + declarators + } + + pub(crate) fn parse_identifier_list( + &mut self, + ) -> crate::arena::ArenaVec<'arena, IdentifierToken> { + let list_start = self.last_consumed_position_or_start(); + let mut identifiers = self.arena.vec(); + + while let Some((token, _lexeme, identifier_position)) = + self.peek_token_lexeme_and_position() + { + match token { + Token::RightParenthesis => break, + Token::Comma => { + self.advance(); + self.report_error_here(ParseErrorKind::ListMissingIdentifierBeforeSeparator); + } + _ if token.is_valid_identifier_name() => { + self.advance(); + identifiers.push(IdentifierToken(identifier_position)); + if !self.eat(Token::Comma) + && let Some(next_token) = self.peek_token() + && next_token != Token::RightParenthesis + { + self.report_error_here(ParseErrorKind::ListMissingSeparator); + } + } + _ => { + self.make_error_here(ParseErrorKind::ListInvalidIdentifier) + .sync_error_until(self, SyncLevel::ListSeparator) + .report_error(self); + } + } + } + + if identifiers.is_empty() { + let list_end = self.last_consumed_position_or_start(); + self.report_error(crate::parser::ParseError { + kind: ParseErrorKind::ListEmpty, + anchor: list_start, + blame_span: AstSpan::range(list_start, list_end), + covered_span: AstSpan::range(list_start, list_end), + related_span: None, + }); + } + + identifiers + } +} diff --git a/rottlib/src/parser/grammar/control.rs b/rottlib/src/parser/grammar/control.rs deleted file mode 100644 index 
213fb46..0000000 --- a/rottlib/src/parser/grammar/control.rs +++ /dev/null @@ -1,180 +0,0 @@ -use crate::ast::{AstSpan, Expression, ExpressionRef}; -use crate::lexer::{Token, TokenLocation}; -use crate::parser::{ParseErrorKind, ResultRecoveryExt}; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parses an `if` block, assuming that `if` token was consumed. - /// - /// Produces an [`Expression::If`] spanning from the `if` keyword to - /// the end of the last arm (`else` body if present, - /// otherwise the `if` body). - #[must_use] - pub(crate) fn parse_if_cont( - &mut self, - if_start_location: TokenLocation, - ) -> ExpressionRef<'src, 'arena> { - let condition = self.parse_expression(); - let body = self.parse_expression(); - - let (else_body, if_end_location) = if let Some(Token::Else) = self.peek_token() { - self.advance(); // else - let else_body = self.parse_expression(); - // Capture end before moving `else_body` to build the full `if` span - let body_end = else_body.span().to; - (Some(else_body), body_end) - } else { - (None, body.span().to) - }; - - let span = AstSpan { - from: if_start_location, - to: if_end_location, - }; - self.arena.alloc( - Expression::If { - condition, - body, - else_body, - }, - span, - ) - } - - /// Parses a `while` loop, assuming that `while` token was consumed. - /// - /// Produces an [`Expression::While`] spanning from the `while` keyword - /// to the end of the body. - #[must_use] - pub(crate) fn parse_while_cont( - &mut self, - while_start_location: TokenLocation, - ) -> ExpressionRef<'src, 'arena> { - let condition = self.parse_expression(); - let body = self.parse_expression(); - let span = AstSpan { - from: while_start_location, - to: body.span().to, - }; - self.arena - .alloc(Expression::While { condition, body }, span) - } - - /// Parses a `do ... until ...` loop after `do`, assuming that `do` token - /// was consumed. 
- /// - /// On a missing `until`, returns an error - /// [`ParseErrorKind::DoMissingUntil`]. - /// On success, produces an [`Expression::DoUntil`] spanning from `do` - /// to the end of the condition. - #[must_use] - pub(crate) fn parse_do_until_cont( - &mut self, - do_start_location: TokenLocation, - ) -> crate::parser::ParseExpressionResult<'src, 'arena> { - let body = self.parse_expression(); - - self.expect(Token::Until, ParseErrorKind::DoMissingUntil) - .widen_error_span_from(do_start_location)?; - let condition = self.parse_expression(); - let span = AstSpan { - from: do_start_location, - to: condition.span().to, - }; - Ok(self - .arena - .alloc(Expression::DoUntil { condition, body }, span)) - } - - /// Parses a `foreach` loop, assuming that `foreach` token was consumed. - /// - /// Produces an [`Expression::ForEach`] spanning from `foreach` - /// to the end of the body. - #[must_use] - pub(crate) fn parse_foreach_cont( - &mut self, - foreach_start_location: TokenLocation, - ) -> ExpressionRef<'src, 'arena> { - let iterator = self.parse_expression(); - - let body = self.parse_expression(); - let span = AstSpan { - from: foreach_start_location, - to: body.span().to, - }; - self.arena - .alloc(Expression::ForEach { iterator, body }, span) - } - - /// Parses a `for` loop after `for`, assuming that `for` token was consumed. - /// - /// Grammar: `for (init?; condition?; step?) body`. - /// Any of `init`, `condition`, or `step` may be omitted. - /// Emits specific `ParseErrorKind` values for missing - /// delimiters/separators. - /// On success returns an [`Expression::For`] spanning from `for` to - /// the end of the body. 
- #[must_use] - pub(crate) fn parse_for_cont( - &mut self, - for_start_location: TokenLocation, - ) -> crate::parser::ParseResult<'src, 'arena, ExpressionRef<'src, 'arena>> { - self.expect( - Token::LeftParenthesis, - ParseErrorKind::ForMissingOpeningParenthesis, - ) - .widen_error_span_from(for_start_location)?; - - let init = if let Some(Token::Semicolon) = self.peek_token() { - self.advance(); - None - } else { - let init = self.parse_expression(); - self.expect( - Token::Semicolon, - ParseErrorKind::ForMissingInitializationSemicolon, - )?; - Some(init) - }; - - let condition = if let Some(Token::Semicolon) = self.peek_token() { - self.advance(); - None - } else { - let condition = self.parse_expression(); - self.expect( - Token::Semicolon, - ParseErrorKind::ForMissingConditionSemicolon, - )?; - Some(condition) - }; - - let step = if let Some(Token::RightParenthesis) = self.peek_token() { - self.advance(); - None - } else { - let step = self.parse_expression(); - self.expect( - Token::RightParenthesis, - ParseErrorKind::ForMissingClosingParenthesis, - ) - .widen_error_span_from(for_start_location) - .sync_error_until(self, crate::parser::SyncLevel::CloseParenthesis)?; - Some(step) - }; - - let body = self.parse_expression(); - let span = AstSpan { - from: for_start_location, - to: body.span().to, - }; - Ok(self.arena.alloc( - Expression::For { - init, - condition, - step, - body, - }, - span, - )) - } -} diff --git a/rottlib/src/parser/grammar/declarations/enum_definition.rs b/rottlib/src/parser/grammar/declarations/enum_definition.rs new file mode 100644 index 0000000..b7387f6 --- /dev/null +++ b/rottlib/src/parser/grammar/declarations/enum_definition.rs @@ -0,0 +1,138 @@ +//! Parsing of enum definitions for Fermented `UnrealScript`. 
+ +use std::ops::ControlFlow; + +use crate::arena::ArenaVec; +use crate::ast::{AstSpan, EnumDefRef, EnumDefinition, IdentifierToken}; +use crate::lexer::Token; +use crate::lexer::TokenPosition; +use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; + +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +enum EnumParseState { + ExpectingVariant, + ExpectingSeparator, +} + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses an `enum` definition after the `enum` keyword has been consumed. + /// + /// Returns a reference to the allocated enum definition containing its name + /// and variants. + pub(crate) fn parse_enum_definition_tail( + &mut self, + enum_keyword_position: TokenPosition, + ) -> EnumDefRef<'src, 'arena> { + let name = self + .parse_identifier(ParseErrorKind::EnumExpectedNameOrBrace) + .unwrap_or_fallback(self); + self.expect(Token::LeftBrace, ParseErrorKind::EnumMissingLeftBrace) + .report_error(self); + let variants = self.parse_enum_variants(); + self.expect(Token::RightBrace, ParseErrorKind::EnumNoClosingBrace) + .report_error(self); + + let span = AstSpan::range( + enum_keyword_position, + self.last_consumed_position_or_start(), + ); + self.arena + .alloc_node(EnumDefinition { name, variants }, span) + } + + /// Parses the list of enum variants inside braces, handling commas and + /// errors. + /// + /// Returns a vector of successfully parsed variant identifiers. 
+ fn parse_enum_variants(&mut self) -> ArenaVec<'arena, IdentifierToken> { + use EnumParseState::{ExpectingSeparator, ExpectingVariant}; + + let mut variants = self.arena.vec(); + let mut parser_state = ExpectingVariant; + while let Some((next_token, next_token_position)) = self.peek_token_and_position() { + let should_break = match (parser_state, next_token) { + (_, Token::RightBrace) => break, + (ExpectingVariant, Token::Comma) => self + .recover_from_empty_enum_variant(next_token_position) + .is_break(), + (ExpectingVariant, _) => { + parser_state = ExpectingSeparator; + self.parse_and_push_enum_variant(&mut variants).is_break() + } + (ExpectingSeparator, Token::Comma) => { + self.advance(); // `,` + parser_state = ExpectingVariant; + false + } + (ExpectingSeparator, _) => self + .parse_enum_variant_after_missing_separator(next_token_position, &mut variants) + .is_break(), + }; + if should_break { + break; + } + self.ensure_forward_progress(next_token_position); + } + variants + } + + /// Recovers from one or more commas appearing where a variant is expected. + /// + /// Stops parsing if only a closing brace or end-of-file remains. + fn recover_from_empty_enum_variant( + &mut self, + error_start_position: TokenPosition, + ) -> ControlFlow<()> { + while self.peek_token() == Some(Token::Comma) { + self.advance(); + } + self.make_error_here(ParseErrorKind::EnumEmptyVariants) + .widen_error_span_from(error_start_position) + .report_error(self); + if matches!(self.peek_token(), Some(Token::RightBrace) | None) { + ControlFlow::Break(()) + } else { + ControlFlow::Continue(()) + } + } + + /// Parses one enum variant and appends it to `variants`. + /// + /// Stops parsing if recovery does not produce a valid identifier. 
+ fn parse_and_push_enum_variant( + &mut self, + variants: &mut ArenaVec<'arena, IdentifierToken>, + ) -> ControlFlow<()> { + self.parse_identifier(ParseErrorKind::EnumBadVariant) + .sync_error_until(self, SyncLevel::Statement) + .ok_or_report(self) + .map_or(ControlFlow::Break(()), |variant| { + variants.push(variant); + ControlFlow::Continue(()) + }) + } + + /// Parses a variant after a missing separator and reports the missing-comma + /// diagnostic if recovery succeeds. + fn parse_enum_variant_after_missing_separator( + &mut self, + error_start_position: TokenPosition, + variants: &mut ArenaVec<'arena, IdentifierToken>, + ) -> ControlFlow<()> { + let Some(variant) = self + .parse_identifier(ParseErrorKind::EnumBadVariant) + .widen_error_span_from(error_start_position) + .sync_error_until(self, SyncLevel::Statement) + .ok_or_report(self) + else { + // If we don't even get a good identifier - error is different + return ControlFlow::Break(()); + }; + self.make_error_here(ParseErrorKind::EnumNoSeparatorBetweenVariants) + .widen_error_span_from(error_start_position) + .report_error(self); + + variants.push(variant); + ControlFlow::Continue(()) + } +} diff --git a/rottlib/src/parser/grammar/declarations/mod.rs b/rottlib/src/parser/grammar/declarations/mod.rs new file mode 100644 index 0000000..e253187 --- /dev/null +++ b/rottlib/src/parser/grammar/declarations/mod.rs @@ -0,0 +1,11 @@ +//! Declaration parsing for Fermented `UnrealScript`. +//! +//! Implements recursive-descent parsing for declaration-related grammar: +//! type specifiers, enum and struct definitions, `var(...)` prefixes, +//! and variable declarators. + +mod enum_definition; +mod struct_definition; +mod type_specifier; // Type-specifier parsing (variable types). +mod var_specifiers; // `var(...)` editor specifiers and declaration-modifiers. +mod variable_declarators; // Comma-separated declarator lists (variable lists). 
diff --git a/rottlib/src/parser/grammar/declarations/struct_definition.rs b/rottlib/src/parser/grammar/declarations/struct_definition.rs new file mode 100644 index 0000000..2f4153b --- /dev/null +++ b/rottlib/src/parser/grammar/declarations/struct_definition.rs @@ -0,0 +1,210 @@ +//! Parsing of struct definitions for Fermented `UnrealScript`. +//! +//! ## C++ block handling +//! +//! The Fermented `UnrealScript` parser must support parsing several legacy +//! source files that contain `cpptext` or `cppstruct`. Our compiler does not +//! compile with C++ code and therefore does not need these blocks in +//! the resulting AST. We treat them the same as trivia and skip them. +//! +//! However, some related tokens are context-sensitive, so handling these +//! blocks in the general trivia-skipping path would complicate the separation +//! between the lexer and the parser. +//! +//! The resulting files will not be compiled, but they can still be used to +//! extract type information. + +use crate::arena::ArenaVec; +use crate::ast::{ + AstSpan, IdentifierToken, QualifiedIdentifierRef, StructDefRef, StructDefinition, StructField, + StructFieldRef, StructModifier, StructModifierKind, TypeSpecifierRef, VarEditorSpecifierRef, + VarModifier, +}; +use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; + +#[derive(Debug)] +struct ParsedStructFieldPrefix<'src, 'arena> { + editor_specifiers: Option>>, + declaration_modifiers: ArenaVec<'arena, VarModifier>, + type_specifier: TypeSpecifierRef<'src, 'arena>, +} + +#[derive(Debug)] +enum StructBodyItemParseOutcome<'src, 'arena> { + Field(StructFieldRef<'src, 'arena>), + Skip, + Stop, +} + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a `struct` definition after the `struct` keyword has been + /// consumed. 
+ pub(crate) fn parse_struct_definition_tail( + &mut self, + struct_keyword_position: TokenPosition, + ) -> StructDefRef<'src, 'arena> { + let modifiers = self.parse_struct_declaration_modifiers(); + let (name, base_type_name) = self.parse_struct_name_base_and_open_brace(); + + let mut fields = self.arena.vec(); + while let Some((next_token, next_position)) = self.peek_token_and_position() + && next_token != Token::RightBrace + { + match self.parse_or_skip_struct_body_item() { + StructBodyItemParseOutcome::Field(new_field) => fields.push(new_field), + StructBodyItemParseOutcome::Skip => (), + StructBodyItemParseOutcome::Stop => break, + } + self.ensure_forward_progress(next_position); + } + self.expect(Token::RightBrace, ParseErrorKind::StructMissingRightBrace) + .widen_error_span_from(struct_keyword_position) + .report_error(self); + let span = AstSpan::range( + struct_keyword_position, + self.last_consumed_position_or_start(), + ); + self.arena.alloc_node( + StructDefinition { + name, + base_type_name, + modifiers, + fields, + }, + span, + ) + } + + /// Parses one item in a struct body or skips an unsupported one. + /// + /// Returns [`StructBodyItemParseOutcome::Field`] for a successfully parsed + /// field, [`StructBodyItemParseOutcome::Skip`] when recovery allows parsing + /// to continue, and [`StructBodyItemParseOutcome::Stop`] when parsing + /// should stop at this level. + fn parse_or_skip_struct_body_item(&mut self) -> StructBodyItemParseOutcome<'src, 'arena> { + let Some((token, token_position)) = self.peek_token_and_position() else { + // This is the end of the file; + // it will be handled by a higher-level parser. 
+ return StructBodyItemParseOutcome::Stop; + }; + match token { + Token::Keyword(Keyword::CppText | Keyword::CppStruct) => { + self.advance(); + if !self.eat(Token::CppBlock) { + self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock); + self.recover_until(SyncLevel::Statement); + } + StructBodyItemParseOutcome::Skip + } + Token::Keyword(Keyword::Var) => { + self.advance(); + self.parse_struct_field_tail(token_position) + } + _ => { + self.report_error_here(ParseErrorKind::StructBodyUnexpectedItem); + self.recover_until(SyncLevel::BlockBoundary); + StructBodyItemParseOutcome::Skip + } + } + } + + /// Parses a struct field after the `var` keyword has been consumed. + /// + /// Returns [`StructBodyItemParseOutcome::Skip`] if the field cannot be + /// parsed far enough to produce a usable AST node after recovery. + fn parse_struct_field_tail( + &mut self, + var_keyword_position: TokenPosition, + ) -> StructBodyItemParseOutcome<'src, 'arena> { + let Some(field_prefix) = self.parse_struct_field_prefix() else { + return StructBodyItemParseOutcome::Skip; + }; + let declarators = self.parse_variable_declarators(); + if !self.eat(Token::Semicolon) { + self.report_error_here(ParseErrorKind::StructFieldMissingSemicolon); + self.recover_until(SyncLevel::BlockBoundary); + let _ = self.eat(Token::Semicolon); + } + if declarators.is_empty() { + return StructBodyItemParseOutcome::Skip; + } + let span = AstSpan::range(var_keyword_position, self.last_consumed_position_or_start()); + StructBodyItemParseOutcome::Field(self.arena.alloc_node( + StructField { + type_specifier: field_prefix.type_specifier, + declaration_modifiers: field_prefix.declaration_modifiers, + editor_specifiers: field_prefix.editor_specifiers, + declarators, + }, + span, + )) + } + + fn parse_struct_field_prefix(&mut self) -> Option> { + let editor_specifiers = self.parse_var_editor_specifier_list(); + let declaration_modifiers = self.parse_var_declaration_modifiers(); + let type_specification = self + 
.parse_type_specifier() + .sync_error_until(self, SyncLevel::BlockBoundary) + .ok_or_report(self)?; + Some(ParsedStructFieldPrefix { + editor_specifiers, + declaration_modifiers, + type_specifier: type_specification, + }) + } + + /// Parses the struct name, optional base type, and opening brace. + /// + /// Accepts anonymous structs that begin immediately with `{`. + fn parse_struct_name_base_and_open_brace( + &mut self, + ) -> ( + Option, + Option>, + ) { + if self.eat(Token::LeftBrace) { + return (None, None); + } + let name = self + .parse_identifier(ParseErrorKind::StructExpectedNameOrBrace) + .ok_or_report(self); + let base_type_name = + if let Some((Token::Keyword(Keyword::Extends), extends_keyword_position)) = + self.peek_token_and_position() + { + self.advance(); + self.parse_qualified_identifier(ParseErrorKind::StructExpectedBaseName) + .widen_error_span_from(extends_keyword_position) + .ok_or_report(self) + } else { + None + }; + self.expect(Token::LeftBrace, ParseErrorKind::StructMissingLeftBrace) + .report_error(self); + (name, base_type_name) + } + + fn parse_struct_declaration_modifiers(&mut self) -> ArenaVec<'arena, StructModifier> { + let mut modifiers = self.arena.vec(); + while let Some((next_keyword, next_keyword_position)) = self.peek_keyword_and_position() { + let next_modifier_kind = match next_keyword { + Keyword::Native => StructModifierKind::Native, + Keyword::Init => StructModifierKind::Init, + Keyword::Export => StructModifierKind::Export, + Keyword::NoExport => StructModifierKind::NoExport, + Keyword::Transient => StructModifierKind::Transient, + Keyword::Deprecated => StructModifierKind::Deprecated, + Keyword::Long => StructModifierKind::Long, + _ => break, + }; + modifiers.push(StructModifier { + kind: next_modifier_kind, + position: next_keyword_position, + }); + self.advance(); + } + modifiers + } +} diff --git a/rottlib/src/parser/grammar/declarations/type_specifier.rs b/rottlib/src/parser/grammar/declarations/type_specifier.rs new 
file mode 100644 index 0000000..6a86a93 --- /dev/null +++ b/rottlib/src/parser/grammar/declarations/type_specifier.rs @@ -0,0 +1,116 @@ +//! Parsing of type specifiers for Fermented `UnrealScript`. + +use crate::ast::{AstSpan, TypeSpecifier, TypeSpecifierRef}; +use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::parser::{ParseErrorKind, ParseResult, Parser}; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a type specifier used in variable declarations. + /// + /// Accepts named types, `class<...>` types, `array<...>` types, and inline + /// `enum` and `struct` definitions. + /// + /// Returns an error if the next tokens do not form a valid type specifier. + pub(crate) fn parse_type_specifier( + &mut self, + ) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> { + let (starting_token, starting_token_position) = + self.require_token_and_position(ParseErrorKind::TypeSpecExpectedType)?; + + match starting_token { + Token::Keyword(Keyword::Enum) => { + self.advance(); + Ok(self.parse_inline_enum_tail(starting_token_position)) + } + Token::Keyword(Keyword::Struct) => { + self.advance(); + Ok(self.parse_inline_struct_tail(starting_token_position)) + } + Token::Keyword(Keyword::Array) => { + self.advance(); + self.parse_array_type_specification_tail(starting_token_position) + } + Token::Keyword(Keyword::Class) => { + self.advance(); + self.parse_class_type_specification_tail(starting_token_position) + } + _ if starting_token.is_valid_type_name() => { + let type_name = + self.parse_qualified_identifier(ParseErrorKind::TypeSpecInvalidNamedTypeName)?; + let full_span = *type_name.span(); + Ok(self + .arena + .alloc_node(TypeSpecifier::Named(type_name), full_span)) + } + _ => Err(self.make_error_here(ParseErrorKind::TypeSpecExpectedType)), + } + } + + fn parse_inline_enum_tail( + &mut self, + starting_token_position: TokenPosition, + ) -> TypeSpecifierRef<'src, 'arena> { + let enum_definition = 
self.parse_enum_definition_tail(starting_token_position); + let enum_span = AstSpan::range(starting_token_position, enum_definition.span().token_to); + self.arena + .alloc_node(TypeSpecifier::InlineEnum(enum_definition), enum_span) + } + + fn parse_inline_struct_tail( + &mut self, + starting_token_position: TokenPosition, + ) -> TypeSpecifierRef<'src, 'arena> { + let struct_definition = self.parse_struct_definition_tail(starting_token_position); + let struct_span = + AstSpan::range(starting_token_position, struct_definition.span().token_to); + self.arena + .alloc_node(TypeSpecifier::InlineStruct(struct_definition), struct_span) + } + + fn parse_array_type_specification_tail( + &mut self, + starting_token_position: TokenPosition, + ) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> { + self.expect( + Token::Less, + ParseErrorKind::TypeSpecArrayMissingOpeningAngle, + )?; + let element_modifiers = self.parse_var_declaration_modifiers(); + let element_type = self.parse_type_specifier()?; + let closing_angle_bracket_position = self.expect( + Token::Greater, + ParseErrorKind::TypeSpecArrayMissingClosingAngle, + )?; + let array_span = AstSpan::range(starting_token_position, closing_angle_bracket_position); + + Ok(self.arena.alloc_node( + TypeSpecifier::Array { + element_type, + element_modifiers, + }, + array_span, + )) + } + + fn parse_class_type_specification_tail( + &mut self, + starting_token_position: TokenPosition, + ) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> { + let (inner_type_name, class_type_end) = if self.eat(Token::Less) { + let inner_type_name = Some( + self.parse_qualified_identifier(ParseErrorKind::TypeSpecClassMissingInnerType)?, + ); + let class_type_end = self.expect( + Token::Greater, + ParseErrorKind::TypeSpecClassMissingClosingAngle, + )?; + (inner_type_name, class_type_end) + } else { + (None, starting_token_position) + }; + let span = AstSpan::range(starting_token_position, class_type_end); + Ok(self + .arena + 
.alloc_node(TypeSpecifier::Class(inner_type_name), span)) + } +} diff --git a/rottlib/src/parser/grammar/declarations/var_specifiers.rs b/rottlib/src/parser/grammar/declarations/var_specifiers.rs new file mode 100644 index 0000000..bc144aa --- /dev/null +++ b/rottlib/src/parser/grammar/declarations/var_specifiers.rs @@ -0,0 +1,89 @@ +//! Parsing of declaration specifiers used in `var(...) ...` syntax for +//! Fermented `UnrealScript`. + +use crate::arena::ArenaVec; +use crate::ast::{VarEditorSpecifier, VarEditorSpecifierRef, VarModifier}; +use crate::lexer::Token; +use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a consecutive run of variable declaration modifiers. + /// + /// This is used for declarations such as + /// `var transient config editconst int X;`. + /// + /// Parsing stops when the next token is not a recognized [`VarModifier`]. + /// That token is left unconsumed for the caller. + /// + /// Returns the parsed modifiers in source order, or an empty vector if the + /// current token does not begin a modifier list. + #[must_use] + pub(crate) fn parse_var_declaration_modifiers(&mut self) -> ArenaVec<'arena, VarModifier> { + let mut modifiers = self.arena.vec(); + while let Some(current_token_and_position) = self.peek_token_and_position() { + let Ok(parsed_modifier) = VarModifier::try_from(current_token_and_position) else { + break; + }; + self.advance(); + modifiers.push(parsed_modifier); + } + modifiers + } + + /// Parses the optional parenthesized editor specifier list in `var(...)`. + /// + /// Assumes that `var` has already been consumed. + /// + /// Returns `None` if the current token is not `(`. Returns `Some(...)` once + /// `(` is present, including for an empty list. + /// + /// Recovery is intentionally minimal because these specifier lists are not + /// important enough to justify aggressive repair. 
+ #[must_use] + pub(crate) fn parse_var_editor_specifier_list( + &mut self, + ) -> Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>> { + if !self.eat(Token::LeftParenthesis) { + return None; + } + let mut editor_specifiers = self.arena.vec(); + while let Some((next_token, next_token_lexeme, next_token_position)) = + self.peek_token_lexeme_and_position() + && next_token != Token::RightParenthesis + { + if next_token == Token::StringLiteral { + self.advance(); + let string_value = self.unescape_string_literal(next_token_lexeme); + editor_specifiers.push(self.arena.alloc_node_at( + VarEditorSpecifier::String(string_value), + next_token_position, + )); + } else if let Some(specifier_identifier) = + Self::identifier_token_from_token(next_token, next_token_position) + { + self.advance(); + editor_specifiers.push(self.arena.alloc_node_at( + VarEditorSpecifier::Identifier(specifier_identifier), + next_token_position, + )); + } else { + self.make_error_here(ParseErrorKind::VarSpecNotIdentifier) + .sync_error_until(self, SyncLevel::ListSeparator) + .report_error(self); + } + // Detailed recovery is not worthwhile here; + // stop once list structure becomes unclear. + if !self.eat(Token::Comma) { + break; + } + self.ensure_forward_progress(next_token_position); + } + self.expect( + Token::RightParenthesis, + ParseErrorKind::VarSpecsMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + Some(editor_specifiers) + } +} diff --git a/rottlib/src/parser/grammar/declarations/variable_declarators.rs b/rottlib/src/parser/grammar/declarations/variable_declarators.rs new file mode 100644 index 0000000..54a7a41 --- /dev/null +++ b/rottlib/src/parser/grammar/declarations/variable_declarators.rs @@ -0,0 +1,172 @@ +//! Parsing of comma-separated variable declarator lists for +//! Fermented `UnrealScript`. +//! +//! Extends original `UnrealScript` by allowing array-size expressions and +//! declarator initializers. 
+ +#![allow(clippy::option_if_let_else)] + +use std::ops::ControlFlow; + +use crate::arena::ArenaVec; +use crate::ast::{AstSpan, OptionalExpression, VariableDeclarator, VariableDeclaratorRef}; +use crate::lexer::{Token, TokenPosition}; +use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel}; + +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +enum VariableDeclaratorParseState { + ExpectingDeclarator, + ExpectingSeparator, +} + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a comma-separated list of variable declarators. + /// + /// Accepts optional array-size expressions and `=` initializers. + #[must_use] + pub(crate) fn parse_variable_declarators( + &mut self, + ) -> ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> { + use VariableDeclaratorParseState::{ExpectingDeclarator, ExpectingSeparator}; + + let mut declarators = self.arena.vec(); + let mut parser_state = ExpectingDeclarator; + while let Some((next_token, next_token_position)) = self.peek_token_and_position() { + match (parser_state, next_token) { + (ExpectingDeclarator, Token::Semicolon) => { + self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations); + return declarators; + } + (ExpectingDeclarator, Token::Comma) => { + if self + .recover_empty_variable_declarator(next_token_position) + .is_break() + { + return declarators; + } + } + (ExpectingDeclarator, _) => { + if self + .parse_variable_declarator_into(&mut declarators) + .is_break() + { + // Breaking means we've failed to parse declarator + self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations); + break; + } + parser_state = ExpectingSeparator; + } + (ExpectingSeparator, Token::Comma) => { + self.advance(); + parser_state = ExpectingDeclarator; + } + (ExpectingSeparator, Token::Semicolon) => break, + (ExpectingSeparator, _) => { + if self + .recover_missing_variable_declarator_separator( + next_token_position, + &mut declarators, + ) + .is_break() + { + break; + } + } + } + 
self.ensure_forward_progress(next_token_position); + } + // In case of reaching EOF here, it does not matter if we emit + // an additional diagnostic. + // The caller is expected to report the more relevant enclosing error. + declarators + } + + fn recover_empty_variable_declarator( + &mut self, + error_start_position: TokenPosition, + ) -> ControlFlow<()> { + while self.peek_token() == Some(Token::Comma) { + self.advance(); + } + self.make_error_here(ParseErrorKind::DeclEmptyVariableDeclarations) + .widen_error_span_from(error_start_position) + .report_error(self); + if matches!(self.peek_token(), Some(Token::Semicolon) | None) { + ControlFlow::Break(()) + } else { + ControlFlow::Continue(()) + } + } + + fn parse_variable_declarator_into( + &mut self, + declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>, + ) -> ControlFlow<()> { + if let Some(parsed_declarator) = self + .parse_variable_declarator() + .sync_error_until(self, SyncLevel::Statement) + .ok_or_report(self) + { + declarators.push(parsed_declarator); + ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) + } + } + + fn recover_missing_variable_declarator_separator( + &mut self, + error_start_position: TokenPosition, + declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>, + ) -> ControlFlow<()> { + if let Some(parsed_declarator) = self + .parse_variable_declarator() + .widen_error_span_from(error_start_position) + .sync_error_until(self, SyncLevel::Statement) + .ok_or_report(self) + { + self.make_error_here(ParseErrorKind::DeclNoSeparatorBetweenVariableDeclarations) + .widen_error_span_from(error_start_position) + .report_error(self); + declarators.push(parsed_declarator); + ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) + } + } + + fn parse_variable_declarator( + &mut self, + ) -> ParseResult<'src, 'arena, VariableDeclaratorRef<'src, 'arena>> { + let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?; + let array_size = 
self.parse_optional_array_size(); + let initializer = self.parse_optional_variable_initializer(); + let span = AstSpan::range(name.0, self.last_consumed_position_or_start()); + Ok(self.arena.alloc_node( + VariableDeclarator { + name, + initializer, + array_size, + }, + span, + )) + } + + fn parse_optional_array_size(&mut self) -> OptionalExpression<'src, 'arena> { + if !self.eat(Token::LeftBracket) { + return None; + } + let array_size_expression = self.parse_expression(); + self.expect( + Token::RightBracket, + ParseErrorKind::DeclExpectedRightBracketAfterArraySize, + ) + .sync_error_at(self, SyncLevel::CloseBracket) + .report_error(self); + Some(array_size_expression) + } + + fn parse_optional_variable_initializer(&mut self) -> OptionalExpression<'src, 'arena> { + self.eat(Token::Assign).then(|| self.parse_expression()) + } +} diff --git a/rottlib/src/parser/grammar/expression/block.rs b/rottlib/src/parser/grammar/expression/block.rs new file mode 100644 index 0000000..192a19c --- /dev/null +++ b/rottlib/src/parser/grammar/expression/block.rs @@ -0,0 +1,109 @@ +//! Block-body parsing for Fermented `UnrealScript`. +//! +//! Provides shared routines for parsing `{ ... }`-delimited bodies used in +//! function, loop, state, and similar constructs after the opening `{` +//! has been consumed. + +use crate::arena::ArenaVec; +use crate::ast::{AstSpan, BlockBody, Expression, ExpressionRef, Statement, StatementRef}; +use crate::lexer::{Token, TokenPosition}; +use crate::parser::{ParseErrorKind, Parser}; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a `{ ... }` block after the opening `{` has been consumed. + /// + /// Consumes tokens until the matching `}` and returns an + /// [`Expression::Block`] whose span covers the entire block, from + /// `opening_brace_position` to the closing `}`. + /// + /// On premature end-of-file, returns a best-effort block. 
+ #[must_use] + pub(crate) fn parse_block_tail( + &mut self, + opening_brace_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let BlockBody { statements, span } = + self.parse_braced_block_statements_tail(opening_brace_position); + self.arena.alloc_node(Expression::Block(statements), span) + } + + /// Parses a `{ ... }` block after the opening `{` has been consumed. + /// + /// Consumes tokens until the matching `}` and returns the contained + /// statements together with a span that covers the entire block, from + /// `opening_brace_position` to the closing `}`. + /// + /// On premature end-of-file, returns a best-effort statement list and span. + #[must_use] + pub(crate) fn parse_braced_block_statements_tail( + &mut self, + opening_brace_position: TokenPosition, + ) -> BlockBody<'src, 'arena> { + let mut statements = self.arena.vec(); + while let Some((token, token_position)) = self.peek_token_and_position() { + if token == Token::RightBrace { + self.advance(); // '}' + let span = AstSpan::range(opening_brace_position, token_position); + return BlockBody { statements, span }; + } + self.parse_next_block_item_into(&mut statements); + self.ensure_forward_progress(token_position); + } + // Reached EOF without a closing `}` + self.report_error_here(ParseErrorKind::BlockMissingClosingBrace); + let span = AstSpan::range( + opening_brace_position, + self.last_consumed_position_or_start(), + ); + BlockBody { statements, span } + } + + /// Parses one statement inside a `{ ... }` block and appends it to + /// `statements`. + /// + /// This method never consumes the closing `}` and is only meant to be + /// called while parsing inside a block. It always appends at least one + /// statement, even in the presence of syntax errors. 
+ pub(crate) fn parse_next_block_item_into( + &mut self, + statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>, + ) { + let mut next_statement = self.parse_statement().unwrap_or_else(|| { + let next_expression = self.parse_expression(); + let next_expression_span = *next_expression.span(); + self.arena + .alloc_node(Statement::Expression(next_expression), next_expression_span) + }); + if statement_needs_semicolon(&next_statement) + && let Some((Token::Semicolon, semicolon_position)) = self.peek_token_and_position() + { + next_statement.span_mut().extend_to(semicolon_position); + self.advance(); // ';' + } + statements.push(next_statement); + } +} + +fn statement_needs_semicolon(statement: &Statement) -> bool { + use Statement::{Empty, Error, Expression, Function, Label, LocalVariableDeclaration}; + match statement { + Empty | Label(_) | Error | Function(_) => false, + Expression(expression) => expression_needs_semicolon(expression), + LocalVariableDeclaration { .. } => true, + } +} + +const fn expression_needs_semicolon(expression: &Expression) -> bool { + use Expression::{Block, DoUntil, Error, For, ForEach, If, Switch, While}; + matches!( + expression, + Block { .. } + | If { .. } + | While { .. } + | DoUntil { .. } + | ForEach { .. } + | For { .. } + | Switch { .. } + | Error + ) +} diff --git a/rottlib/src/parser/grammar/expression/control_flow.rs b/rottlib/src/parser/grammar/expression/control_flow.rs new file mode 100644 index 0000000..eef4358 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/control_flow.rs @@ -0,0 +1,446 @@ +//! Control expression parsing for Fermented `UnrealScript`. +//! +//! ## Condition parsing and legacy compatibility +//! +//! Fermented `UnrealScript` allows omitting parentheses `(...)` around the +//! condition expression of `if`/`while`/etc. For compatibility with older +//! `UnrealScript` code, we also apply a special rule: +//! +//! If a condition starts with `(`, we parse the condition as exactly the +//! 
matching parenthesized subexpression and stop at its corresponding `)`. +//! In other words, `( ... )` must cover the whole condition; trailing tokens +//! like `* c == d` are not allowed to continue the condition. +//! +//! This prevents the parser from accidentally consuming the following +//! statement/body as part of the condition in older code such as: +//! +//! ```unrealscript +//! if ( AIController(Controller) != None ) Cross = vect(0,0,0); +//! ``` +//! +//! Trade-off: you cannot write `if (a + b) * c == d`; +//! write `if ((a + b) * c == d)` or `if d == (a + b) * c` instead. +//! +//! ## Disambiguation of `for` as loop vs expression +//! +//! Unlike other control-flow keywords, `for` is disambiguated from functions +//! or variables with the same name. This is done syntactically in +//! [`Parser::is_for_loop_header_ahead`]: a `for` token followed by +//! a `(` whose contents contain a top-level `;` is unambiguously a loop header. +//! +//! This rule is lightweight, local, and robust, and mirrors the fixed grammar +//! `for (init; condition; step)` without requiring name resolution. +//! +//! ### Why this is not done for `if` / `while` / `do` +//! +//! There is no similarly reliable way to discriminate `if`, `while`, or related +//! keywords at this stage of parsing: their parenthesized forms are +//! indistinguishable from single-argument function calls. +//! +//! Supporting these keywords as identifiers would complicate parsing +//! disproportionately, so we always treat them as openers for conditional and +//! cycle expressions. This matches common `UnrealScript` usage and intentionally +//! drops support for moronic design choices where such names were reused +//! as variables or functions (like what the author did by declaring +//! a `For` function in Acedia). +//! +//! ### But what about `switch`? +//! +//! `switch` is handled separately because, in existing `UnrealScript` code, +//! it may appear either as a keyword-led construct or as an identifier. +//! +//! 
Its disambiguation rule is simpler than for `for`: if the next token is +//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains +//! available as an identifier. +//! +//! This rule is local and purely syntactic, matching the behavior expected by +//! the existing codebase we support. The actual parsing of `switch` expressions +//! lives in a separate module because the construct itself is more involved +//! than the control-flow forms handled here. + +use crate::ast::{AstSpan, BranchBody, Expression, ExpressionRef}; +use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a control-flow condition. + /// + /// If the next token is `(`, attempts to consume one parenthesized + /// subexpression and returns it wrapped as [`Expression::Parentheses`]. + /// Otherwise consumes a general expression. + fn parse_condition(&mut self) -> ExpressionRef<'src, 'arena> { + if let Some((Token::LeftParenthesis, left_parenthesis_position)) = + self.peek_token_and_position() + { + self.advance(); // '(' + let condition_expression = self.parse_expression(); + let right_parenthesis_position = self + .expect( + Token::RightParenthesis, + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { + left_parenthesis_position, + }, + ) + .widen_error_span_from(left_parenthesis_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .unwrap_or_fallback(self); + self.arena.alloc_node_between( + Expression::Parentheses(condition_expression), + left_parenthesis_position, + right_parenthesis_position, + ) + } else { + self.parse_expression() + } + } + + /// Parses a branch body for a control-flow construct. + /// + /// Normalizes the following source forms into a [`BranchBody`]: + /// + /// - empty body with semicolon: `if (cond);` + /// - empty body before a closing `}`: `if (cond) }` + /// - non-empty block body: `if (cond) { ... 
}` + /// - non-empty single-expression body: `if (cond) expr;` + /// + /// For non-block bodies, this method consumes a trailing `;` when present + /// and records its position in the returned [`BranchBody`]. + fn parse_branch_body(&mut self) -> BranchBody<'src, 'arena> { + let Some((first_token, first_token_position)) = self.peek_token_and_position() else { + let error = self.make_error_here(ParseErrorKind::MissingBranchBody); + self.report_error(error); + return BranchBody { + expression: None, + semicolon_position: None, + end_anchor_token_position: error.covered_span.token_to, + }; + }; + // `if (is_condition);` + if first_token == Token::Semicolon { + self.advance(); // ';' + return BranchBody { + expression: None, + semicolon_position: Some(first_token_position), + end_anchor_token_position: first_token_position, + }; + } + // `{ ... if (is_condition) }` + if first_token == Token::RightBrace { + return BranchBody { + expression: None, + semicolon_position: None, + // `unwrap` actually triggering is effectively impossible, + // because by the time a branch body is parsed, some prior token + // (e.g. `if`, `)`, etc.) has already been consumed, + // so the parser should have a last-consumed position + end_anchor_token_position: self + .last_consumed_position() + .unwrap_or(first_token_position), + }; + } + let branch_expression = self.parse_expression(); + let end_anchor_token_position = branch_expression.span().token_to; + // A block body in `if {...}` or `if {...};` owns its own terminator; + // a following `;` does not belong to the branch body. 
+ if let Expression::Block(_) = *branch_expression { + return BranchBody { + expression: Some(branch_expression), + semicolon_position: None, + end_anchor_token_position, + }; + } + // For single-expression bodies, consume a trailing semicolon if present + let trailing_semicolon_position = if self.eat(Token::Semicolon) { + self.last_consumed_position() + } else { + None + }; + BranchBody { + expression: Some(branch_expression), + semicolon_position: trailing_semicolon_position, + end_anchor_token_position: trailing_semicolon_position + .unwrap_or(end_anchor_token_position), + } + } + + /// Parses an `if` expression after the `if` keyword. + /// + /// The resulting [`Expression::If`] spans from `if_keyword_position` to the + /// end of the `if` body, or to the end of the `else` body if one is + /// present. + #[must_use] + pub(crate) fn parse_if_tail( + &mut self, + if_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let condition = self.parse_condition(); + let body = self.parse_branch_body(); + + let (else_body, if_end_position) = if self.peek_keyword() == Some(Keyword::Else) { + self.advance(); // 'else' + let else_body = self.parse_branch_body(); + let else_body_end = else_body.end_anchor_token_position; + (Some(else_body), else_body_end) + } else { + (None, body.end_anchor_token_position) + }; + + let span = AstSpan::range(if_keyword_position, if_end_position); + self.arena.alloc_node( + Expression::If { + condition, + body, + else_body, + }, + span, + ) + } + + /// Parses a `while` expression after the `while` keyword. + /// + /// The resulting [`Expression::While`] spans from `while_keyword_position` + /// to the end of its body. 
+ #[must_use] + pub(crate) fn parse_while_tail( + &mut self, + while_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let condition = self.parse_condition(); + let body = self.parse_branch_body(); + let span = AstSpan::range(while_keyword_position, body.end_anchor_token_position); + self.arena + .alloc_node(Expression::While { condition, body }, span) + } + + /// Parses a `do ... until ...` expression after the `do` keyword. + /// + /// The resulting [`Expression::DoUntil`] spans from `do_keyword_position` + /// to the end of the condition. + #[must_use] + pub(crate) fn parse_do_until_tail( + &mut self, + do_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let body = self.parse_branch_body(); + + let condition = if self + .expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil) + .widen_error_span_from(do_keyword_position) + .report_error(self) + { + crate::arena::ArenaNode::new_in( + Expression::Error, + AstSpan::new(body.end_anchor_token_position), + self.arena, + ) + } else { + self.parse_condition() + }; + let span = AstSpan::range(do_keyword_position, condition.span().token_to); + self.arena + .alloc_node(Expression::DoUntil { condition, body }, span) + } + + /// Parses a `foreach` expression after the `foreach` keyword. + /// + /// The iterator part is consumed as a regular expression, followed by a + /// branch body. + /// + /// The resulting [`Expression::ForEach`] spans from + /// `foreach_keyword_position` to the end of the body. + #[must_use] + pub(crate) fn parse_foreach_tail( + &mut self, + foreach_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + // UnrealScript `foreach` iterator expressions are simple enough that + // they do not need the special parenthesized-condition handling used by + // `parse_condition()`. 
+ let iterated_expression = self.parse_expression(); + + let body = self.parse_branch_body(); + let span = AstSpan::range(foreach_keyword_position, body.end_anchor_token_position); + self.arena.alloc_node( + Expression::ForEach { + iterated_expression, + body, + }, + span, + ) + } + + /// Returns whether the upcoming tokens have the syntactic shape of a + /// `for (...)` header. + /// + /// More precisely, this returns `true` iff the next token is `(` and a + /// top-level `;` appears before the matching `)` is closed or input ends. + /// + /// This is used only for loop-vs-identifier disambiguation. + pub(crate) fn is_for_loop_header_ahead(&mut self) -> bool { + if self.peek_token() != Some(Token::LeftParenthesis) { + return false; + } + let mut nesting_depth: usize = 1; + let mut lookahead_token_offset: usize = 1; + while let Some(next_token) = self.peek_token_at(lookahead_token_offset) { + match next_token { + Token::LeftParenthesis => nesting_depth += 1, + Token::RightParenthesis => { + if nesting_depth <= 1 { + // End of the immediate `for (...)` group without a + // top-level `;`: not a loop header. + return false; + } + nesting_depth -= 1; + } + Token::Semicolon if nesting_depth == 1 => return true, + _ => (), + } + lookahead_token_offset += 1; + } + false + } + + /// Parses a `for` expression after the `for` keyword. + /// + /// This method expects the standard header shape + /// `for (initialization; condition; step)` and then parses a branch body. + /// + /// Each header component may be omitted. The resulting [`Expression::For`] + /// spans from `for_keyword_position` to the end of the body. + #[must_use] + pub(crate) fn parse_for_tail( + &mut self, + for_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + // This path is expected to be entered only after + // `is_for_loop_header_ahead()`, so the opening `(` and at least one + // top-level `;` should already be structurally guaranteed. 
+ self.expect( + Token::LeftParenthesis, + ParseErrorKind::ForMissingOpeningParenthesis, + ) + .widen_error_span_from(for_keyword_position) + .report_error(self); + + let initialization = if self.peek_token() == Some(Token::Semicolon) { + self.advance(); + None + } else { + let init = self.parse_expression(); + self.expect( + Token::Semicolon, + ParseErrorKind::ForMissingInitializationSemicolon, + ) + .report_error(self); + Some(init) + }; + + let condition = if self.peek_token() == Some(Token::Semicolon) { + self.advance(); + None + } else { + let condition = self.parse_expression(); + self.expect( + Token::Semicolon, + ParseErrorKind::ForMissingConditionSemicolon, + ) + .report_error(self); + Some(condition) + }; + + let step = if self.peek_token() == Some(Token::RightParenthesis) { + self.advance(); + None + } else { + let step = self.parse_expression(); + self.expect( + Token::RightParenthesis, + ParseErrorKind::ForMissingClosingParenthesis, + ) + .widen_error_span_from(for_keyword_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + Some(step) + }; + + let body = self.parse_branch_body(); + let span = AstSpan::range(for_keyword_position, body.end_anchor_token_position); + self.arena.alloc_node( + Expression::For { + initialization, + condition, + step, + body, + }, + span, + ) + } + + /// Parses the continuation of a `return` expression after its keyword. + /// + /// If the next token is not `;`, consumes a return value expression. + /// The terminating `;` is not consumed here. 
+ #[must_use] + pub(crate) fn parse_return_tail( + &mut self, + return_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (value, span) = if self.peek_token() == Some(Token::Semicolon) { + (None, AstSpan::new(return_keyword_position)) + } else { + let returned_value = self.parse_expression(); + let span = AstSpan::range(return_keyword_position, returned_value.span().token_to); + (Some(returned_value), span) + }; + self.arena.alloc_node(Expression::Return(value), span) + } + + /// Parses the continuation of a `break` expression after its keyword. + /// + /// If the next token is not `;`, consumes a break value expression. + /// The terminating `;` is not consumed here. + #[must_use] + pub(crate) fn parse_break_tail( + &mut self, + break_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (value, span) = if self.peek_token() == Some(Token::Semicolon) { + (None, AstSpan::new(break_keyword_position)) + } else { + let returned_value = self.parse_expression(); + let span = AstSpan::range(break_keyword_position, returned_value.span().token_to); + (Some(returned_value), span) + }; + self.arena.alloc_node(Expression::Break(value), span) + } + + /// Parses the continuation of a `goto` expression after its keyword. + /// + /// Accepts either a name literal or an identifier as the target label. 
+ #[must_use] + pub(crate) fn parse_goto_tail( + &mut self, + goto_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + if let Some((label_token, label_position)) = self.peek_token_and_position() + && (label_token == Token::NameLiteral || label_token == Token::Identifier) + { + self.advance(); + return self.arena.alloc_node_between( + Expression::Goto(label_position), + goto_keyword_position, + label_position, + ); + } + self.make_error_here(ParseErrorKind::GotoMissingLabel) + .widen_error_span_from(goto_keyword_position) + .sync_error_until(self, SyncLevel::Statement) + .report_error(self); + crate::arena::ArenaNode::new_in( + Expression::Error, + AstSpan::new(goto_keyword_position), + self.arena, + ) + } +} diff --git a/rottlib/src/parser/grammar/expression/identifier.rs b/rottlib/src/parser/grammar/expression/identifier.rs new file mode 100644 index 0000000..2612de5 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/identifier.rs @@ -0,0 +1,76 @@ +//! Identifier parsing for Fermented `UnrealScript`. +//! +//! Provides shared routines for parsing both regular and qualified identifiers, +//! e.g. `KFChar.ZombieClot`. + +use crate::arena::{self, ArenaVec}; +use crate::ast::{AstSpan, IdentifierToken, QualifiedIdentifier, QualifiedIdentifierRef}; +use crate::lexer::{self, Token}; +use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt}; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses an identifier. + /// + /// On failure (unexpected end-of-file or a token that cannot be used as an + /// identifier), produces `invalid_identifier_error_kind`. 
+ pub(crate) fn parse_identifier( + &mut self, + invalid_identifier_error_kind: ParseErrorKind, + ) -> ParseResult<'src, 'arena, IdentifierToken> { + let (token, token_position) = + self.require_token_and_position(invalid_identifier_error_kind)?; + let identifier = Parser::identifier_token_from_token(token, token_position) + .ok_or_else(|| self.make_error_here(invalid_identifier_error_kind))?; + self.advance(); + Ok(identifier) + } + + /// Returns an [`IdentifierToken`] for `token` if it is valid as an + /// identifier name. + /// + /// This helper performs only token-to-identifier validation/wrapping; + /// it does not consume input from the parser. + pub(crate) fn identifier_token_from_token( + token: Token, + token_position: lexer::TokenPosition, + ) -> Option<IdentifierToken> { + token + .is_valid_identifier_name() + .then_some(IdentifierToken(token_position)) + } + + /// Parses a qualified (dot-separated) identifier path, + /// e.g. `KFChar.ZombieClot`. + /// + /// This is used for name paths where each segment must be + /// a valid identifier and segments are separated by `.` tokens. + /// + /// On failure produces an error of specified [`ParseErrorKind`] + /// `invalid_identifier_error_kind`. + pub(crate) fn parse_qualified_identifier( + &mut self, + invalid_identifier_error_kind: ParseErrorKind, + ) -> ParseResult<'src, 'arena, QualifiedIdentifierRef<'arena>> { + let head = self.parse_identifier(invalid_identifier_error_kind)?; + let mut tail = None; + + let span_start = head.0; + let mut span_end = span_start; + while self.peek_token() == Some(Token::Period) { + self.advance(); // '.' 
+ let next_segment = self + .parse_identifier(invalid_identifier_error_kind) + .widen_error_span_from(head.0)?; + span_end = next_segment.0; + + let tail_vec = tail.get_or_insert_with(|| ArenaVec::new_in(self.arena)); + tail_vec.push(next_segment); + } + + Ok(arena::ArenaNode::new_in( + QualifiedIdentifier { head, tail }, + AstSpan::range(span_start, span_end), + self.arena, + )) + } +} diff --git a/rottlib/src/parser/grammar/expression/literals.rs b/rottlib/src/parser/grammar/expression/literals.rs new file mode 100644 index 0000000..fa99c9e --- /dev/null +++ b/rottlib/src/parser/grammar/expression/literals.rs @@ -0,0 +1,123 @@ +//! Literal decoding for Fermented `UnrealScript`. +//! +//! This module defines the semantic rules for interpreting literal tokens +//! produced by the lexer. It is responsible only for *decoding* the textual +//! representation of literals into their internal values. +//! +//! The rules implemented here intentionally mirror the quirks of +//! Unreal Engine 2’s `UnrealScript`. + +use crate::parser::{ParseErrorKind, ParseResult}; + +impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { + /// Decodes an integer literal string into [`u128`]. + /// + /// Syntax: + /// - Optional base prefix: `0b` | `0o` | `0x` (case-insensitive). + /// No prefix -> decimal. + /// - Digits must match the base (`0-1`/`0-7`/`0-9A-F`). + /// - Underscores are allowed and ignored (e.g., `1_000`, `0xDE_AD`). + /// - No leading sign; parsed as a non-negative magnitude. + /// - Must fit within [`u128`]. + /// + /// Examples: `42`, `0b1010_0011`, `0o755`, `0xDEAD_BEEF`. + /// + /// On failure, returns [`ParseErrorKind::InvalidNumericLiteral`] at + /// the parser's current cursor position. 
+ pub(crate) fn decode_integer_literal(&self, literal: &str) -> ParseResult<'src, 'arena, u128> { + let (base, content) = match literal.split_at_checked(2) { + Some(("0b" | "0B", stripped)) => (2, stripped), + Some(("0o" | "0O", stripped)) => (8, stripped), + Some(("0x" | "0X", stripped)) => (16, stripped), + _ => (10, literal), + }; + let digits_without_underscores = content.replace('_', ""); + u128::from_str_radix(&digits_without_underscores, base) + .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) + } + + /// Decodes a float literal as `f64`, following the permissive and only + /// partially documented behavior of `UnrealScript`. + /// + /// Unreal Engine 2 does not define a precise and consistent set of rules + /// for float literals and the original compiler contains several quirks. + /// Because of this, we default to normalizing the text using a small set of + /// UnrealScript-specific rules and then parse the result using rust's + /// `f64` parser. + /// + /// Rules implemented here: + /// - Only decimal floats and special literals (e.g. `NaN`, `inf`) + /// are supported (no hex or binary formats). + /// - A single trailing `f` or `F`, if present, is removed before parsing. + /// - The literal text is scanned for periods (`.`). If a second period + /// is found, everything from that second `.` onward is discarded. + /// + /// Examples: + /// * `1.2.3e4` becomes `1.2` + /// * `1.2e3.4` becomes `1.2e3` + /// + /// - After this truncation step, the remaining text is interpreted as a + /// normal rust `f64` literal. This means it may contain digits, at + /// most one decimal point, and an optional exponent part (for example + /// `e3` or `E-2`), but it must otherwise follow rust's `f64` syntax. + /// Underscores, spaces, and other unsupported characters cause a + /// parse error. + /// + /// On failure, this function returns + /// [`ParseErrorKind::InvalidNumericLiteral`] at the current parser + /// position. 
+ pub(crate) fn decode_float_literal(&self, literal: &str) -> ParseResult<'src, 'arena, f64> { + let content = literal + .strip_suffix('f') + .or_else(|| literal.strip_suffix('F')) + .unwrap_or(literal); + // Truncate after the second '.', matching UnrealScript behavior + let content = content + .match_indices('.') + .nth(1) + .and_then(|(period_index, _)| content.get(..period_index)) + .unwrap_or(content); + content + .parse::<f64>() + .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) + } + + /// Unescapes a tokenized string literal into an arena string. + /// + /// Supported escapes: `\n`, `\t`, `\"`, `\\`. + /// Unknown escapes drop the backslash and emit the character unchanged + /// (`UnrealScript` behavior). + /// If `raw_string` ends with a trailing `\` (which should not happen for + /// well-formed tokens), that backslash is simply ignored. + /// + /// This function assumes `raw_string` is the token text without surrounding + /// quotes. + pub(crate) fn unescape_string_literal( + &self, + raw_string: &str, + ) -> crate::arena::ArenaString<'arena> { + let mut buffer = String::with_capacity(raw_string.len()); + let mut characters = raw_string.chars(); + while let Some(next_character) = characters.next() { + if next_character == '\\' { + // Under the lexer contract, string tokens do not end with a lone + // backslash, so there is always a following character. If this + // invariant is broken, the final '\' is simply ignored here. + if let Some(escaped_character) = characters.next() { + match escaped_character { + 'n' => buffer.push('\n'), + 't' => buffer.push('\t'), + '"' => buffer.push('"'), + '\\' => buffer.push('\\'), + // Simply leaving the escaped character matches + // UnrealScript behavior. 
+ unrecognized_escape_char => buffer.push(unrecognized_escape_char), + } + } + } else { + buffer.push(next_character); + } + } + self.arena.string(&buffer) + } +} diff --git a/rottlib/src/parser/grammar/expression/mod.rs b/rottlib/src/parser/grammar/expression/mod.rs new file mode 100644 index 0000000..ec0d419 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/mod.rs @@ -0,0 +1,32 @@ +//! Expression parsing for Fermented `UnrealScript`. +//! +//! This module group implements the language's expression parser around a +//! Pratt-style core. It is split into small submodules by role: precedence, +//! identifiers, literals, selectors, block bodies, keyword-led/control-flow +//! forms, primary-expression dispatch, and the Pratt driver itself. +//! +//! The parser is designed to keep building a best-effort AST on malformed +//! input. Syntax problems are reported through diagnostics, while committed +//! parsers recover locally and return fallback nodes or partial structures when +//! necessary. +//! +//! ## Expression layering +//! +//! The parser distinguishes several layers of expression parsing: +//! +//! - **primaries**: forms that can be parsed directly from the current token, +//! without an already parsed left-hand side; +//! - **selectors**: suffix continuations such as member access, indexing, and +//! calls, which require a left-hand side; +//! - **prefix / postfix / infix operators**: handled by the Pratt parser using +//! precedence ranks. + +mod block; // `{ ... }` block-body parsing and block/expression item handling. +mod control_flow; // `if`, `while`, `do`, `foreach`, `for`, `return`, etc. +mod identifier; // Identifier and qualified-name parsing helpers. +mod literals; // Literal decoding and literal-specific parsing utilities. +mod pratt; // Top-level Pratt driver. +mod precedence; // Operator precedence ranks and Pratt binding rules. +mod primary; // Primary-expression parsing and keyword-vs-identifier dispatch. 
+mod selectors; // Suffix continuations: member access, indexing, and calls. +mod switch; // `switch (...) { ... }` parsing and arm/body recovery. diff --git a/rottlib/src/parser/grammar/expression/pratt.rs b/rottlib/src/parser/grammar/expression/pratt.rs new file mode 100644 index 0000000..d1c022c --- /dev/null +++ b/rottlib/src/parser/grammar/expression/pratt.rs @@ -0,0 +1,194 @@ +//! Core of the expression parser for Fermented `UnrealScript`. +//! +//! This module implements a Pratt-style parser for the language's expression +//! grammar, supporting: +//! +//! * Primary expressions (see [`crate::parser::primary`] for details on what +//! we consider to be a primary expression); +//! * Prefix operators; +//! * Postfix operators; +//! * Infix operators with hard-coded precedence and associativity. +//! +//! Parsing is driven by [`PrecedenceRank`], which controls how tightly +//! operators bind. Infix parsing uses the pair of binding powers returned by +//! [`super::precedence::infix_precedence_ranks`] to encode associativity. +//! The parser infrastructure supports both left- and right-associative +//! operators, but Fermented `UnrealScript` currently defines only +//! left-associative ones. +//! +//! ## Postfix operator vs "selectors" +//! +//! Everywhere here we distinguish *selectors* like field accessor `.`, +//! function call `()` or array indices `[]` from other *postfix operators* +//! as they: +//! +//! 1. Have significantly different semantic meaning; +//! 2. Are not considered operators from `UnrealScript`'s viewpoint +//! (e.g. cannot be overloaded). +//! +//! ## See also +//! +//! - [`parser::Parser::parse_expression`] - main entry point +//! - [`PrecedenceRank`] - operator binding strengths +//! 
- [`super::precedence`] - operator precedence definitions + +use crate::ast::{self, Expression, ExpressionRef}; +use crate::parser::{self, Parser, ResultRecoveryExt}; + +pub use super::precedence::PrecedenceRank; + +/// Returns whether postfix operators like `++` and `--` are disallowed +/// after this expression. +/// +/// This restriction applies only to postfix operators. Selectors such as +/// field access `.x`, indexing `[i]`, and calls `(args)` remain allowed. +fn forbids_postfix_operators(expression: &ExpressionRef<'_, '_>) -> bool { + matches!( + **expression, + Expression::If { .. } + | Expression::While { .. } + | Expression::DoUntil { .. } + | Expression::For { .. } + | Expression::ForEach { .. } + | Expression::Switch { .. } + | Expression::Block { .. } + ) +} + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses an expression. + /// + /// Always returns some expression node; any syntax errors are reported + /// through the parser's diagnostics. + #[must_use] + pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> { + self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST) + } + + /// Parses an expression, including only operators with binding power + /// at least `min_precedence_rank` (as tight or tighter). + fn parse_expression_with_min_precedence_rank( + &mut self, + min_precedence_rank: PrecedenceRank, + ) -> ExpressionRef<'src, 'arena> { + let mut left_hand_side = self + .parse_prefix_or_primary() + .sync_error_until(self, parser::SyncLevel::Expression) + .unwrap_or_fallback(self); + left_hand_side = self + .parse_selectors_into(left_hand_side) + .unwrap_or_fallback(self); + // We disallow only postfix operators after expression forms that + // represent control-flow or block constructs. Selectors are still + // parsed normally. 
+ // This avoids ambiguities in cases like: + // + // ```unrealscript + // if test() { do_it(); } + // ++ counter; + // ``` + // + // This wasn't a problem in UnrealScript, because such constructs were + // never treated as expressions. And it shouldn't be an issue for us + // because neither `--` or `++` (the only existing default postfix + // operators) make any sense after such expressions anyway. + if !forbids_postfix_operators(&left_hand_side) { + left_hand_side = self.parse_postfix_into(left_hand_side); + } + self.parse_infix_into(left_hand_side, min_precedence_rank) + } + + /// Parses a prefix or primary expression (Pratt parser's "nud" or + /// null denotation). + fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> { + let (token, token_lexeme, token_position) = + self.require_token_lexeme_and_position(parser::ParseErrorKind::MissingExpression)?; + self.advance(); + if let Ok(operator) = ast::PrefixOperator::try_from(token) { + // In UnrealScript, prefix and postfix operators bind tighter than + // any infix operators, so we can safely parse the right hand side + // at the tightest precedence. + let right_hand_side = + self.parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST); + Ok(Expression::new_prefix( + self.arena, + token_position, + operator, + right_hand_side, + )) + } else { + self.parse_primary_from_current_token(token, token_lexeme, token_position) + } + } + + /// Parses all postfix operators it can, creating a tree with + /// `left_hand_side` as a child. + fn parse_postfix_into( + &mut self, + mut left_hand_side: ExpressionRef<'src, 'arena>, + ) -> ExpressionRef<'src, 'arena> { + while let Some((operator, operator_position)) = self.peek_postfix_with_position() { + self.advance(); + left_hand_side = + Expression::new_postfix(self.arena, left_hand_side, operator, operator_position); + } + left_hand_side + } + + /// Parses infix operators binding at least as tight as + /// `min_precedence_rank`. 
+ /// + /// Associativity is encoded by + /// [`super::precedence::infix_precedence_ranks`]. + /// + /// Stops when the next operator is looser than `min_precedence_rank`. + fn parse_infix_into( + &mut self, + mut left_hand_side: ExpressionRef<'src, 'arena>, + min_precedence_rank: PrecedenceRank, + ) -> ExpressionRef<'src, 'arena> { + while let Some((operator, right_precedence_rank)) = + self.peek_infix_with_min_precedence_rank(min_precedence_rank) + { + self.advance(); + let right_hand_side = + self.parse_expression_with_min_precedence_rank(right_precedence_rank); + left_hand_side = + Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side); + } + left_hand_side + } + + /// Returns the next postfix operator and its position if present. + /// + /// Helper to avoid peeking and mapping twice; used to drive the postfix + /// loop without unwraps. + fn peek_postfix_with_position( + &mut self, + ) -> Option<(ast::PostfixOperator, crate::lexer::TokenPosition)> { + let (token, token_position) = self.peek_token_and_position()?; + let Ok(operator) = ast::PostfixOperator::try_from(token) else { + return None; + }; + Some((operator, token_position)) + } + + /// If the next token is an infix operator with left binding power at least + /// `min_precedence_rank`, returns its operator and the minimum precedence + /// rank to use when parsing the right-hand side (i.e. the operator's right + /// binding power). + /// + /// Otherwise returns [`None`]. 
+ fn peek_infix_with_min_precedence_rank( + &mut self, + min_precedence_rank: PrecedenceRank, + ) -> Option<(ast::InfixOperator, PrecedenceRank)> { + let (left_precedence_rank, operator, right_precedence_rank) = self + .peek_token() + .and_then(super::precedence::infix_precedence_ranks)?; + if left_precedence_rank.is_looser_than(min_precedence_rank) { + return None; + } + Some((operator, right_precedence_rank)) + } +} diff --git a/rottlib/src/parser/grammar/expression/precedence.rs b/rottlib/src/parser/grammar/expression/precedence.rs new file mode 100644 index 0000000..7f5b6c8 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/precedence.rs @@ -0,0 +1,93 @@ +//! Precedence tables for Fermented `UnrealScript` operators. +//! +//! These values don't follow the usual *binding power* convention for +//! a Pratt parser, where tighter binding corresponds to a larger number.\ +//! Here, the smaller the number, the tighter the binding power.\ +//! For this reason, we use the term *precedence rank* instead. +//! +//! ## Operators sorted by precedence (lowest number = tighter binding) +//! +//! ### Infix operators +//! +//! All infix operators in `UnrealScript` are +//! [left-associative](https://wiki.beyondunreal.com/Operators). +//! +//! 12: `**` +//! 16: `*`, `/`, `Cross`, `Dot` +//! 18: `%` +//! 20: `+`, `-` +//! 22: `<<`, `>>`, `>>>` +//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom` +//! 26: `!=` +//! 28: `&`, `^`, `|` +//! 30: `&&`, `^^` +//! 32: `||` +//! 34: `*=`, `/=`, `+=`, `-=` +//! 40: `$`, `*`, `@` +//! 44: `$=`, `*=`, `@=` +//! 45: `-=` +//! +//! Some operators, such as `*`, appear twice with different precedence +//! ranks because they were defined with different values for different types +//! in separate script source files (as in the Killing Floor sources).\ +//! However, `UnrealScript` uses only the first definition it encounters in +//! `Object.uc`, which corresponds to the lower value. +//! +//! ### Prefix operators +//! +//! 
`!`, `~`, `+`, `-`, `++`, `--`. +//! +//! ### Postfix operators +//! +//! `++`, `--`. + +use crate::ast::{InfixOperator, infix_operator_info}; +use crate::lexer::Token; + +/// Compact precedence rank used by the Pratt Parser. +/// +/// A smaller number means tighter binding, and a larger number means looser +/// binding. This inverted scale matches how `UnrealScript` tables were recorded. +#[must_use] +#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct PrecedenceRank(u8); + +impl PrecedenceRank { + /// The loosest possible precedence rank. + /// + /// In this inverted scale (smaller number = tighter binding), + /// this is represented by the maximum [`u8`] value. + pub const LOOSEST: Self = Self(u8::MAX); + + /// The tightest possible precedence rank. + /// + /// In this inverted scale (smaller number = tighter binding), + /// this is represented by zero. + pub const TIGHTEST: Self = Self(0); + + /// Returns `true` if `self` has a looser binding than `other`. + pub const fn is_looser_than(self, other: Self) -> bool { + self.0 > other.0 + } +} + +/// Maps a token to its infix operator along with its left and right binding +/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`. +/// +/// Returns [`None`] if and only if `token` is not an infix operator. +pub fn infix_precedence_ranks( + token: Token, +) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> { + let info = infix_operator_info(token)?; + // All operators are left-associative, so `right_precedence_rank` is set to + // `left_binding_rank - 1` (with our "smaller is tighter" scale, this + // enforces left associativity in Pratt parsing). + // + // Since all precedences are even, subtracting one won't actually cross + // any boundary between operator groups. 
+ Some(( + PrecedenceRank(info.right_precedence_rank), + info.operator, + PrecedenceRank(info.right_precedence_rank - 1), + )) +} diff --git a/rottlib/src/parser/grammar/expression/primary.rs b/rottlib/src/parser/grammar/expression/primary.rs new file mode 100644 index 0000000..01a9b1d --- /dev/null +++ b/rottlib/src/parser/grammar/expression/primary.rs @@ -0,0 +1,463 @@ +//! Parser for primary expressions in Fermented `UnrealScript`. +//! +//! This module implements parsing of primary expressions via +//! [`Parser::parse_primary_from_current_token`] and its helper +//! [`Parser::parse_keyword_primary`]. +//! +//! ## What is a "primary expression" here? +//! +//! In this module, "primary" is used somewhat more broadly than in a +//! textbook grammar, but it still has one essential property: +//! +//! A primary expression is an expression form that can be parsed +//! directly from the current token, without requiring an already +//! parsed left-hand side. +//! +//! This includes ordinary primaries such as literals, identifiers, and +//! parenthesized expressions, as well as keyword-led forms such as +//! `if`, `while`, `for`, `foreach`, `switch`, `return`, `break`, +//! `continue`, `new`, and `class<...>`. +//! +//! By contrast, selectors, postfix operators, and infix operators are +//! not primaries. They cannot stand on their own here: they are parsed +//! only as continuations of an already parsed expression. +//! +//! So "primary" here does not mean "smallest atomic expression". +//! It means "an expression form that does not need a left-hand side +//! in order to be parsed". + +use super::selectors::ParsedCallArgumentSlot; +use crate::ast::{Expression, ExpressionRef, OptionalExpression}; +use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel}; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses a primary expression starting from the provided token. 
+ /// + /// The provided token is assumed to be the already consumed first token of + /// the primary expression. + /// + /// This includes literals, identifiers, grouped expressions, block + /// expressions, and certain keyword-led forms. + /// + /// It does not parse selectors, postfix operators, or infix operators; + /// those are handled afterwards as continuations of the parsed primary. + /// + /// # Errors + /// + /// Returns [`ParseErrorKind::ExpressionExpected`] if the provided + /// token cannot begin any valid primary expression in this position. + pub(crate) fn parse_primary_from_current_token( + &mut self, + token: Token, + token_lexeme: &'src str, + token_position: TokenPosition, + ) -> ParseExpressionResult<'src, 'arena> { + Ok(match token { + Token::IntegerLiteral => { + let value = self.decode_integer_literal(token_lexeme)?; + self.arena + .alloc_node_at(Expression::Integer(value), token_position) + } + Token::FloatLiteral => { + let value = self.decode_float_literal(token_lexeme)?; + self.arena + .alloc_node_at(Expression::Float(value), token_position) + } + Token::StringLiteral => { + let value = self.unescape_string_literal(token_lexeme); + self.arena + .alloc_node_at(Expression::String(value), token_position) + } + Token::NameLiteral => self.arena.alloc_node_at( + Expression::NameLiteral { + tag: None, + name: token_lexeme, + }, + token_position, + ), + Token::LeftParenthesis => self.parse_parenthesized_expression_tail(token_position), + Token::LeftBrace => self.parse_block_tail(token_position), + Token::Keyword(keyword) => match self.parse_keyword_primary(keyword, token_position) { + Some(keyword_expression) => keyword_expression, + None => return self.parse_identifier_like_primary(token, token_position), + }, + _ => return self.parse_identifier_like_primary(token, token_position), + }) + } + + /// Parses a keyword-led primary expression. 
+ /// + /// Returns `None` if the keyword should instead be interpreted as an + /// identifier in this position. + fn parse_keyword_primary( + &mut self, + keyword: Keyword, + token_position: TokenPosition, + ) -> OptionalExpression<'src, 'arena> { + Some(match keyword { + Keyword::True => self + .arena + .alloc_node_at(Expression::Bool(true), token_position), + Keyword::False => self + .arena + .alloc_node_at(Expression::Bool(false), token_position), + Keyword::None => self.arena.alloc_node_at(Expression::None, token_position), + Keyword::If => self.parse_if_tail(token_position), + Keyword::While => self.parse_while_tail(token_position), + Keyword::Do => self.parse_do_until_tail(token_position), + Keyword::ForEach => self.parse_foreach_tail(token_position), + Keyword::Return => self.parse_return_tail(token_position), + Keyword::Break => self.parse_break_tail(token_position), + Keyword::Continue => self + .arena + .alloc_node_at(Expression::Continue, token_position), + Keyword::New => self.parse_new_expression_tail(token_position), + // These keywords remain valid identifiers unless the following + // tokens commit to the keyword-led form. + Keyword::For if self.is_for_loop_header_ahead() => self.parse_for_tail(token_position), + Keyword::Goto if !matches!(self.peek_token(), Some(Token::LeftParenthesis)) => { + self.parse_goto_tail(token_position) + } + // `switch` is only treated as keyword-led when followed by `(` + // to match the syntax accepted by the existing codebase. 
+ Keyword::Switch if matches!(self.peek_token(), Some(Token::LeftParenthesis)) => { + self.parse_switch_tail(token_position) + } + Keyword::Class => { + if let Some((Token::Less, left_angle_bracket_position)) = + self.peek_token_and_position() + { + self.advance(); // '<' + self.parse_class_type_tail(token_position, left_angle_bracket_position) + } else { + return None; + } + } + _ => return None, + }) + } + + /// Attempts to parse the already-consumed token as an identifier or tagged + /// name literal. + /// + /// # Errors + /// + /// Returns [`ParseErrorKind::ExpressionExpected`] if the token + /// cannot be used as an identifier in this position. + fn parse_identifier_like_primary( + &mut self, + primary_token: Token, + primary_token_position: TokenPosition, + ) -> ParseExpressionResult<'src, 'arena> { + let identifier_token = + Parser::identifier_token_from_token(primary_token, primary_token_position).ok_or_else( + || self.make_error_at(ParseErrorKind::ExpressionExpected, primary_token_position), + )?; + + // A token that is valid as an identifier may still start a tagged-name + // literal such as `Texture'Foo.Bar'`. + let expression = if let Some((Token::NameLiteral, lexeme, name_position)) = + self.peek_token_lexeme_and_position() + { + self.advance(); + self.arena.alloc_node_between( + Expression::NameLiteral { + tag: Some(identifier_token), + name: lexeme, + }, + primary_token_position, + name_position, + ) + } else { + self.arena.alloc_node_at( + Expression::Identifier(identifier_token), + primary_token_position, + ) + }; + Ok(expression) + } + + /// Parses a parenthesized expression. + /// + /// Assumes the opening `(` has already been consumed. + /// Reports and recovers from a missing closing `)`. 
+ fn parse_parenthesized_expression_tail( + &mut self, + left_parenthesis_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + // Special case for an empty expression + if let Some((Token::RightParenthesis, right_parenthesis_position)) = + self.peek_token_and_position() + { + self.make_error_here(ParseErrorKind::ParenthesizedExpressionEmpty { + left_parenthesis_position, + }) + .widen_error_span_from(left_parenthesis_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .blame_token(right_parenthesis_position) + .report_error(self); + return self.arena.alloc_node_between( + Expression::Error, + left_parenthesis_position, + right_parenthesis_position, + ); + } + // Continue parsing normally + let inner_expression = if self.next_token_definitely_cannot_start_expression() { + let error = self + .make_error_here(ParseErrorKind::ExpressionExpected) + .widen_error_span_from(left_parenthesis_position) + .sync_error_at(self, SyncLevel::Expression) + .related_token(left_parenthesis_position); + let error_span = error.covered_span; + self.report_error(error); + return crate::arena::ArenaNode::new_in( + crate::ast::Expression::Error, + error_span, + self.arena, + ); + } else { + self.parse_expression() + }; + let right_parenthesis_position = self + .expect( + Token::RightParenthesis, + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { + left_parenthesis_position, + }, + ) + .widen_error_span_from(left_parenthesis_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .unwrap_or_fallback(self); + self.arena.alloc_node_between( + Expression::Parentheses(inner_expression), + left_parenthesis_position, + right_parenthesis_position, + ) + } + + /// Parses a class type expression of the form `class<...>`. + /// + /// Assumes the `class` keyword and following '<' token have already been + /// consumed. Reports and recovers from malformed type syntax locally. 
+ fn parse_class_type_tail( + &mut self, + class_keyword_position: TokenPosition, + left_angle_bracket_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + // Special case for an empty argument + if let Some((Token::Greater, right_angle_bracket_position)) = self.peek_token_and_position() + { + self.make_error_here(ParseErrorKind::ClassTypeMissingTypeArgument { + left_angle_bracket_position, + }) + .widen_error_span_from(left_angle_bracket_position) + .sync_error_at(self, SyncLevel::CloseAngleBracket) + .blame_token(right_angle_bracket_position) + .report_error(self); + return self.arena.alloc_node_between( + Expression::Error, + class_keyword_position, + right_angle_bracket_position, + ); + } + // Qualified identifiers do not have a meaningful fallback option + let class_type = match self + .parse_qualified_identifier(ParseErrorKind::ClassTypeInvalidTypeArgument { + left_angle_bracket_position, + }) + .widen_error_span_from(class_keyword_position) + .sync_error_at(self, SyncLevel::CloseAngleBracket) + { + Ok(class_type) => class_type, + Err(error) => { + self.report_error(error); + return self.arena.alloc_node_between( + Expression::Error, + class_keyword_position, + self.last_consumed_position() + .unwrap_or(class_keyword_position), + ); + } + }; + let right_angle_bracket_position = self + .expect( + Token::Greater, + ParseErrorKind::ClassTypeMissingClosingAngleBracket { + left_angle_bracket_position, + }, + ) + .widen_error_span_from(class_keyword_position) + .sync_error_at(self, SyncLevel::CloseAngleBracket) + .unwrap_or_fallback(self); + self.arena.alloc_node_between( + Expression::ClassType(class_type), + class_keyword_position, + right_angle_bracket_position, + ) + } + + /// Parses a `new` expression with an optional parenthesized argument list. + /// + /// Assumes the `new` keyword has already been consumed. + /// The parenthesized argument list is optional. 
+ fn parse_new_expression_tail( + &mut self, + new_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (outer_argument, name_argument, flags_argument) = + if let Some((Token::LeftParenthesis, left_parenthesis_position)) = + self.peek_token_and_position() + { + self.advance(); + self.parse_new_argument_list_tail(left_parenthesis_position) + } else { + (None, None, None) + }; + // The class specifier is often a literal class reference, but any + // expression is accepted here. + let class_specifier = if self.next_token_definitely_cannot_start_expression() { + let error = self + .make_error_here(ParseErrorKind::NewMissingClassSpecifier { + new_keyword_position, + }) + .widen_error_span_from(new_keyword_position) + .sync_error_at(self, SyncLevel::Expression); + let error_span = error.covered_span; + self.report_error(error); + crate::arena::ArenaNode::new_in(crate::ast::Expression::Error, error_span, self.arena) + } else { + self.parse_expression() + }; + let class_specifier_end_position = class_specifier.span().token_to; + self.arena.alloc_node_between( + Expression::New { + outer_argument, + name_argument, + flags_argument, + class_specifier, + }, + new_keyword_position, + class_specifier_end_position, + ) + } + + /// Parses the optional parenthesized arguments of a `new` expression. + /// + /// Assumes the opening `(` has already been consumed. + /// Returns the `outer`, `name`, and `flags` argument slots, each of which + /// may be omitted. Reports and recovers from a missing closing `)`. 
+ fn parse_new_argument_list_tail( + &mut self, + left_parenthesis_position: TokenPosition, + ) -> ( + OptionalExpression<'src, 'arena>, + OptionalExpression<'src, 'arena>, + OptionalExpression<'src, 'arena>, + ) { + let mut outer_argument = None; + let mut name_argument = None; + let mut flags_argument = None; + + for slot in [&mut outer_argument, &mut name_argument, &mut flags_argument] { + match self.parse_call_argument_slot(left_parenthesis_position) { + ParsedCallArgumentSlot::Argument(argument) => *slot = argument, + ParsedCallArgumentSlot::NoMoreArguments => break, + } + } + + if let Some((next_token, next_token_position)) = self.peek_token_and_position() + && next_token != Token::RightParenthesis + { + self.make_error_here(ParseErrorKind::NewTooManyArguments { + left_parenthesis_position, + }) + .widen_error_span_from(left_parenthesis_position) + .sync_error_until(self, SyncLevel::CloseParenthesis) + .blame_token(next_token_position) + .extend_blame_to_covered_end() + .report_error(self); + } + + self.expect( + Token::RightParenthesis, + ParseErrorKind::NewMissingClosingParenthesis { + left_parenthesis_position, + }, + ) + .widen_error_span_from(left_parenthesis_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + + (outer_argument, name_argument, flags_argument) + } + + /// Returns `true` iff the next token is definitely not a valid start of an + /// expression. + /// + /// This is intentionally conservative: + /// - `true` means parsing an expression here is pointless; + /// - `false` means "might be valid", so the normal expression parser should + /// decide and potentially emit a more specific error. 
+ #[must_use] + pub(crate) fn next_token_definitely_cannot_start_expression(&mut self) -> bool { + matches!( + self.peek_token(), + None + // Closing delimiters / separators + | Some(Token::RightParenthesis) + | Some(Token::RightBrace) + | Some(Token::RightBracket) + | Some(Token::Semicolon) + | Some(Token::Comma) + | Some(Token::Colon) + | Some(Token::Question) + + // Tokens that only continue a previous expression + | Some(Token::Period) + + // Infix / postfix / assignment operators + | Some(Token::Exponentiation) + | Some(Token::Multiply) + | Some(Token::Divide) + | Some(Token::Modulo) + | Some(Token::ConcatSpace) + | Some(Token::Concat) + | Some(Token::LeftShift) + | Some(Token::LogicalRightShift) + | Some(Token::RightShift) + | Some(Token::Less) + | Some(Token::LessEqual) + | Some(Token::Greater) + | Some(Token::GreaterEqual) + | Some(Token::Equal) + | Some(Token::NotEqual) + | Some(Token::ApproximatelyEqual) + | Some(Token::BitwiseAnd) + | Some(Token::BitwiseOr) + | Some(Token::BitwiseXor) + | Some(Token::LogicalAnd) + | Some(Token::LogicalXor) + | Some(Token::LogicalOr) + | Some(Token::Assign) + | Some(Token::MultiplyAssign) + | Some(Token::DivideAssign) + | Some(Token::ModuloAssign) + | Some(Token::PlusAssign) + | Some(Token::MinusAssign) + | Some(Token::ConcatAssign) + | Some(Token::ConcatSpaceAssign) + + // Non-expression trivia / technical tokens + | Some(Token::ExecDirective) + | Some(Token::CppBlock) + | Some(Token::Hash) + | Some(Token::LineComment) + | Some(Token::BlockComment) + | Some(Token::Newline) + | Some(Token::Whitespace) + | Some(Token::Error) + ) + } +} diff --git a/rottlib/src/parser/grammar/expression/selectors.rs b/rottlib/src/parser/grammar/expression/selectors.rs new file mode 100644 index 0000000..18b94a8 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/selectors.rs @@ -0,0 +1,197 @@ +//! Parser for expression selectors in Fermented `UnrealScript`. +//! +//! 
Selectors are suffix forms that extend an already parsed expression, +//! such as member access, indexing, and calls. +//! +//! Unlike primaries, selectors cannot be parsed on their own from the +//! current token. They always require a left-hand side expression. + +use crate::arena::ArenaVec; +use crate::ast::AstSpan; +use crate::ast::{Expression, ExpressionRef, OptionalExpression}; +use crate::lexer::{Token, TokenPosition}; +use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel}; + +/// Represents the result of parsing one call argument slot. +/// +/// This distinguishes between the end of the argument list and a parsed +/// argument slot, including an omitted one. +#[must_use] +#[derive(Debug, PartialEq)] +pub enum ParsedCallArgumentSlot<'src, 'arena> { + /// Indicates that the argument list has ended. + NoMoreArguments, + /// The parsed argument for this slot. + /// + /// `None` represents an omitted argument between commas. + Argument(OptionalExpression<'src, 'arena>), +} + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Parses zero or more postfix selectors after `left_hand_side`. + /// + /// Returns the resulting expression after all contiguous selectors. + pub(crate) fn parse_selectors_into( + &mut self, + left_hand_side: ExpressionRef<'src, 'arena>, + ) -> ParseExpressionResult<'src, 'arena> { + let mut left_hand_side = left_hand_side; + // `next_position` is used only to widen diagnostic spans. + while let Some((next_token, next_position)) = self.peek_token_and_position() { + left_hand_side = match next_token { + Token::Period => self.parse_selector_member_access_into(left_hand_side)?, + Token::LeftBracket => { + self.parse_selector_index_into(left_hand_side, next_position)? + } + Token::LeftParenthesis => { + self.parse_selector_call_into(left_hand_side, next_position) + } + _ => break, + }; + } + Ok(left_hand_side) + } + + /// Parses a member access selector after `left_hand_side`. 
+ /// + /// Expects the leading `.` to be the next token and returns the resulting + /// member access expression. + fn parse_selector_member_access_into( + &mut self, + left_hand_side: ExpressionRef<'src, 'arena>, + ) -> ParseExpressionResult<'src, 'arena> { + self.advance(); // `.` + let member_access_start = left_hand_side.span().token_from; + let member_identifier = self.parse_identifier(ParseErrorKind::ExpressionUnexpectedToken)?; + let member_access_end = member_identifier.0; + Ok(self.arena.alloc_node( + Expression::Member { + target: left_hand_side, + name: member_identifier, + }, + AstSpan::range(member_access_start, member_access_end), + )) + } + + /// Parses an index selector after `left_hand_side`. + /// + /// Expects the leading `[` to be the next token and returns the resulting + /// indexing expression. + fn parse_selector_index_into( + &mut self, + left_hand_side: ExpressionRef<'src, 'arena>, + left_bracket_position: TokenPosition, + ) -> ParseExpressionResult<'src, 'arena> { + self.advance(); // '[' + let index_expression = self.parse_expression(); + let right_bracket_position = self + .expect( + Token::RightBracket, + ParseErrorKind::ExpressionUnexpectedToken, + ) + .widen_error_span_from(left_bracket_position) + .sync_error_at(self, SyncLevel::CloseBracket)?; + + let expression_start = left_hand_side.span().token_from; + Ok(self.arena.alloc_node_between( + Expression::Index { + target: left_hand_side, + index: index_expression, + }, + expression_start, + right_bracket_position, + )) + } + + /// Parses a call selector after `left_hand_side`. + /// + /// Expects the leading `(` to be the next token and returns the resulting + /// call expression. 
+ fn parse_selector_call_into( + &mut self, + left_hand_side: ExpressionRef<'src, 'arena>, + left_parenthesis_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + self.advance(); // '(' + let argument_list = self.parse_call_argument_list(left_parenthesis_position); + let right_parenthesis_position = self + .expect( + Token::RightParenthesis, + ParseErrorKind::FunctionCallMissingClosingParenthesis, + ) + .widen_error_span_from(left_parenthesis_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .unwrap_or_fallback(self); + + let expression_start = left_hand_side.span().token_from; + self.arena.alloc_node_between( + Expression::Call { + callee: left_hand_side, + arguments: argument_list, + }, + expression_start, + right_parenthesis_position, + ) + } + + /// Parses one call argument slot after an already consumed `(`. + /// + /// In `UnrealScript`, every comma introduces a follow-up argument slot, so a + /// trailing comma immediately before `)` denotes an omitted final argument. + /// + /// Returns [`ParsedCallArgumentSlot::NoMoreArguments`] when the argument + /// list ends, and `Argument(None)` for an omitted argument slot. + pub(crate) fn parse_call_argument_slot( + &mut self, + left_parenthesis_position: TokenPosition, + ) -> ParsedCallArgumentSlot<'src, 'arena> { + match self.peek_token() { + Some(Token::RightParenthesis) => return ParsedCallArgumentSlot::NoMoreArguments, + Some(Token::Comma) => { + self.advance(); + if self.at_call_argument_boundary() { + return ParsedCallArgumentSlot::Argument(None); + } + } + _ => (), + } + let argument = self.parse_expression(); + if !self.at_call_argument_boundary() { + self.make_error_here(ParseErrorKind::FunctionArgumentMissingComma) + .widen_error_span_from(left_parenthesis_position) + .report_error(self); + } + ParsedCallArgumentSlot::Argument(Some(argument)) + } + + /// Parses a call argument list after an already-consumed `(`. 
+    ///
+    /// Returns all parsed argument slots, preserving omitted arguments
+    /// as `None`.
+    fn parse_call_argument_list(
+        &mut self,
+        left_parenthesis_position: TokenPosition,
+    ) -> ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>> {
+        let mut argument_list = ArenaVec::new_in(self.arena);
+
+        while let ParsedCallArgumentSlot::Argument(argument) =
+            self.parse_call_argument_slot(left_parenthesis_position)
+        {
+            argument_list.push(argument);
+        }
+
+        argument_list
+    }
+
+    /// Returns whether the current lookahead token ends the current call
+    /// argument slot.
+    ///
+    /// This is true for `,`, which starts the next slot, and for `)`, which
+    /// ends the argument list.
+    fn at_call_argument_boundary(&mut self) -> bool {
+        matches!(
+            self.peek_token(),
+            Some(Token::Comma | Token::RightParenthesis)
+        )
+    }
+}
diff --git a/rottlib/src/parser/grammar/expression/switch.rs b/rottlib/src/parser/grammar/expression/switch.rs
new file mode 100644
index 0000000..2d5522f
--- /dev/null
+++ b/rottlib/src/parser/grammar/expression/switch.rs
@@ -0,0 +1,203 @@
+//! Switch parsing for Fermented `UnrealScript`.
+//!
+//! Provides routines for parsing `switch (...) { ... }` expressions.
+use crate::arena::ArenaVec;
+use crate::ast::{AstSpan, ExpressionRef, StatementRef};
+use crate::lexer::{Keyword, Token, TokenPosition};
+use crate::parser::{ParseErrorKind, ResultRecoveryExt};
+
+impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
+    /// Parses a `switch` expression after the `switch` keyword has been
+    /// consumed.
+    ///
+    /// Returns an [`crate::ast::Expression::Switch`] whose span covers the
+    /// entire construct, from `switch_start_position` to the closing `}`.
+    ///
+    /// Only one `default` arm is recorded. Duplicate defaults and `case` arms
+    /// after a `default` are reported as errors.
+    ///
+    /// On premature end-of-file, reports an error and returns a best-effort
+    /// switch node.
+ #[must_use] + pub(crate) fn parse_switch_tail( + &mut self, + switch_start_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let selector = self.parse_expression(); + let mut cases = self.arena.vec(); + let mut default_arm = None; + let mut span = AstSpan::new(switch_start_position); + if self + .expect(Token::LeftBrace, ParseErrorKind::SwitchMissingBody) + .report_error(self) + { + return self.alloc_switch_node(selector, cases, default_arm, span); + } + while let Some((token, token_position)) = self.peek_token_and_position() { + match token { + Token::RightBrace => { + self.advance(); // '}' + span.extend_to(token_position); + return self.alloc_switch_node(selector, cases, default_arm, span); + } + Token::Keyword(Keyword::Case) => { + if default_arm.is_some() { + self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault); + } + let case_node = self.parse_switch_case_group(token_position); + cases.push(case_node); + } + Token::Keyword(Keyword::Default) => { + if default_arm.is_some() { + self.report_error_here(ParseErrorKind::SwitchDuplicateDefault); + } + // Duplicate `default` is still parsed so that diagnostics + // in its body can be reported. + self.parse_switch_default_arm( + token_position, + default_arm.get_or_insert_with(|| self.arena.vec()), + ); + } + // Items before the first arm declaration are not allowed, but + // are parsed for basic diagnostics and simplicity. + _ => self.parse_switch_preamble_items(token_position), + } + self.ensure_forward_progress(token_position); + } + self.report_error_here(ParseErrorKind::SwitchMissingClosingBrace); + // This can only be `None` in the pathological case of + // an empty token stream + span.extend_to( + self.last_consumed_position() + .unwrap_or(switch_start_position), + ); + self.alloc_switch_node(selector, cases, default_arm, span) + } + + /// Parses a stacked `case` group and its body: + /// `case : (case :)* `. + /// + /// Returns the allocated [`crate::ast::CaseRef`] node. 
+ /// + /// The returned node span covers the entire group, from + /// `first_case_position` to the end of the arm body, or to the end of the + /// last label if the body is empty. + #[must_use] + fn parse_switch_case_group( + &mut self, + first_case_position: TokenPosition, + ) -> crate::ast::SwitchCaseRef<'src, 'arena> { + let mut labels = self.arena.vec(); + while let Some((Keyword::Case, case_position)) = self.peek_keyword_and_position() { + self.advance(); // 'case' + labels.push(self.parse_expression()); + + // `:` is required after each case label; missing `:` is recovered + // at statement sync level. + self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon) + .widen_error_span_from(case_position) + .sync_error_until(self, crate::parser::SyncLevel::Statement) + .report_error(self); + } + let mut body = self.arena.vec(); + self.parse_switch_arm_body(&mut body); + let case_span = compute_case_span(first_case_position, &labels, &body); + self.arena + .alloc_node(crate::ast::SwitchCase { labels, body }, case_span) + } + + /// Parses a `default:` arm and appends its statements to `statements`. + fn parse_switch_default_arm( + &mut self, + default_position: TokenPosition, + statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>, + ) { + self.advance(); // 'default' + self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon) + .widen_error_span_from(default_position) + .sync_error_until(self, crate::parser::SyncLevel::Statement) + .report_error(self); + self.parse_switch_arm_body(statements); + } + + /// Parses statements of a single switch arm body. 
+    fn parse_switch_arm_body(
+        &mut self,
+        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
+    ) {
+        while let Some((token, token_position)) = self.peek_token_and_position() {
+            match token {
+                Token::Keyword(Keyword::Case | Keyword::Default) | Token::RightBrace => break,
+                _ => {
+                    self.parse_next_block_item_into(statements);
+                    self.ensure_forward_progress(token_position);
+                }
+            }
+        }
+    }
+
+    /// Parses items that appear before any `case` or `default` arm declaration.
+    ///
+    /// Such items are not allowed, but they are parsed to produce diagnostics
+    /// and maintain forward progress.
+    ///
+    /// Parsed statements are discarded; only error reporting is preserved.
+    ///
+    /// Parsing stops at a boundary token or end-of-file.
+    /// Boundary tokens: `case`, `default`, `}`.
+    fn parse_switch_preamble_items(&mut self, preamble_start_position: TokenPosition)
+    where
+        'src: 'arena,
+    {
+        // Discard parsed statements into a sink vector.
+        // This is a bit "hacky", but I don't want to adapt code to skip
+        // production of AST nodes just to report errors in
+        // one problematic case.
+        let mut sink = self.arena.vec();
+        self.parse_switch_arm_body(&mut sink);
+        self.make_error_here(ParseErrorKind::SwitchTopLevelItemNotCase)
+            .widen_error_span_from(preamble_start_position)
+            .report_error(self);
+    }
+
+    /// Helper to allocate a `Switch` expression with the given span.
+    #[must_use]
+    fn alloc_switch_node(
+        &self,
+        selector: ExpressionRef<'src, 'arena>,
+        cases: ArenaVec<'arena, crate::ast::SwitchCaseRef<'src, 'arena>>,
+        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
+        span: AstSpan,
+    ) -> ExpressionRef<'src, 'arena> {
+        self.arena.alloc_node(
+            crate::ast::Expression::Switch {
+                selector,
+                cases,
+                default_arm,
+            },
+            span,
+        )
+    }
+}
+
+/// Computes an [`AstSpan`] covering a `case` group.
+/// +/// The span begins at `labels_start_position` and extends to: +/// - the end of the last statement in `body`, if present; otherwise +/// - the end of the last label in `labels`, if present. +/// +/// If both are empty, the span covers only `labels_start_position`. +#[must_use] +fn compute_case_span( + labels_start_position: TokenPosition, + labels: &[ExpressionRef], + body: &[StatementRef], +) -> AstSpan { + let mut span = AstSpan::new(labels_start_position); + if let Some(last_statement) = body.last() { + span.extend_to(last_statement.span().token_to); + } else if let Some(last_label) = labels.last() { + span.extend_to(last_label.span().token_to); + } + span +} diff --git a/rottlib/src/parser/grammar/flow.rs b/rottlib/src/parser/grammar/flow.rs deleted file mode 100644 index ba4aa0d..0000000 --- a/rottlib/src/parser/grammar/flow.rs +++ /dev/null @@ -1,99 +0,0 @@ -use crate::ast::{AstSpan, Expression}; -use crate::lexer::{Token, TokenLocation}; -use crate::parser::ParseErrorKind; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parse the continuation of a `return` after its keyword was consumed. - /// - /// Doesn't consume the terminating `;`. - /// If the next token is not `;`, parses an expression as the optional - /// value. Produces an [`Expression::Return`] whose span runs from - /// the `return` keyword to the end of the value if present, otherwise to - /// the `return` keyword. 
- #[must_use] - pub(crate) fn parse_return_cont( - &mut self, - return_start_location: TokenLocation, - ) -> crate::ast::ExpressionRef<'src, 'arena> { - let (value, span) = if self.peek_token() != Some(Token::Semicolon) { - let value = self.parse_expression(); - - let span = AstSpan { - from: return_start_location, - to: value.span().to, - }; - (Some(value), span) - } else { - ( - None, - AstSpan { - from: return_start_location, - to: return_start_location, - }, - ) - }; - self.arena.alloc(Expression::Return(value), span) - } - - /// Parse the continuation of a `break` after its keyword was consumed. - /// - /// Doesn't consume the terminating `;`. - /// If the next token is not `;`, parses an optional value expression. - /// Produces an [`Expression::Break`] spanning from `break` to the end - /// of the value if present, otherwise to the `break` keyword. - #[must_use] - pub(crate) fn parse_break_cont( - &mut self, - break_start_location: TokenLocation, - ) -> crate::ast::ExpressionRef<'src, 'arena> { - let (value, span) = if self.peek_token() != Some(Token::Semicolon) { - let value = self.parse_expression(); - - let span = AstSpan { - from: break_start_location, - to: value.span().to, - }; - (Some(value), span) - } else { - ( - None, - AstSpan { - from: break_start_location, - to: break_start_location, - }, - ) - }; - self.arena.alloc(Expression::Break(value), span) - } - - /// Parses a `goto` expression after `goto`, assuming that the `goto` token - /// was consumed. - /// - /// Requires the next token to be an identifier label. - /// On missing token, returns [`ParseErrorKind::UnexpectedEndOfFile`]. - /// On a non-identifier next token, - /// returns [`ParseErrorKind::GotoMissingLabel`]. - /// On success, produces an [`Expression::Goto`] spanning from `goto` - /// to the label token. 
- #[must_use] - pub(crate) fn parse_goto_cont( - &mut self, - goto_start_location: TokenLocation, - ) -> crate::parser::ParseExpressionResult<'src, 'arena> { - let Some((token, text, token_location)) = self.peek_token_lexeme_and_location() else { - return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); - }; - if token == Token::Identifier { - let span = AstSpan { - from: goto_start_location, - to: token_location, - }; - self.advance(); - Ok(self - .arena - .alloc(Expression::Goto(self.arena.string(text)), span)) - } else { - Err(self.make_error_here(ParseErrorKind::GotoMissingLabel)) - } - } -} diff --git a/rottlib/src/parser/grammar/function/definition.rs b/rottlib/src/parser/grammar/function/definition.rs new file mode 100644 index 0000000..27e0bc8 --- /dev/null +++ b/rottlib/src/parser/grammar/function/definition.rs @@ -0,0 +1,297 @@ +//! Parsing of callable definitions for Fermented `UnrealScript` +//! (functions, events, delegates, operators). + +use crate::arena::ArenaVec; + +use crate::ast::{ + AstSpan, CallableDefinition, CallableDefinitionRef, CallableKind, CallableModifier, + CallableModifierKind, CallableName, IdentifierToken, InfixOperator, InfixOperatorName, + ParameterRef, PostfixOperator, PostfixOperatorName, PrefixOperator, PrefixOperatorName, + TypeSpecifierRef, +}; +use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::parser::{ + ParseError, ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel, + recovery::RecoveryFallback, +}; + +/// Temporary parsed representation of a callable header without its body. 
+#[derive(Debug)]
+pub(super) struct ParsedCallableHeader<'src, 'arena> {
+    pub start_position: TokenPosition,
+    pub modifiers: crate::arena::ArenaVec<'arena, CallableModifier>,
+    pub kind: CallableKind,
+    pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
+    pub name: CallableName,
+    pub parameters: crate::arena::ArenaVec<'arena, ParameterRef<'src, 'arena>>,
+}
+
+impl<'src, 'arena> RecoveryFallback<'src, 'arena> for ParsedCallableHeader<'src, 'arena> {
+    fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
+        let fallback_position = error.covered_span.token_from;
+        ParsedCallableHeader {
+            start_position: fallback_position,
+            modifiers: parser.arena.vec(),
+            kind: CallableKind::Function,
+            return_type_specifier: None,
+            name: CallableName::Identifier(IdentifierToken(fallback_position)),
+            parameters: parser.arena.vec(),
+        }
+    }
+}
+
+impl<'src, 'arena> Parser<'src, 'arena> {
+    /// Parses a callable definition.
+    ///
+    /// Assumes [`Parser::is_callable_header_ahead`] has already confirmed that
+    /// a callable declaration begins at the current position. This affects
+    /// the diagnostics produced for malformed input.
+    #[must_use]
+    pub(crate) fn parse_callable_definition(&mut self) -> CallableDefinitionRef<'src, 'arena> {
+        let header = self.parse_callable_header().unwrap_or_fallback(self);
+
+        let body = if self.eat(Token::LeftBrace) {
+            Some(self.parse_braced_block_statements_tail(self.last_consumed_position_or_start()))
+        } else {
+            self.expect(
+                Token::Semicolon,
+                ParseErrorKind::CallableMissingBodyOrSemicolon,
+            )
+            .report_error(self);
+            None
+        };
+
+        let span = AstSpan::range(
+            header.start_position,
+            self.last_consumed_position_or_start(),
+        );
+
+        self.arena.alloc_node(
+            CallableDefinition {
+                name: header.name,
+                kind: header.kind,
+                return_type_specifier: header.return_type_specifier,
+                modifiers: header.modifiers,
+                parameters: header.parameters,
+                body,
+            },
+            span,
+        )
+    }
+
+    /// Parses a callable header without the body.
+ fn parse_callable_header( + &mut self, + ) -> ParseResult<'src, 'arena, ParsedCallableHeader<'src, 'arena>> { + let start_position = self.require_position(ParseErrorKind::CallableExpectedHeader)?; + let mut modifiers = self.arena.vec(); + self.collect_callable_modifiers(&mut modifiers); + let kind = self.parse_callable_kind()?; + self.collect_callable_modifiers(&mut modifiers); + + // `(` cannot appear inside a return type in this grammar, + // so seeing it here means the callable has no return type specifier. + let return_type_specifier = match self.peek_token_at(1) { + Some(Token::LeftParenthesis) => None, + _ => Some(self.parse_type_specifier()?), + }; + let name = self.parse_callable_name(kind)?; + + self.expect( + Token::LeftParenthesis, + ParseErrorKind::CallableParamsMissingOpeningParenthesis, + ) + .report_error(self); + let parameters = self.parse_parameter_list(); + self.expect( + Token::RightParenthesis, + ParseErrorKind::CallableParamsMissingClosingParenthesis, + ) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .report_error(self); + + Ok(ParsedCallableHeader { + start_position, + modifiers, + kind, + return_type_specifier, + name, + parameters, + }) + } + + fn parse_callable_kind(&mut self) -> ParseResult<'src, 'arena, CallableKind> { + if let Some(keyword) = self.peek_keyword() { + // Handle this separately because only infix operators can carry + // an optional precedence and cannot, therefore, be handled by + // a simple converter. 
+ if keyword == Keyword::Operator { + self.advance(); + let precedence = self.parse_optional_parenthesized_integer( + ParseErrorKind::CallableOperatorInvalidPrecedence, + ); + return Ok(CallableKind::InfixOperator(precedence)); + } + if let Ok(kind) = CallableKind::try_from(keyword) { + self.advance(); + return Ok(kind); + } + } + Err(self.make_error_here(ParseErrorKind::CallableExpectedKind)) + } + + fn parse_callable_name( + &mut self, + kind: CallableKind, + ) -> ParseResult<'src, 'arena, CallableName> { + match kind { + CallableKind::Function | CallableKind::Event | CallableKind::Delegate => self + .parse_identifier(ParseErrorKind::CallableNameNotIdentifier) + .map(CallableName::Identifier), + CallableKind::PrefixOperator => { + let (token, operator_position) = self.require_token_and_position( + ParseErrorKind::CallablePrefixOperatorInvalidSymbol, + )?; + let operator = PrefixOperator::try_from(token).map_err(|()| { + self.make_error_here(ParseErrorKind::CallablePrefixOperatorInvalidSymbol) + })?; + self.advance(); + Ok(CallableName::PrefixOperator(PrefixOperatorName { + kind: operator, + position: operator_position, + })) + } + CallableKind::InfixOperator(_) => { + let (token, operator_position) = self.require_token_and_position( + ParseErrorKind::CallableInfixOperatorInvalidSymbol, + )?; + let operator = InfixOperator::try_from(token).map_err(|()| { + self.make_error_here(ParseErrorKind::CallableInfixOperatorInvalidSymbol) + })?; + self.advance(); + Ok(CallableName::InfixOperator(InfixOperatorName { + kind: operator, + position: operator_position, + })) + } + CallableKind::PostfixOperator => { + let (token, operator_position) = self.require_token_and_position( + ParseErrorKind::CallablePostfixOperatorInvalidSymbol, + )?; + let operator = PostfixOperator::try_from(token).map_err(|()| { + self.make_error_here(ParseErrorKind::CallablePostfixOperatorInvalidSymbol) + })?; + self.advance(); + Ok(CallableName::PostfixOperator(PostfixOperatorName { + kind: operator, 
+                    position: operator_position,
+                }))
+            }
+        }
+    }
+
+    /// Parses an uninterrupted sequence of function modifiers into
+    /// given vector.
+    pub(crate) fn collect_callable_modifiers(
+        &mut self,
+        modifiers: &mut ArenaVec<'arena, CallableModifier>,
+    ) {
+        while let Some(next_mod) = self.parse_function_modifier() {
+            modifiers.push(next_mod);
+        }
+    }
+
+    fn parse_function_modifier(&mut self) -> Option<CallableModifier> {
+        let (keyword, start) = self.peek_keyword_and_position()?;
+
+        let kind = match keyword {
+            Keyword::Native => {
+                self.advance();
+                let native_id = self.parse_optional_parenthesized_integer(
+                    ParseErrorKind::NativeModifierIdNotIntegerLiteral,
+                );
+                CallableModifierKind::Native(native_id)
+            }
+            Keyword::Config => {
+                self.advance();
+                let ident = self
+                    .parse_required_parenthesized_identifier(
+                        ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
+                        ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
+                    )
+                    .unwrap_or(IdentifierToken(start));
+                CallableModifierKind::Config(ident)
+            }
+            _ => {
+                let simple = CallableModifierKind::try_from(keyword).ok()?;
+                // Only advance after confirming it is the modifier
+                self.advance();
+                simple
+            }
+        };
+
+        let span = AstSpan::range(start, self.last_consumed_position_or_start());
+        Some(CallableModifier { kind, span })
+    }
+
+    fn parse_optional_parenthesized_integer(&mut self, close_err: ParseErrorKind) -> Option<i128> {
+        if !self.eat(Token::LeftParenthesis) {
+            return None;
+        }
+
+        let value = match self.peek_token_and_lexeme() {
+            Some((Token::IntegerLiteral, lex)) => {
+                self.advance();
+                self.decode_integer_literal(lex).ok_or_report(self)
+            }
+            Some(_) => {
+                self.report_error_here(ParseErrorKind::OperatorPrecedenceNotIntegerLiteral);
+                self.advance();
+                None
+            }
+            None => {
+                self.report_error_here(ParseErrorKind::OperatorPrecedenceNotIntegerLiteral);
+                None
+            }
+        };
+
+        self.expect(Token::RightParenthesis, close_err)
+            .sync_error_at(self, SyncLevel::CloseParenthesis)
+            .report_error(self);
+
+        value
+    }
+
+    fn parse_required_parenthesized_identifier(
+        &mut self,
+        close_err: ParseErrorKind,
+        ident_err: ParseErrorKind,
+    ) -> Option<IdentifierToken> {
+        if !self.eat(Token::LeftParenthesis) {
+            self.report_error_here(ident_err);
+            return None;
+        }
+
+        let ident = match self.peek_token_lexeme_and_position() {
+            Some((tok, _, pos)) if tok.is_valid_identifier_name() => {
+                self.advance();
+                Some(IdentifierToken(pos))
+            }
+            Some(_) => {
+                self.report_error_here(ident_err);
+                self.advance();
+                None
+            }
+            None => {
+                self.report_error_here(ident_err);
+                None
+            }
+        };
+
+        self.expect(Token::RightParenthesis, close_err)
+            .sync_error_at(self, SyncLevel::CloseParenthesis)
+            .report_error(self);
+
+        ident
+    }
+}
diff --git a/rottlib/src/parser/grammar/function/lookahead.rs b/rottlib/src/parser/grammar/function/lookahead.rs
new file mode 100644
index 0000000..a330743
--- /dev/null
+++ b/rottlib/src/parser/grammar/function/lookahead.rs
@@ -0,0 +1,50 @@
+//! Lookahead for callable headers in Fermented `UnrealScript`.
+
+use crate::lexer::{Keyword, Token};
+use crate::parser::Parser;
+
+impl Parser<'_, '_> {
+    /// Returns whether the upcoming tokens have the syntactic shape of
+    /// a callable header.
+    ///
+    /// Returns `true` when the following tokens consist of zero or more
+    /// callable modifiers followed by a keyword that defines a callable kind.
+    ///
+    /// Does not check whether any parenthesized arguments are valid.
+    #[must_use]
+    pub(crate) fn is_callable_header_ahead(&mut self) -> bool {
+        let mut lookahead_offset = 0;
+        while let Some(keyword) = self.peek_keyword_at(lookahead_offset) {
+            if keyword.is_callable_kind_keyword() {
+                return true;
+            }
+            if let Some(token_width) = self.callable_modifier_width_at(keyword, lookahead_offset) {
+                lookahead_offset += token_width;
+            } else {
+                break;
+            }
+        }
+        false
+    }
+
+    fn callable_modifier_width_at(
+        &mut self,
+        keyword: Keyword,
+        lookahead_token_offset: usize,
+    ) -> Option<usize> {
+        if !keyword.is_callable_modifier() {
+            return None;
+        }
+
+        if matches!(keyword, Keyword::Native | Keyword::Config)
+            && self.peek_token_at(lookahead_token_offset + 1) == Some(Token::LeftParenthesis)
+            && self.peek_token_at(lookahead_token_offset + 3) == Some(Token::RightParenthesis)
+        {
+            // `native(...)` and `config(...)` consume a parenthesized specifier
+            // in modifier position, so lookahead must skip the whole modifier.
+            Some(4)
+        } else {
+            Some(1)
+        }
+    }
+}
diff --git a/rottlib/src/parser/grammar/function/mod.rs b/rottlib/src/parser/grammar/function/mod.rs
new file mode 100644
index 0000000..ba552e4
--- /dev/null
+++ b/rottlib/src/parser/grammar/function/mod.rs
@@ -0,0 +1,3 @@
+mod definition;
+mod lookahead;
+mod params;
diff --git a/rottlib/src/parser/grammar/function/params.rs b/rottlib/src/parser/grammar/function/params.rs
new file mode 100644
index 0000000..7e894ad
--- /dev/null
+++ b/rottlib/src/parser/grammar/function/params.rs
@@ -0,0 +1,107 @@
+use crate::arena::ArenaVec;
+use crate::ast::{AstSpan, Parameter, ParameterModifier, ParameterModifierKind, ParameterRef};
+use crate::lexer::{Keyword, Token};
+use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
+
+impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
+    // allowed to switch to result returning
+    pub(crate) fn parse_parameter_list(&mut self) -> ArenaVec<'arena, ParameterRef<'src, 'arena>> {
+        let mut params = self.arena.vec();
+        if matches!(self.peek_token(),
Some(Token::RightParenthesis)) { + return params; + } + + loop { + let start_pos = self.last_consumed_position_or_start(); + + let mut modifiers = self.arena.vec(); + + while let Some((next_keyword, next_position)) = self.peek_keyword_and_position() { + match next_keyword { + Keyword::Optional => { + modifiers.push(ParameterModifier { + kind: ParameterModifierKind::Optional, + position: next_position, + }); + self.advance(); + } + Keyword::Out => { + modifiers.push(ParameterModifier { + kind: ParameterModifierKind::Out, + position: next_position, + }); + self.advance(); + } + Keyword::Coerce => { + modifiers.push(ParameterModifier { + kind: ParameterModifierKind::Coerce, + position: next_position, + }); + self.advance(); + } + Keyword::Skip => { + modifiers.push(ParameterModifier { + kind: ParameterModifierKind::Skip, + position: next_position, + }); + self.advance(); + } + _ => break, + } + } + + let type_spec = match self.parse_type_specifier() { + Ok(t) => t, + Err(e) => { + self.report_error(e); + self.recover_until(SyncLevel::ListSeparator); + if self.eat(Token::Comma) { + continue; + } + break; + } + }; + + let name = self + .parse_identifier(ParseErrorKind::ParamMissingIdentifier) + .unwrap_or_fallback(self); + + let array_len = match self.parse_array_len_expr() { + Ok(v) => v, + Err(e) => { + self.report_error(e); + self.recover_until(SyncLevel::CloseBracket); + let _ = self.eat(Token::RightBracket); + None + } + }; + + let default_value = if self.eat(Token::Assign) { + Some(self.parse_expression()) + } else { + None + }; + + let span = AstSpan::range(start_pos, self.last_consumed_position_or_start()); + params.push(self.arena.alloc_node( + Parameter { + modifiers, + type_specifier: type_spec, + name, + array_size: array_len, + default_value, + }, + span, + )); + + if !self.eat(Token::Comma) || matches!(self.peek_token(), Some(Token::RightParenthesis)) + { + break; + } + + self.ensure_forward_progress(start_pos); + } + + params + } +} diff --git 
a/rottlib/src/parser/grammar/literals.rs b/rottlib/src/parser/grammar/literals.rs deleted file mode 100644 index f9c6832..0000000 --- a/rottlib/src/parser/grammar/literals.rs +++ /dev/null @@ -1,119 +0,0 @@ -use crate::ast::DeclarationLiteral; -use crate::lexer::Token; -use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt}; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parses a simple literal value that can be used inside FerUS's top-level - /// class members' definitions. - /// - /// On success consumes exactly one token. If the next token is not - /// a supported literal, returns [`None`] and leaves the stream untouched. - pub(crate) fn parse_declaration_literal( - &mut self, - ) -> ParseResult { - let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location() - else { - return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); - }; - let declaration_literal = match token { - Token::IntegerLiteral => { - let value = self - .decode_integer_literal(token_text) - .unwrap_or_fallback(self); - self.advance(); - DeclarationLiteral::Integer(value) - } - Token::FloatLiteral => { - let value = self - .decode_float_literal(token_text) - .unwrap_or_fallback(self); - self.advance(); - DeclarationLiteral::Float(value) - } - Token::StringLiteral => { - let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text); - self.advance(); - DeclarationLiteral::String(value) - } - Token::True => { - self.advance(); - DeclarationLiteral::Bool(true) - } - Token::False => { - self.advance(); - DeclarationLiteral::Bool(false) - } - Token::None => { - self.advance(); - DeclarationLiteral::None - } - Token::Identifier => { - self.advance(); - DeclarationLiteral::Identifier(token_text) - } - _ => { - return Err(self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken)); - } - }; - Ok((declaration_literal, token_location)) - } - - /// Parses an integer literal as [`i128`]. 
- /// - /// Expects a normalized decimal string with optional leading sign. - /// Does not accept base prefixes or digit separators. - /// - /// [`i128`] type was chosen to cover FerUS's integer range so constant - /// folding remains precise. - pub(crate) fn decode_integer_literal(&mut self, text: &str) -> ParseResult { - text.parse::() - .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) - } - - /// Parses a float literal as [`f64`]. - /// - /// Expects a normalized decimal float (optional sign, decimal point, - /// optional exponent). Special values like `inf`/`NaN` are rejected. - pub(crate) fn decode_float_literal(&mut self, text: &str) -> ParseResult { - if let Ok(parsed_value) = text.parse::() { - Ok(parsed_value) - } else { - Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) - } - } - - /// Unescapes a tokenized string literal into an arena string. - /// - /// Supported escapes: `\n`, `\t`, `\"`, `\\`. - /// Unknown escape sequences are preserved as-is (UnrealScript behavior). - /// - /// Note: this function assumes `raw` is the token text without surrounding - /// quotes. - pub(crate) fn unescape_string_literal( - arena: &'arena crate::arena::Arena, - raw: &str, - ) -> crate::arena::ArenaString<'arena> { - let mut buffer = String::with_capacity(raw.len()); - let mut characters = raw.chars(); - while let Some(next_character) = characters.next() { - if next_character == '\\' { - // The lexer never produces a trailing backslash in a string - // token, so there's always a following character to inspect. - if let Some(escaped_character) = characters.next() { - match escaped_character { - 'n' => buffer.push('\n'), - 't' => buffer.push('\t'), - '"' => buffer.push('"'), - '\\' => buffer.push('\\'), - // Simply leaving the escaped character as-is matches - // UnrealScript behavior. 
- other => buffer.push(other), - } - } - } else { - buffer.push(next_character); - } - } - arena.string(&buffer) - } -} diff --git a/rottlib/src/parser/grammar/mod.rs b/rottlib/src/parser/grammar/mod.rs index 58cb415..6467085 100644 --- a/rottlib/src/parser/grammar/mod.rs +++ b/rottlib/src/parser/grammar/mod.rs @@ -1,8 +1,15 @@ -mod block; -mod control; -mod flow; -mod literals; -mod pratt; -mod precedence; -mod statements; -mod switch; +//! ## Naming conventions +//! +//! Some naming conventions that might not be obvious: +//! +//! - `*_tail` means the opening token or keyword has already been consumed. +//! Tail parsers build the rest of the construct and usually return a total, +//! recovered result. +//! - `*_into` means the method extends an already parsed value or appends into +//! an existing output container. + +mod class; +mod declarations; +mod expression; +mod function; +mod statement; diff --git a/rottlib/src/parser/grammar/pratt.rs b/rottlib/src/parser/grammar/pratt.rs deleted file mode 100644 index f45f708..0000000 --- a/rottlib/src/parser/grammar/pratt.rs +++ /dev/null @@ -1,342 +0,0 @@ -//! Expression parsing for the language front-end. -//! -//! This module implements a Pratt-style parser for the language's expression -//! grammar, supporting: -//! -//! * Primary expressions (literals, identifiers, parenthesized expressions) -//! * Prefix operators -//! * Postfix operators -//! * Infix operators with precedence and associativity -//! -//! Parsing is driven by [`PrecedenceRank`], which controls how tightly -//! operators bind. Infix parsing uses the pair of binding powers returned by -//! [`super::precedence::infix_precedence_ranks`] to encode associativity. -//! The parser infrastructure supports both left- and right-associative -//! operators, but Fermented UnrealScript currently defines only -//! right-associative ones. -//! -//! ## See also -//! -//! - [`crate::parser::Parser::parse_expression`] - main entry point -//! 
- [`PrecedenceRank`] - operator binding strengths -//! - [`super::precedence`] - operator precedence definitions - -use crate::ast::{Expression, ExpressionRef, NeedsSemi}; -use crate::lexer::{Token, TokenLocation}; -use crate::parser::{ParseErrorKind, ParseExpressionResult, ResultRecoveryExt, SyncLevel}; - -pub(crate) use super::precedence::PrecedenceRank; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parses an expression. - pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> { - self.parse_expression_with_precedence(PrecedenceRank::LOOSEST) - } - - /// Parses an expression with operators of at least `min_precedence_rank` - /// (as tight or tighter). - fn parse_expression_with_precedence( - &mut self, - min_precedence_rank: PrecedenceRank, - ) -> ExpressionRef<'src, 'arena> { - // Intentional order: (1) prefix/primary, (2) postfix (tighter than - // any infix), (3) infix. We don't run a second postfix pass; - // `(a+b)!` works because the parenthesized sub-expression had its own - // postfix pass before returning. - let mut left_hand_side = self - .parse_prefix_or_primary() - .sync_error_until(self, SyncLevel::Expression) - .unwrap_or_fallback(self); - // Postfix operators are tighter than any infix ones - left_hand_side = self.parse_postfix_into(left_hand_side); - left_hand_side = self.parse_infix_into(left_hand_side, min_precedence_rank); - left_hand_side - } - - /// Parses a prefix or primary expression (Pratt parser's "nud" or - /// null denotation). - /// - /// Errors with [`ParseErrorKind::UnexpectedEndOfFile`] if the stream ends - /// before a valid prefix/primary. 
- fn parse_prefix_or_primary(&mut self) -> ParseExpressionResult<'src, 'arena> { - let Some((token, token_location)) = self.peek_token_and_location() else { - return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); - }; - - if let Ok(operator) = crate::ast::PrefixOperator::try_from(token) { - self.advance(); - let right_hand_side = self.parse_expression_with_precedence(PrecedenceRank::TIGHTEST); - Ok(Expression::new_prefix( - self.arena, - token_location, - operator, - right_hand_side, - )) - } else { - self.parse_primary() - } - } - - /// Parses a primary expression: literals, identifiers, or a parenthesized - /// sub-expression. - /// - /// # Errors - /// - /// [`ParseErrorKind::ExpressionUnexpectedToken`] if the next token - /// cannot start a primary; [`ParseErrorKind::UnexpectedEndOfFile`] - /// at end of input. - fn parse_primary(&mut self) -> ParseExpressionResult<'src, 'arena> { - // For diagnostics, we only advance *after* fully parsing the current - // literal/token. 
- let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location() - else { - return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); - }; - match token { - Token::IntegerLiteral => { - let value = self.decode_integer_literal(token_text)?; - self.advance(); - Ok(self - .arena - .alloc_at(Expression::Integer(value), token_location)) - } - Token::FloatLiteral => { - let value = self.decode_float_literal(token_text)?; - self.advance(); - Ok(self - .arena - .alloc_at(Expression::Float(value), token_location)) - } - Token::StringLiteral => { - let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text); - self.advance(); - Ok(self - .arena - .alloc_at(Expression::String(value), token_location)) - } - Token::True => { - self.advance(); - Ok(self.arena.alloc_at(Expression::Bool(true), token_location)) - } - Token::False => { - self.advance(); - Ok(self.arena.alloc_at(Expression::Bool(false), token_location)) - } - Token::None => { - self.advance(); - Ok(self.arena.alloc_at(Expression::None, token_location)) - } - Token::Identifier => { - self.advance(); - Ok(self - .arena - .alloc_at(Expression::Identifier(token_text), token_location)) - } - Token::LeftParenthesis => { - self.advance(); - self.parse_parenthesized_expression_cont(token_location) - } - Token::If => { - self.advance(); - Ok(self.parse_if_cont(token_location)) - } - Token::While => { - self.advance(); - Ok(self.parse_while_cont(token_location)) - } - Token::Do => { - self.advance(); - self.parse_do_until_cont(token_location) - } - Token::ForEach => { - self.advance(); - Ok(self.parse_foreach_cont(token_location)) - } - Token::For => { - self.advance(); - self.parse_for_cont(token_location) - } - Token::Brace(crate::lexer::BraceKind::Normal) => { - self.advance(); - Ok(self.parse_block_cont(token_location)) - } - Token::Return => { - self.advance(); - Ok(self.parse_return_cont(token_location)) - } - Token::Break => { - self.advance(); - 
Ok(self.parse_break_cont(token_location)) - } - Token::Continue => { - self.advance(); - Ok(self.arena.alloc_at(Expression::Continue, token_location)) - } - Token::Goto => { - self.advance(); - self.parse_goto_cont(token_location) - } - Token::Switch => { - self.advance(); - self.parse_switch_cont(token_location) - } - _ => { - // Unexpected token in expression. - Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken)) - } - } - } - - /// Parses an expression in parentheses. - /// - /// Assumes the `(` was already consumed; its location is - /// `left_parenthesis_location`. - /// On success, allocates a [`Expression::Parentheses`] node with a span - /// covering from `(` to `)`. - /// - /// Errors with [`ParseErrorKind::ExpressionMissingClosingParenthesis`] if - /// a closing `)` is missing; the diagnostic is associated with - /// the opening `(` via `left_parenthesis_location`. - fn parse_parenthesized_expression_cont( - &mut self, - left_parenthesis_location: TokenLocation, - ) -> ParseExpressionResult<'src, 'arena> { - let inner_expression = self.parse_expression(); - let right_parenthesis_location = self - .expect( - Token::RightParenthesis, - ParseErrorKind::ExpressionMissingClosingParenthesis, - ) - .widen_error_span_from(left_parenthesis_location) - .sync_error_at(self, SyncLevel::CloseParenthesis)?; - Ok(self.arena.alloc_between( - Expression::Parentheses(inner_expression), - left_parenthesis_location, - right_parenthesis_location, - )) - } - - /// Parses all postfix operators it can, creating a tree with - /// `left_hand_side` as a child. - fn parse_postfix_into( - &mut self, - mut left_hand_side: ExpressionRef<'src, 'arena>, - ) -> ExpressionRef<'src, 'arena> { - // Single peek that yields `(postfix_op, location)` so the postfix loop - // can advance once per operator without extra matching/unwraps. 
- while let Some((operator, operator_location)) = self.peek_postfix_with_location() { - self.advance(); - left_hand_side = - Expression::new_postfix(self.arena, left_hand_side, operator, operator_location); - } - left_hand_side - } - - /// Parses infix operators binding at least as tight as - /// `min_precedence_rank`. - /// - /// Associativity is encoded by - /// [`super::precedence::infix_precedence_ranks`]: the right-hand - /// side is parsed with `right_precedence_rank`, so `a - b - c` vs - /// `a ^ b ^ c` associate correctly based on the pair - /// `(left_rank, right_rank)`. - /// - /// Stops when the next operator is looser than `min_precedence_rank`. - fn parse_infix_into( - &mut self, - mut left_hand_side: ExpressionRef<'src, 'arena>, - min_precedence_rank: PrecedenceRank, - ) -> ExpressionRef<'src, 'arena> { - while let Some((operator, right_precedence_rank)) = - self.peek_infix_at_least(min_precedence_rank) - { - self.advance(); - let right_hand_side = self.parse_expression_with_precedence(right_precedence_rank); - left_hand_side = - Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side); - } - left_hand_side - } - - /// Returns the next postfix operator and its location if present. - /// - /// Helper to avoid peeking and mapping twice; used to drive the postfix - /// loop without unwraps. - fn peek_postfix_with_location( - &mut self, - ) -> Option<(crate::ast::PostfixOperator, TokenLocation)> { - let Some((token, token_location)) = self.peek_token_and_location() else { - return None; - }; - let Ok(operator) = crate::ast::PostfixOperator::try_from(token) else { - return None; - }; - Some((operator, token_location)) - } - - /// If the next token is an infix operator with left binding power at least - /// `min_precedence_rank`, returns its operator and precedence rank. - /// - /// Otherwise return [`None`]. 
- fn peek_infix_at_least( - &mut self, - min_precedence_rank: PrecedenceRank, - ) -> Option<(crate::ast::InfixOperator, PrecedenceRank)> { - let (left_precedence_rank, operator, right_precedence_rank) = self - .peek_token() - .and_then(super::precedence::infix_precedence_ranks)?; - if left_precedence_rank.is_looser_than(min_precedence_rank) { - return None; - } - Some((operator, right_precedence_rank)) - } - - /// Parses one item inside a `{ ... }` block. - /// - /// The item can be a statement (e.g. a variable declaration) or an - /// expression. If the item is an expression without a following - /// semicolon, it is returned as the block's current tail expression - /// - the value considered to be the block's result. In well-formed - /// code such a tail expression appears only at the very end of the block. - /// - /// This method never consumes the closing `}` and is only meant to be - /// called while parsing inside a block. - pub(crate) fn parse_block_item( - &mut self, - statements: &mut crate::arena::ArenaVec<'arena, crate::ast::StatementRef<'src, 'arena>>, - ) -> Option> { - if let Some(mut next_statement) = self.parse_statement() { - if next_statement.needs_semicolon() { - // For statements we immediately know if lack of - // semicolon is an issue - if let Some(Token::Semicolon) = self.peek_token() { - next_statement.span_mut().to = self.peek_location(); - self.advance(); // ';' - } else { - self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterStatement); - } - } - statements.push(next_statement); - } else { - let mut next_expression = self.parse_expression(); - if let Expression::Error = *next_expression { - self.recover_until(SyncLevel::Statement); - next_expression.span_mut().to = self.peek_location(); - } - if let Some((Token::Semicolon, semicolon_location)) = self.peek_token_and_location() { - self.advance(); // ; - let span = crate::ast::AstSpan { - from: next_expression.span().from, - to: semicolon_location, - }; - let 
expression_statement_node = self - .arena - .alloc(crate::ast::Statement::Expression(next_expression), span); - statements.push(expression_statement_node); - } else { - return Some(next_expression); - } - } - None - } -} diff --git a/rottlib/src/parser/grammar/precedence.rs b/rottlib/src/parser/grammar/precedence.rs deleted file mode 100644 index 48dae04..0000000 --- a/rottlib/src/parser/grammar/precedence.rs +++ /dev/null @@ -1,185 +0,0 @@ -//! Precedence tables for Fermented UnrealScript operators. -//! -//! These values don't follow the usual *binding power* convention for -//! a Pratt parser, where tighter binding corresponds to a larger number. -//! Here, the smaller the number, the tighter the binding power. -//! For this reason, we use the term *precedence rank* instead. -//! -//! ## Operators sorted by precedence (lowest number = tighter binding) -//! -//! ### Infix operators -//! -//! All infix operators in UnrealScript are -//! [left-associative](https://wiki.beyondunreal.com/Operators). -//! -//! 12: `**` -//! 16: `*`, `/`, `Cross`, `Dot` -//! 18: `%` -//! 20: `+`, `-` -//! 22: `<<`, `>>`, `>>>` -//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom` -//! 26: `!=` -//! 28: `&`, `^`, `|` -//! 30: `&&`, `^^` -//! 32: `||` -//! 34: `*=`, `/=`, `+=`, `-=` -//! 40: `$`, `*`, `@` -//! 44: `$=`, `*=`, `@=` -//! 45: `-=` -//! -//! Some operator, such as `*`, appear twice with different precedence -//! ranks because they were defined with different values for different types -//! in separate script source files (as in the Killing Floor sources). -//! However, UnrealScript uses only the first definition it encounters in -//! `Object.uc`, which corresponds to the lower value. -//! -//! ### Prefix operators -//! -//! `!`, `~`, `-`, `++`, `--`. -//! -//! ### Postfix operators -//! -//! `++`, `--`. - -use crate::ast::{InfixOperator, PostfixOperator, PrefixOperator}; -use crate::lexer::Token; - -/// Compact precedence rank used by the Pratt Parser. 
-/// -/// A smaller number means tighter binding, and a larger number means looser -/// binding. This inverted scale matches how UnrealScript tables were recorded. -#[must_use] -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct PrecedenceRank(u8); - -impl PrecedenceRank { - /// The loosest possible precedence rank. - /// - /// In this inverted scale (smaller number = tighter binding), - /// this is represented by the maximum [`u8`] value. - pub const LOOSEST: Self = PrecedenceRank(u8::MAX); - - /// The tightest possible precedence rank. - /// - /// In this inverted scale (smaller number = tighter binding), - /// this is represented by zero. - pub const TIGHTEST: PrecedenceRank = PrecedenceRank(0); - - /// Returns `true` if `other` has a looser binding than `self`. - /// - /// # Examples - /// - /// ``` - /// # use crate::parser::expressions::PrecedenceRank; - /// let a = PrecedenceRank(40); - /// let b = PrecedenceRank(34); - /// assert!(a.is_looser_than(b)); // 40 is looser than 34 - /// - /// let c = PrecedenceRank(20); - /// let d = PrecedenceRank(24); - /// assert!(!c.is_looser_than(d)); // 20 is tighter than 24 - /// ``` - pub fn is_looser_than(self, other: Self) -> bool { - self.0 > other.0 - } -} - -impl TryFrom for PrefixOperator { - type Error = (); - fn try_from(token: Token) -> Result { - use PrefixOperator::*; - Ok(match token { - Token::Not => Not, - Token::Minus => Minus, - Token::BitwiseNot => BitwiseNot, - Token::Increment => Increment, - Token::Decrement => Decrement, - _ => return Err(()), - }) - } -} - -impl TryFrom for PostfixOperator { - type Error = (); - fn try_from(token: Token) -> Result { - use PostfixOperator::*; - Ok(match token { - Token::Increment => Increment, - Token::Decrement => Decrement, - _ => return Err(()), - }) - } -} - -/// Maps a token to its infix operator along with its left and right binding -/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`. 
-/// -/// Returns [`None`] if and only if `token` is not an infix operator. -pub(crate) fn infix_precedence_ranks( - token: Token, -) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> { - use crate::ast::InfixOperator::*; - let (left_precedence_rank, operator) = match token { - // 12: `**` - Token::Exponentiation => (12, Exponentiation), - // 16: `*`, `/`, `Cross`, `Dot` (left-assoc) - Token::Multiply => (16, Multiply), - Token::Divide => (16, Divide), - Token::Cross => (16, Cross), - Token::Dot => (16, Dot), - // 18: `%` - Token::Modulo => (18, Modulo), - // 20: `+`, `-` - Token::Plus => (20, Plus), - Token::Minus => (20, Minus), - // 22: `<<`, `>>`, `>>>` - Token::LeftShift => (22, LeftShift), - Token::RightShift => (22, RightShift), - Token::LogicalRightShift => (22, LogicalRightShift), - // 24: comparison operators - Token::Less => (24, Less), - Token::LessEqual => (24, LessEqual), - Token::Greater => (24, Greater), - Token::GreaterEqual => (24, GreaterEqual), - Token::Equal => (24, Equal), - Token::ApproximatelyEqual => (24, ApproximatelyEqual), - Token::ClockwiseFrom => (24, ClockwiseFrom), - // 26: `!=` - Token::NotEqual => (26, NotEqual), - // 28: bit-wise `&`, `^`, `|` - Token::BitwiseAnd => (28, BitwiseAnd), - Token::BitwiseXor => (28, BitwiseXor), - Token::BitwiseOr => (28, BitwiseOr), - // 30: logical `&&`, `^^` - Token::And => (30, And), - Token::Xor => (30, Xor), - // 32: logical `||` - Token::Or => (32, Or), - // 34: `*=`, `/=`, `+=`, `-=` - Token::MultiplyAssign => (34, MultiplyAssign), - Token::DivideAssign => (34, DivideAssign), - Token::PlusAssign => (34, PlusAssign), - Token::MinusAssign => (34, MinusAssign), - // Simple '=' treated with same precedence - Token::Assign => (34, Assign), - Token::ModuloAssign => (34, ModuloAssign), - // 40: `$`, `@` - Token::Concat => (40, Concat), - Token::ConcatSpace => (40, ConcatSpace), - // 44: `$=`, `@=` - Token::ConcatAssign => (44, ConcatAssign), - Token::ConcatSpaceAssign => (44, 
ConcatSpaceAssign), - _ => return None, - }; - // All operators are left-associative, so `right_precedence_rank` is set to - // `left_binding_rank - 1` (with our "smaller is tighter" scale, this - // enforces left associativity in Pratt parsing). - // - // Since all precedences are even, subtracting one won't actually cross - // any boundary between operator groups. - Some(( - PrecedenceRank(left_precedence_rank), - operator, - PrecedenceRank(left_precedence_rank - 1), - )) -} diff --git a/rottlib/src/parser/grammar/statement.rs b/rottlib/src/parser/grammar/statement.rs new file mode 100644 index 0000000..ad9f5d8 --- /dev/null +++ b/rottlib/src/parser/grammar/statement.rs @@ -0,0 +1,85 @@ +//! Statement parsing for the language front-end. +//! +//! Implements a simple recursive-descent parser for +//! *Fermented `UnrealScript` statements*. + +use crate::ast::{AstSpan, Statement, StatementRef}; +use crate::lexer::{Keyword, Token}; +use crate::parser::{ParseErrorKind, ResultRecoveryExt}; + +impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { + /// Parses a single statement. + /// + /// Does not consume a trailing `;` except for [`Statement::Empty`]. + /// The caller handles semicolons. Returns [`Some`] if a statement is + /// recognized; otherwise [`None`]. 
+ #[must_use] + pub(crate) fn parse_statement(&mut self) -> Option> { + let Some((token, lexeme, position)) = self.peek_token_lexeme_and_position() else { + self.report_error_here(ParseErrorKind::UnexpectedEndOfFile); + return None; + }; + + match token { + // Empty statement + Token::Semicolon => { + self.advance(); // `;` + Some( + self.arena + .alloc_node(Statement::Empty, AstSpan::new(position)), + ) + } + + // UnrealScript `local` declaration + Token::Keyword(Keyword::Local) => { + self.advance(); // `local` + let start = position; + + let type_spec = self.parse_type_specifier().unwrap_or_fallback(self); + let declarators = self.parse_variable_declarators(); + // TODO: parse + + let span = AstSpan::range(start, self.last_consumed_position_or_start()); + Some(self.arena.alloc_node( + Statement::LocalVariableDeclaration { + type_spec, + declarators, + }, + span, + )) + } + + // Label: Ident ':' (also tolerate Begin:/End:) + Token::Identifier | Token::Keyword(Keyword::Begin | Keyword::End) + if matches!(self.peek_token_at(1), Some(Token::Colon)) => + { + self.advance(); // ident/begin/end + self.advance(); // : + Some(self.arena.alloc_node( + Statement::Label(self.arena.string(lexeme)), + AstSpan::range(position, self.last_consumed_position_or_start()), + )) + } + + // Nested function/event/operator inside blocks + t if t == Token::Keyword(Keyword::Function) + || t == Token::Keyword(Keyword::Event) + || t.is_valid_function_modifier() => + { + let f = self.parse_callable_definition(); + + let span = *f.span(); + Some(self.arena.alloc_node(Statement::Function(f), span)) + } + + // C-like variable declaration starting with a TypeSpec + /*token if self.looks_like_variable_declaration_start(token) => Some( + self.parse_variable_declaration_start() + .sync_error_until(self, SyncLevel::Statement) + .unwrap_or_fallback(self), + ),*/ + // Not a statement + _ => None, + } + } +} diff --git a/rottlib/src/parser/grammar/statements.rs 
b/rottlib/src/parser/grammar/statements.rs deleted file mode 100644 index e039c34..0000000 --- a/rottlib/src/parser/grammar/statements.rs +++ /dev/null @@ -1,185 +0,0 @@ -//! Statement parsing for the language front-end. -//! -//! Implements a simple recursive-descent parser for -//! *Fermented UnrealScript statements*. - -use crate::ast::{AstSpan, Statement, StatementRef}; -use crate::lexer::Token; -use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel}; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parses a single statement. - /// - /// Does not consume a trailing `;` except for [`Statement::Empty`]. - /// The caller handles semicolons. Returns [`Some`] if a statement is - /// recognized; otherwise [`None`]. - #[must_use] - pub(crate) fn parse_statement(&mut self) -> Option> { - let Some((token, lexeme, location)) = self.peek_token_lexeme_and_location() else { - self.report_error_here(ParseErrorKind::UnexpectedEndOfFile); - return None; - }; - match token { - // Empty statement - Token::Semicolon => { - self.advance(); // `;` - Some(self.arena.alloc(Statement::Empty, AstSpan::new(location))) - } - // UnrealScript's standard `local` variable declaration - Token::Local => { - self.advance(); // `local` - Some( - self.parse_local_variable_declaration_cont() - .widen_error_span_from(location) - .sync_error_until(self, SyncLevel::Statement) - .unwrap_or_fallback(self), - ) - } - // Label definition - Token::Identifier if matches!(self.peek_token_at(1), Some(Token::Colon)) => { - self.advance(); // `Token::Identifier` - self.advance(); // `:` - Some(self.arena.alloc( - Statement::Label(self.arena.string(lexeme)), - AstSpan::range(location, self.last_visited_location()), - )) - } - // C-like variable declaration - token - if token.is_valid_type_name_token() - && Some(Token::Identifier) == self.peek_token_at(1) => - { - self.advance(); // `TYPE_NAME` - // Next token is guaranteed to exist by the arm condition - 
Some(self.parse_variable_declaration_cont(lexeme)) - } - // Not a statement - _ => None, - } - } - - /// Parses a local variable declaration after `local` has been consumed. - /// - /// Requires the next token to be a type name. Initializers are not allowed. - /// Reports and recovers from errors; the identifier list may be empty if - /// recovery fails. - fn parse_local_variable_declaration_cont( - &mut self, - ) -> crate::parser::ParseResult<'src, 'arena, StatementRef<'src, 'arena>> { - let Some((type_token, type_name)) = self.peek_token_and_lexeme() else { - return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); - }; - if !type_token.is_valid_type_name_token() { - return Err(self.make_error_here(ParseErrorKind::LocalInvalidTypeName)); - } - let declaration_start_location = self.last_visited_location(); - self.advance(); // `TYPE_NAME` - - let type_name = self.arena.string(type_name); - let identifiers = self.parse_local_identifier_list(); - if identifiers.is_empty() { - self.make_error_here(ParseErrorKind::LocalMissingIdentifier) - .widen_error_span_from(declaration_start_location) - .report_error(self); - } - Ok(self.arena.alloc( - Statement::LocalVariableDeclaration { - type_name, - identifiers, - }, - AstSpan::range(declaration_start_location, self.last_visited_location()), - )) - } - - /// Parses a comma-separated list of identifiers for a local declaration. - /// - /// Best-effort recovery from errors. Returns an empty list if no valid - /// identifiers are found. 
- fn parse_local_identifier_list( - &mut self, - ) -> crate::arena::ArenaVec<'arena, crate::arena::ArenaString<'arena>> { - let mut identifiers = self.arena.vec(); - while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() { - if token == Token::Identifier { - identifiers.push(self.arena.string(next_variable_name)); - self.advance(); // `Token::Identifier` - } else { - self.report_error_here(ParseErrorKind::LocalBadVariableIdentifier); - // Try to recover to the next variable name - self.recover_until(SyncLevel::ListSeparator); - } - - // Disallow initializers in `local`. - if let Some(Token::Assign) = self.peek_token() { - self.report_error_here(ParseErrorKind::LocalInitializerNotAllowed); - self.recover_until(SyncLevel::ListSeparator); - } - - // Can the list continue? - // Loop cannot stall: each iteration consumes a token or breaks - if !self.eat(Token::Comma) { - break; - } - } - // End-of-file branch - identifiers - } - - /// Parses a non-local variable declaration after the type name token - /// has been consumed. - /// - /// The caller must guarantee that at least one declarator follows. - /// Optional initializers are allowed. - fn parse_variable_declaration_cont( - &mut self, - type_name: &'src str, - ) -> StatementRef<'src, 'arena> { - let declaration_start_location = self.last_visited_location(); - let type_name = self.arena.string(type_name); - let declarations = self.parse_variable_declaration_list(); - // An identifier required by method's condition - debug_assert!(!declarations.is_empty()); - self.arena.alloc( - Statement::VariableDeclaration { - type_name, - declarations, - }, - AstSpan::range(declaration_start_location, self.last_visited_location()), - ) - } - - /// Parses a comma-separated list of declarators with optional `=` - /// initializers. - /// - /// Best-effort recovery on errors. - /// The caller should invoke this when the next token starts a declarator. 
- fn parse_variable_declaration_list( - &mut self, - ) -> crate::arena::ArenaVec<'arena, crate::ast::VariableDeclarator<'src, 'arena>> { - let mut variables = self.arena.vec(); - while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() { - if token == Token::Identifier { - self.advance(); // `Token::Identifier` - let name = self.arena.string(next_variable_name); - let initializer = if self.eat(Token::Assign) { - Some(self.parse_expression()) - } else { - None - }; - variables.push(crate::ast::VariableDeclarator { name, initializer }); - } else { - self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier); - // Try to recover to the next variable name - self.recover_until(SyncLevel::ListSeparator); - } - - // Can the list continue? - // Loop cannot stall: each iteration consumes a token or breaks - if !self.eat(Token::Comma) { - break; - } - } - // End-of-file branch - variables - } -} diff --git a/rottlib/src/parser/grammar/switch.rs b/rottlib/src/parser/grammar/switch.rs deleted file mode 100644 index 750bf6d..0000000 --- a/rottlib/src/parser/grammar/switch.rs +++ /dev/null @@ -1,227 +0,0 @@ -use crate::arena::ArenaVec; -use crate::ast::{AstSpan, ExpressionRef, StatementRef}; -use crate::lexer::{Token, TokenLocation}; -use crate::parser::{ParseErrorKind, ResultRecoveryExt}; - -impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { - /// Parses a `switch` expression after the `switch` keyword was consumed. - /// - /// Arm bodies accept statements and expressions. A last, expression without - /// `;` in the last arm becomes the switch's tail value if none was - /// captured yet. - /// Only one `default` case arm is allowed. - /// Returns a best-effort switch node on premature EOF. 
- #[must_use] - pub(crate) fn parse_switch_cont( - &mut self, - switch_start_location: TokenLocation, - ) -> crate::parser::ParseExpressionResult<'src, 'arena> { - let selector = self.parse_expression(); - self.expect( - Token::Brace(crate::lexer::BraceKind::Normal), - ParseErrorKind::SwitchMissingBody, - ) - .report_error(self); - let (mut cases, mut default_arm, mut tail) = (self.arena.vec(), None, None); - let mut span = AstSpan::new(switch_start_location); - loop { - let Some((token, token_location)) = self.peek_token_and_location() else { - self.report_error_here(ParseErrorKind::UnexpectedEndOfFile); - span.extend_to(self.peek_location()); - return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span)); - }; - match token { - Token::RightBrace => { - self.advance(); // '}' - span.extend_to(token_location); - return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span)); - } - Token::Case => { - if default_arm.is_some() { - self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault); - } - let case_node = self.parse_switch_case_group(token_location, &mut tail); - cases.push(case_node); - } - Token::Default => { - if default_arm.is_some() { - self.report_error_here(ParseErrorKind::SwitchDuplicateDefault); - } - // We still parse a duplicate default to surface all errors. - // Bodies are effectively fused for error reporting; - // compilation stops anyway, so this trades AST correctness - // for diagnostics. - self.parse_switch_default_arm( - token_location, - default_arm.get_or_insert_with(|| self.arena.vec()), - &mut tail, - ); - } - // This can only be triggered before parsing any `case` or - // `default` arms, since they stop either at the start of - // another arm declaration (e.g. at `case`/`default`) or - // at the `}` that ends switch body. - _ => self.parse_switch_preamble_items(&mut tail), - } - // Ensure forward progress under errors to avoid infinite loops. 
- if self.peek_location() <= token_location { - self.advance(); - } - } - } - - /// Parses a stacked `case` group and its body: - /// `case : (case :)* `. - /// - /// Returns the allocated [`crate::ast::CaseRef`] node. - #[must_use] - fn parse_switch_case_group( - &mut self, - first_case_location: TokenLocation, - tail: &mut Option>, - ) -> crate::ast::CaseRef<'src, 'arena> { - let mut labels = self.arena.vec(); - while let Some((Token::Case, case_location)) = self.peek_token_and_location() { - // Guaranteed progress: we entered on `Token::Case`. - self.advance(); // 'case' - labels.push(self.parse_expression()); - - // Enforce `:` after each case with statement-level recovery. - self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon) - .widen_error_span_from(case_location) - .sync_error_until(self, crate::parser::SyncLevel::Statement) - .report_error(self); - } - let mut body = self.arena.vec(); - self.parse_switch_arm_body(&mut body, tail); - let case_span = compute_case_span(first_case_location, &labels, &body); - self.arena - .alloc(crate::ast::SwitchCase { labels, body }, case_span) - } - - /// Parses the `default :` arm and its body. - /// - /// Does not consume a boundary token after the body. - fn parse_switch_default_arm( - &mut self, - default_location: TokenLocation, - statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>, - tail: &mut Option>, - ) { - self.advance(); // 'default' - self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon) - .widen_error_span_from(default_location) - .sync_error_until(self, crate::parser::SyncLevel::Statement) - .report_error(self); - self.parse_switch_arm_body(statements, tail); - } - - /// Parses items of a single switch arm body until a boundary token or EOF. - /// - /// Boundary tokens: `case`, `default`, `}`. 
- fn parse_switch_arm_body( - &mut self, - statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>, - tail: &mut Option>, - ) { - // No need to report end-of-file as it'll be done by - // `parse_switch_cont`. - while let Some((token, token_location)) = self.peek_token_and_location() { - match token { - // Complain about tail instruction if `switch` body - // doesn't end here - Token::Case | Token::Default => { - if let Some(tail_expression) = tail.take() { - self.report_error_here(ParseErrorKind::SwitchBareExpressionBeforeNextArm); - let span = *tail_expression.span(); - let stmt = self - .arena - .alloc(crate::ast::Statement::Expression(tail_expression), span); - statements.push(stmt); - } - break; - } - Token::RightBrace => break, - _ => (), - } - // We know that at this point: - // 1. There is still a token and it is not EOF; - // 2. It isn't end of the block. - // So having a tail statement there is a problem! - if let Some(tail_expression) = tail.take() { - self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression); - let tail_span = *tail_expression.span(); - let node = self.arena.alloc( - crate::ast::Statement::Expression(tail_expression), - tail_span, - ); - statements.push(node); - } - *tail = self.parse_block_item(statements); - // Ensure forward progress under errors to avoid infinite loops. - if self.peek_location() <= token_location { - self.advance(); - } - } - } - - /// Parses items that were found in code *before* any arm (`case`/`default`) - /// declaration. - /// - /// These aren't allowed, but we still want to perform a proper parsing step - /// to report whatever errors we can in case programmer simply forgot to put - /// an arm declaration. - /// - /// Boundary tokens: `case`, `default`, `}`. - fn parse_switch_preamble_items(&mut self, tail: &mut Option>) { - // Report the spurious token. - self.report_error_here(ParseErrorKind::SwitchTopLevelItemNotCase); - - // Discard parsed statements into a sink vector. 
- // This is a bit "hacky", but I don't want to adapt code to skip - // production of AST nodes just to report errors in - // one problematic case. - let mut sink = self.arena.vec(); - self.parse_switch_arm_body(&mut sink, tail); - } - - /// Helper to allocate a `Switch` expression with the given span. - #[must_use] - fn alloc_switch_node( - &mut self, - selector: ExpressionRef<'src, 'arena>, - cases: ArenaVec<'arena, crate::ast::CaseRef<'src, 'arena>>, - default_arm: Option>>, - tail: Option>, - span: AstSpan, - ) -> ExpressionRef<'src, 'arena> { - self.arena.alloc( - crate::ast::Expression::Switch { - selector, - cases, - default_arm, - tail, - }, - span, - ) - } -} - -/// Computes [`AstSpan`] covering all labels and the body. -#[must_use] -fn compute_case_span( - labels_start_location: TokenLocation, - labels: &[ExpressionRef], - body: &[StatementRef], -) -> AstSpan { - let mut span = AstSpan { - from: labels_start_location, - to: labels_start_location, - }; - if let Some(last_statement) = body.last() { - span.extend_to(last_statement.span().to); - } else if let Some(last_label) = labels.last() { - span.extend_to(last_label.span().to); - } - span -} diff --git a/rottlib/src/parser/mod.rs b/rottlib/src/parser/mod.rs index 71fd299..0b9ee93 100644 --- a/rottlib/src/parser/mod.rs +++ b/rottlib/src/parser/mod.rs @@ -1,8 +1,8 @@ -//! Parser for Fermented UnrealScript (FerUS). +//! Parser for Fermented `UnrealScript` (`FerUS`). //! //! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST //! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser; -//! the rest rely on recursive descent in [`crate::parser::grammar`]. +//! the rest rely on recursive descent in [`crate::parser::grammar`].\ //! Non-fatal errors accumulate in `Parser::diagnostics` as //! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by //! 
[`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while @@ -27,17 +27,14 @@ use super::lexer; -pub use lexer::{TokenPiece, Tokens}; +pub use lexer::{TokenData, Tokens}; mod cursor; mod errors; mod grammar; -pub mod pretty; mod recovery; mod trivia; -pub use pretty::{ExprTree, StmtTree}; - pub use errors::ParseError; pub(crate) use errors::{ParseErrorKind, ParseResult}; pub(crate) use recovery::{ResultRecoveryExt, SyncLevel}; @@ -50,8 +47,8 @@ pub type ParseExpressionResult<'src, 'arena> = pub struct Parser<'src, 'arena> { arena: &'arena crate::arena::Arena, pub diagnostics: Vec, - cursor: cursor::CursorComponent<'src>, - trivia: trivia::TriviaComponent<'src>, + cursor: cursor::Cursor<'src, 'src>, + trivia: trivia::TriviaIndexBuilder<'src>, } impl<'src, 'arena> Parser<'src, 'arena> { @@ -59,8 +56,8 @@ impl<'src, 'arena> Parser<'src, 'arena> { Self { arena, diagnostics: Vec::new(), - cursor: cursor::CursorComponent::new(file), - trivia: trivia::TriviaComponent::default(), + cursor: cursor::Cursor::new(file), + trivia: trivia::TriviaIndexBuilder::default(), } } } diff --git a/rottlib/src/parser/pretty.rs b/rottlib/src/parser/pretty.rs deleted file mode 100644 index 6861493..0000000 --- a/rottlib/src/parser/pretty.rs +++ /dev/null @@ -1,353 +0,0 @@ -use crate::ast::{Expression, Statement, SwitchCase, VariableDeclarator}; -use core::fmt; - -/// A borrow of either a statement or an expression node, -/// plus helpers to enrich the printed tree. -enum AnyNode<'src, 'a, 'b> { - Stmt(&'b Statement<'src, 'a>), - Expr(&'b Expression<'src, 'a>), - Case(&'b SwitchCase<'src, 'a>), - /// A leaf line with a preformatted label (e.g., variable names). - Text(String), - /// Wraps a child with a tag like "cond", "body", "else", "init". - Tagged(&'static str, Box>), -} - -/// Public wrappers to print trees starting from either kind of node. 
-pub struct StmtTree<'src, 'a, 'b>(pub &'b Statement<'src, 'a>); -pub struct ExprTree<'src, 'a, 'b>(pub &'b Expression<'src, 'a>); - -impl<'src, 'a, 'b> fmt::Display for StmtTree<'src, 'a, 'b> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_node(AnyNode::Stmt(self.0), f, "", true) - } -} -impl<'src, 'a, 'b> fmt::Display for ExprTree<'src, 'a, 'b> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt_node(AnyNode::Expr(self.0), f, "", true) - } -} - -fn fmt_node<'src, 'a, 'b>( - node: AnyNode<'src, 'a, 'b>, - f: &mut fmt::Formatter<'_>, - prefix: &str, - is_last: bool, -) -> fmt::Result { - write!(f, "{}{}─ ", prefix, if is_last { "â””" } else { "├" })?; - writeln!(f, "{}", label(&node))?; - - let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " }); - let kids = children(node); - let len = kids.len(); - for (i, child) in kids.into_iter().enumerate() { - let last = i + 1 == len; - fmt_node(child, f, &new_prefix, last)?; - } - Ok(()) -} - -/// ----- Labeling ----- - -fn label<'src, 'a, 'b>(node: &AnyNode<'src, 'a, 'b>) -> String { - match node { - AnyNode::Expr(e) => expr_label(e), - AnyNode::Stmt(s) => stmt_label(s), - AnyNode::Case(c) => case_label(c), - AnyNode::Text(s) => s.clone(), - AnyNode::Tagged(tag, inner) => format!("{tag}: {}", label(inner)), - } -} - -fn quote_str(s: &str) -> String { - let mut out = String::with_capacity(s.len() + 2); - out.push('"'); - for ch in s.chars() { - match ch { - '\\' => out.push_str("\\\\"), - '"' => out.push_str("\\\""), - '\n' => out.push_str("\\n"), - '\r' => out.push_str("\\r"), - '\t' => out.push_str("\\t"), - c => out.push(c), - } - } - out.push('"'); - out -} - -fn expr_label<'src, 'a>(e: &Expression<'src, 'a>) -> String { - match e { - Expression::Binary(_, op, _) => format!("Binary {op}"), - Expression::LeftUnary(op, _) => format!("UnaryL {op}"), - Expression::RightUnary(_, op) => format!("UnaryR {op}"), - - Expression::Identifier(s) => format!("Ident {s}"), - 
Expression::String(s) => { - // Avoid assuming ArenaString exposes &str; go via Display -> String. - format!("String {}", quote_str(&s.to_string())) - } - Expression::Integer(i) => format!("Int {i}"), - Expression::Float(x) => format!("Float {x}"), - Expression::Bool(true) => "Bool true".into(), - Expression::Bool(false) => "Bool false".into(), - Expression::None => "None".into(), - Expression::Parentheses(_) => "Parentheses".into(), - - Expression::Block { statements, tail } => { - let n = statements.len() + usize::from(tail.is_some()); - let tail_s = if tail.is_some() { " tail" } else { "" }; - format!("BlockExpr ({n} items{tail_s})") - } - Expression::If { .. } => "IfExpr".into(), - Expression::While { .. } => "WhileExpr".into(), - Expression::DoUntil { .. } => "DoUntilExpr".into(), - Expression::ForEach { .. } => "ForEachExpr".into(), - Expression::For { .. } => "ForExpr".into(), - Expression::Switch { - cases, - default_arm: default, - .. - } => { - let d = if default.is_some() { " yes" } else { " no" }; - format!("SwitchExpr cases={} default:{}", cases.len(), d) - } - Expression::Goto(label) => format!("Goto {}", label.to_string()), - Expression::Continue => "Continue".into(), - Expression::Break(Some(_)) => "Break value".into(), - Expression::Break(None) => "Break".into(), - Expression::Return(Some(_)) => "Return value".into(), - Expression::Return(None) => "Return".into(), - - Expression::Error => "Error".into(), - } -} - -/// ----- Children collection ----- - -fn children<'src, 'a, 'b>(node: AnyNode<'src, 'a, 'b>) -> Vec> { - match node { - AnyNode::Expr(e) => expr_children(e), - AnyNode::Stmt(s) => stmt_children(s), - AnyNode::Case(c) => case_children(c), - AnyNode::Text(_) => vec![], - AnyNode::Tagged(_, inner) => children(*inner), - } -} - -/// Expression children can include statements inside Block/Switch. 
-fn expr_children<'src, 'a, 'b>(e: &'b Expression<'src, 'a>) -> Vec> { - match e { - // Purely expression subtrees - Expression::Binary(lhs, _, rhs) => vec![AnyNode::Expr(&*lhs), AnyNode::Expr(&*rhs)], - Expression::LeftUnary(_, expr) => vec![AnyNode::Expr(&*expr)], - Expression::RightUnary(expr, _) => vec![AnyNode::Expr(&*expr)], - Expression::Parentheses(expr) => vec![AnyNode::Expr(&*expr)], - - // Structured expression forms - Expression::Block { statements, tail } => { - let mut out: Vec> = statements - .iter() - .map(|s| AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*s)))) - .collect(); - if let Some(t) = tail.as_ref() { - out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t)))); - } - out - } - - Expression::If { - condition, - body, - else_body, - } => { - let mut out = vec![ - AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))), - AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))), - ]; - if let Some(e) = else_body { - out.push(AnyNode::Tagged("else", Box::new(AnyNode::Expr(&*e)))); - } - out - } - - Expression::While { condition, body } => vec![ - AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))), - AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))), - ], - - Expression::DoUntil { condition, body } => vec![ - AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))), - AnyNode::Tagged("until", Box::new(AnyNode::Expr(&*condition))), - ], - - Expression::ForEach { iterator, body } => vec![ - AnyNode::Tagged("iter", Box::new(AnyNode::Expr(&*iterator))), - AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))), - ], - - Expression::For { - init, - condition, - step, - body, - } => { - let mut out = Vec::with_capacity(4); - if let Some(i) = init { - out.push(AnyNode::Tagged("init", Box::new(AnyNode::Expr(&*i)))); - } - if let Some(c) = condition { - out.push(AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*c)))); - } - if let Some(s) = step { - out.push(AnyNode::Tagged("step", Box::new(AnyNode::Expr(&*s)))); - } - 
out.push(AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body)))); - out - } - - Expression::Switch { - selector, - cases, - default_arm: default, - tail, - } => { - let mut out: Vec> = vec![AnyNode::Tagged( - "selector", - Box::new(AnyNode::Expr(&*selector)), - )]; - - for case in cases.iter() { - out.push(AnyNode::Tagged("case", Box::new(AnyNode::Case(&*case)))); - } - - if let Some(d) = default.as_ref() { - for stmt in d.iter() { - out.push(AnyNode::Tagged("default", Box::new(AnyNode::Stmt(&*stmt)))); - } - } - - if let Some(t) = tail.as_ref() { - out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t)))); - } - - out - } - - // Leaves - Expression::Identifier(_) - | Expression::String(_) - | Expression::Integer(_) - | Expression::Float(_) - | Expression::Bool(_) - | Expression::None - | Expression::Goto(_) - | Expression::Continue - | Expression::Break(None) - | Expression::Return(None) - | Expression::Error => vec![], - - // Single optional-child leaves - Expression::Break(Some(v)) => vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))], - Expression::Return(Some(v)) => { - vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))] - } - } -} - -fn stmt_label<'src, 'a>(s: &Statement<'src, 'a>) -> String { - use Statement::*; - match s { - Empty => "Empty ;".into(), - Expression(_) => "Expression".into(), - - LocalVariableDeclaration { - type_name, - identifiers: variable_names, - } => { - let count = variable_names.len(); - let names = variable_names - .iter() - .map(|n| n.to_string()) - .collect::>() - .join(", "); - format!("LocalVarDecl type={type_name} count={count} names=[{names}]") - } - - VariableDeclaration { - type_name, - declarations: variable_names, - } => { - let total = variable_names.len(); - let inits = variable_names - .iter() - .filter(|v| v.initializer.is_some()) - .count(); - let names = variable_names - .iter() - .map(|VariableDeclarator { name, .. 
}| name.to_string()) - .collect::>() - .join(", "); - format!("VarDecl type={type_name} vars={total} inits={inits} names=[{names}]") - } - - Label(name) => format!("Label {name}"), - - Error => "Error".into(), - } -} - -fn stmt_children<'src, 'a, 'b>(s: &'b Statement<'src, 'a>) -> Vec> { - use Statement::*; - match s { - Empty | Label(_) | Error => vec![], - - Expression(expr) => vec![AnyNode::Expr(&*expr)], - - LocalVariableDeclaration { - identifiers: variable_names, - .. - } => variable_names - .iter() - .map(|n| AnyNode::Text(format!("name: {n}"))) - .collect(), - - VariableDeclaration { - declarations: variable_names, - .. - } => { - let mut out = Vec::new(); - for VariableDeclarator { - name, - initializer: initial_value, - } in variable_names.iter() - { - out.push(AnyNode::Text(format!("var: {name}"))); - if let Some(init_expr) = initial_value { - out.push(AnyNode::Tagged( - "init", - Box::new(AnyNode::Expr(&*init_expr)), - )); - } - } - out - } - } -} - -fn case_children<'src, 'a, 'b>(c: &'b SwitchCase<'src, 'a>) -> Vec> { - let mut out = Vec::new(); - for lbl in c.labels.iter() { - out.push(AnyNode::Tagged("label", Box::new(AnyNode::Expr(&*lbl)))); - } - for stmt in c.body.iter() { - out.push(AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*stmt)))); - } - out -} - -fn case_label<'src, 'a>(c: &SwitchCase<'src, 'a>) -> String { - let l = c.labels.len(); - let b = c.body.len(); - format!("Case labels={l} body_items={b}") -} diff --git a/rottlib/src/parser/recovery.rs b/rottlib/src/parser/recovery.rs index e5d524d..5d8d147 100644 --- a/rottlib/src/parser/recovery.rs +++ b/rottlib/src/parser/recovery.rs @@ -8,88 +8,186 @@ //! General idea is that any method that returns something other than an error //! can be assumed to have reported it. 
-use crate::lexer::{Token, TokenLocation}; +use crate::ast::{AstSpan, CallableKind, IdentifierToken, QualifiedIdentifier}; +use crate::diagnostics::Diagnostic; +use crate::lexer::{Token, TokenPosition}; use crate::parser::{ParseError, ParseResult, Parser}; /// Synchronization groups the parser can stop at during recovery. /// -/// Stronger levels subsume weaker ones. The enum's variant order defines this -/// ordering of strength via [`Ord`]; changing it changes recovery behavior. +/// The variant order defines recovery strength: later variants are treated as +/// "stronger" boundaries, so synchronizing to a weaker level will also stop +/// at any stronger one. +/// +/// This enum is intentionally coarse-grained and semantic. It is not meant to +/// encode arbitrary token sets. #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] -pub(crate) enum SyncLevel { - /// Tokens that appear inside expressions. +pub enum SyncLevel { + /// Tokens that can reasonably continue or restart an expression. /// - /// Includes operators, member access `.`, ternary `? :`, an opening `(`, - /// and identifiers. + /// This is the loosest recovery level. Expression, - /// List separator `,`. + + /// Separator between homogeneous list elements, e.g. `,`. + /// + /// Synchronizing here also stops at closing delimiters and stronger + /// structural boundaries. ListSeparator, - /// Close of a parenthesized subexpression `)`. + + /// Closing `>` of an angle-bracket-delimited type/class argument list. + CloseAngleBracket, + + /// Closing `)` of a parenthesized/grouped construct. CloseParenthesis, - /// Close of an index or list `]`. + + /// Closing `]` of an index or bracket-delimited construct. CloseBracket, - /// Statement boundary or starter. + + /// A statement boundary or statement starter. + /// + /// Includes `;` and keywords that begin standalone statements / + /// statement-like control-flow forms. Statement, - /// Block boundary braces (both `{` and `}`). 
+ + /// Start of a `switch` arm. + /// + /// This is useful because `case` / `default` are stronger boundaries than + /// ordinary statements inside switch parsing. + SwitchArmStart, + + /// Start of a declaration-like item. + /// + /// Used for recovery in declaration-containing bodies where the next + /// sensible point is "the next member/declaration" rather than merely + /// "some statement". + DeclarationStart, + + /// A hard block boundary. + /// + /// This is the strongest normal recovery point. BlockBoundary, - /// Start of a top-level or class-level declaration. - TopDeclaration, } impl SyncLevel { - /// Converts [`Token`] to its [`SyncLevel`], if it has one. - fn for_token(token: Token) -> Option { - use SyncLevel::*; - use Token::*; + /// Converts a token to its synchronization class, if any. + const fn for_token(token: Token) -> Option { + use crate::lexer::Keyword; + use SyncLevel::{ + BlockBoundary, CloseAngleBracket, CloseBracket, CloseParenthesis, DeclarationStart, + Expression, ListSeparator, Statement, SwitchArmStart, + }; match token { - Exponentiation | Increment | Decrement | Not | BitwiseNot | Dot | Cross | Multiply - | Divide | Modulo | Plus | Minus | ConcatSpace | Concat | LeftShift - | LogicalRightShift | RightShift | Less | LessEqual | Greater | GreaterEqual - | Equal | NotEqual | ApproximatelyEqual | ClockwiseFrom | BitwiseAnd | BitwiseOr - | BitwiseXor | And | Xor | Or | Assign | MultiplyAssign | DivideAssign - | ModuloAssign | PlusAssign | MinusAssign | ConcatAssign | ConcatSpaceAssign - | Period | Question | Colon | LeftParenthesis | Identifier => Some(Expression), - - Comma => Some(ListSeparator), - - RightParenthesis => Some(CloseParenthesis), - RightBracket => Some(CloseBracket), - - Case | Default | If | Else | Switch | For | ForEach | While | Do | Return | Break - | Continue | Local | Semicolon => Some(Statement), - - Brace(_) | RightBrace => Some(BlockBoundary), - - Class | Struct | Enum | State | Function | Event | Delegate | 
Operator | Var - | Replication | NativeReplication | DefaultProperties | CppText | ExecDirective => { - Some(TopDeclaration) + // Expression-level recovery points + Token::Exponentiation + | Token::Increment + | Token::Decrement + | Token::Not + | Token::BitwiseNot + | Token::Multiply + | Token::Divide + | Token::Modulo + | Token::Plus + | Token::Minus + | Token::ConcatSpace + | Token::Concat + | Token::LeftShift + | Token::LogicalRightShift + | Token::RightShift + | Token::LessEqual + | Token::GreaterEqual + | Token::Equal + | Token::NotEqual + | Token::ApproximatelyEqual + | Token::BitwiseAnd + | Token::BitwiseOr + | Token::BitwiseXor + | Token::LogicalAnd + | Token::LogicalXor + | Token::LogicalOr + | Token::Assign + | Token::MultiplyAssign + | Token::DivideAssign + | Token::ModuloAssign + | Token::PlusAssign + | Token::MinusAssign + | Token::ConcatAssign + | Token::ConcatSpaceAssign + | Token::Period + | Token::Question + | Token::Colon + | Token::LeftParenthesis + | Token::Identifier + | Token::Keyword(Keyword::Dot | Keyword::Cross | Keyword::ClockwiseFrom) => { + Some(Expression) } - _ => Option::None, + // List / delimiter boundaries + Token::Comma => Some(ListSeparator), + Token::Greater => Some(CloseAngleBracket), + Token::RightParenthesis => Some(CloseParenthesis), + Token::RightBracket => Some(CloseBracket), + + // Statement-level boundaries + Token::Semicolon + | Token::Keyword( + Keyword::If + | Keyword::Else + | Keyword::Switch + | Keyword::For + | Keyword::ForEach + | Keyword::While + | Keyword::Do + | Keyword::Until + | Keyword::Return + | Keyword::Break + | Keyword::Continue + | Keyword::Local, + ) => Some(Statement), + + // Switch-specific stronger boundary + Token::Keyword(Keyword::Case | Keyword::Default) => Some(SwitchArmStart), + + // Declaration/member starts + Token::Keyword( + Keyword::Class + | Keyword::Struct + | Keyword::Enum + | Keyword::State + | Keyword::Function + | Keyword::Event + | Keyword::Delegate + | Keyword::Operator + | 
Keyword::Var + | Keyword::Replication + | Keyword::NativeReplication + | Keyword::DefaultProperties + | Keyword::CppText + | Keyword::CppStruct, + ) + | Token::ExecDirective => Some(DeclarationStart), + + // Hard structural stop + Token::LeftBrace | Token::CppBlock | Token::RightBrace => Some(BlockBoundary), + + _ => None, } } } -impl<'src, 'arena> Parser<'src, 'arena> { +impl Parser<'_, '_> { /// Converts a parse error into a diagnostic and queues it. /// /// Placeholder implementation. - fn handle_error(&mut self, error: ParseError) { - let diagnostic = crate::diagnostics::DiagnosticBuilder::error(format!( - "error {:?} while parsing", - error.kind - )) - .primary_label(error.source_span, "happened here") - .build(); - self.diagnostics.push(diagnostic); + pub fn report_error(&mut self, error: ParseError) { + self.diagnostics.push(Diagnostic::from(error)); } /// Reports a parser error with [`crate::parser::ParseErrorKind`] at /// the current location and queues an appropriate diagnostic. pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) { let new_error = self.make_error_here(error_kind); - self.handle_error(new_error); + self.report_error(new_error); } /// Skips tokens until a token with `min_sync` level or stronger is found. @@ -111,18 +209,32 @@ impl<'src, 'arena> Parser<'src, 'arena> { /// Supplies a fallback value after a parse error so parsing can continue and /// reveal further errors. -pub(crate) trait RecoveryFallback<'src, 'arena>: Sized { +pub trait RecoveryFallback<'src, 'arena>: Sized { fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self; } /// Extends [`ParseResult`] with recovery-related methods for /// fluent error handling. -pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized { +/// !!!! Can we store a parser reference instead of passing it into every method? +pub trait ResultRecoveryExt<'src, 'arena, T>: Sized { /// Extends the left end of the error span to `from`. 
/// /// Does nothing if `Self` is `Ok(...)`. #[must_use] - fn widen_error_span_from(self, from: TokenLocation) -> Self; + fn widen_error_span_from(self, from: TokenPosition) -> Self; + + fn blame(self, blame_span: AstSpan) -> Self; + fn related(self, related_span: AstSpan) -> Self; + + fn blame_token(self, blame_position: TokenPosition) -> Self { + self.blame(AstSpan::new(blame_position)) + } + + fn extend_blame_to_covered_end(self) -> Self; + + fn related_token(self, related_position: TokenPosition) -> Self { + self.related(AstSpan::new(related_position)) + } /// Extends the right end of the error span up to but not including /// the next token of the given sync `level`. @@ -140,28 +252,44 @@ pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized { /// Either returns expected value or its best effort fallback. #[must_use] - fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T; + fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T + where + T: RecoveryFallback<'src, 'arena>; /// Produces the contained value if successful, /// or a fallback if an error occurred. 
- fn report_error(self, parser: &mut Parser<'src, 'arena>); + fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool; + + fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option; } -impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> -where - T: RecoveryFallback<'src, 'arena>, -{ - fn widen_error_span_from(mut self, from: TokenLocation) -> Self { +impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> { + fn widen_error_span_from(mut self, from: TokenPosition) -> Self { if let Err(ref mut error) = self { - error.source_span.from = std::cmp::min(error.source_span.from, from); + error.covered_span.token_from = std::cmp::min(error.covered_span.token_from, from); } self } + fn blame(self, blame_span: AstSpan) -> Self { + self.map_err(|error| error.blame(blame_span)) + } + + fn extend_blame_to_covered_end(self) -> Self { + self.map_err(|error| error.extend_blame_to_covered_end()) + } + + fn related(self, related_span: AstSpan) -> Self { + self.map_err(|error| error.related(related_span)) + } + fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { if let Err(ref mut error) = self { parser.recover_until(level); - error.source_span.to = parser.last_visited_location(); + error.covered_span.token_to = std::cmp::max( + error.covered_span.token_to, + parser.last_consumed_position_or_start(), + ); } self } @@ -169,72 +297,213 @@ where fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { if let Err(ref mut error) = self { parser.recover_until(level); - error.source_span.to = parser.peek_location(); // If we're at end-of-file, this'll simply do nothing. 
- parser.advance(); + if parser + .peek_token() + .and_then(SyncLevel::for_token) + .is_some_and(|next_level| next_level == level) + { + parser.advance(); + } + error.covered_span.token_to = parser.last_consumed_position_or_start(); // need to be peek } self } - fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T { + fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T + where + T: RecoveryFallback<'src, 'arena>, + { self.unwrap_or_else(|error| { let value = T::fallback_value(parser, &error); - parser.handle_error(error); + parser.report_error(error); value }) } - fn report_error(self, parser: &mut Parser<'src, 'arena>) { + fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool { if let Err(error) = self { - parser.handle_error(error); + parser.report_error(error); + true + } else { + false + } + } + + fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option { + match self { + Ok(value) => Some(value), + Err(error) => { + parser.report_error(error); + None + } } } } impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError { - fn widen_error_span_from(mut self, from: TokenLocation) -> Self { - self.source_span.from = std::cmp::min(self.source_span.from, from); + fn widen_error_span_from(mut self, from: TokenPosition) -> Self { + self.covered_span.token_from = std::cmp::min(self.covered_span.token_from, from); + self + } + + fn blame(mut self, blame_span: AstSpan) -> Self { + self.blame_span = blame_span; + self + } + + fn extend_blame_to_covered_end(mut self) -> Self { + self.blame_span.token_to = self.covered_span.token_to; + self + } + + fn related(mut self, related_span: AstSpan) -> Self { + self.related_span = Some(related_span); self } fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { parser.recover_until(level); - self.source_span.to = parser.last_visited_location(); + self.covered_span.token_to = parser.last_consumed_position_or_start(); self } fn 
sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { parser.recover_until(level); - self.source_span.to = parser.peek_location(); // If we're at end-of-file, this'll simply do nothing. parser.advance(); + self.covered_span.token_to = parser.last_consumed_position_or_start(); self } - fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> () { - parser.handle_error(self); + fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) { + parser.report_error(self); } - fn report_error(self, parser: &mut Parser<'src, 'arena>) { - parser.handle_error(self); + fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool { + parser.report_error(self); + true + } + + fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<()> { + parser.report_error(self); + None } } impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 { fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self { - i128::default() + Self::default() } } impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 { fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self { - f64::default() + Self::default() } } -impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation { +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::IdentifierToken { fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { - error.source_span.to + Self(error.covered_span.token_from) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> + for crate::ast::CallableDefinitionRef<'src, 'arena> +{ + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + // default return type: Named("") at error span + let ret_id = crate::ast::IdentifierToken(err.covered_span.token_from); + let return_type = crate::arena::ArenaNode::new_in( + crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, ret_id)), + err.covered_span, + parser.arena, + ); + + let def = 
crate::ast::CallableDefinition { + name: crate::ast::CallableName::Identifier(IdentifierToken( + err.covered_span.token_from, + )), + kind: CallableKind::Function, + return_type_specifier: Some(return_type), + modifiers: parser.arena.vec(), + parameters: parser.arena.vec(), + body: None, + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StructDefRef<'src, 'arena> { + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + let def = crate::ast::StructDefinition { + name: None, + base_type_name: None, + modifiers: parser.arena.vec(), + fields: parser.arena.vec(), + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassVarDeclRef<'src, 'arena> { + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + let dummy_ident = crate::ast::IdentifierToken(err.covered_span.token_from); + let type_spec = crate::arena::ArenaNode::new_in( + crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident( + parser.arena, + dummy_ident, + )), + err.covered_span, + parser.arena, + ); + let def = crate::ast::ClassVarDecl { + paren_specs: None, + modifiers: parser.arena.vec(), + type_spec, + declarators: parser.arena.vec(), + span: err.covered_span, + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> + for crate::ast::ReplicationBlockRef<'src, 'arena> +{ + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + let def = crate::ast::ReplicationBlock { + rules: parser.arena.vec(), + span: err.covered_span, + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StateDeclRef<'src, 'arena> { + fn fallback_value(parser: &Parser<'src, 'arena>, 
err: &ParseError) -> Self { + let def = crate::ast::StateDecl { + name: crate::ast::IdentifierToken(err.covered_span.token_from), + parent: None, + modifiers: parser.arena.vec(), + ignores: None, + body: parser.arena.vec(), + span: err.covered_span, + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenPosition { + fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { + error.covered_span.token_to + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for (Token, TokenPosition) { + fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { + (Token::Error, error.covered_span.token_to) } } @@ -242,7 +511,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExpressionRef< fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { crate::arena::ArenaNode::new_in( crate::ast::Expression::Error, - error.source_span, + error.covered_span, parser.arena, ) } @@ -252,17 +521,51 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StatementRef<' fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { crate::arena::ArenaNode::new_in( crate::ast::Statement::Error, - error.source_span, + error.covered_span, parser.arena, ) } } -impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option -where - T: RecoveryFallback<'src, 'arena>, -{ - fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { - Some(T::fallback_value(parser, error)) +impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option { + fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self { + None + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassConstDeclRef<'src, 'arena> { + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + let name = crate::ast::IdentifierToken(err.covered_span.token_from); + let value = 
crate::ast::DeclarationLiteralRef { + literal: crate::ast::DeclarationLiteral::None, + position: err.covered_span.token_from, + }; + let def = crate::ast::ClassConstDecl { + name, + value, + span: err.covered_span, + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::TypeSpecifierRef<'src, 'arena> { + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + let dummy = crate::ast::IdentifierToken(err.covered_span.token_from); + crate::arena::ArenaNode::new_in( + crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, dummy)), + err.covered_span, + parser.arena, + ) + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExecDirectiveRef<'arena> { + fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { + let def = crate::ast::ExecDirective { + text: parser.arena.string(""), + span: err.covered_span, + }; + crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena) } } diff --git a/rottlib/src/parser/trivia.rs b/rottlib/src/parser/trivia.rs index 407004e..b2b375c 100644 --- a/rottlib/src/parser/trivia.rs +++ b/rottlib/src/parser/trivia.rs @@ -1,6 +1,6 @@ -//! This module provides trivia token collection mechanism that lets parser code -//! iterate over significant tokens while ignoring trivia and preserving -//! full information for linting, formatting, and documentation. +//! Records trivia separately from significant tokens so parser code can work +//! with significant tokens without losing comments, whitespace, or line +//! structure. //! //! Tokens considered *trivia* are: //! @@ -10,13 +10,27 @@ //! 4. [`crate::lexer::Token::Whitespace`]. //! //! Every other token is considered *significant*. +//! +//! ## Required usage +//! +//! This is an internal helper. Callers must follow the protocol below. +//! +//! 
[`TriviaIndexBuilder`] must be driven over a single token stream in +//! strictly increasing [`TokenPosition`] order. +//! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source +//! order, and call [`TriviaIndexBuilder::record_significant_token`] for each +//! significant token. +//! +//! After the last significant token has been processed, call +//! [`TriviaIndexBuilder::into_index`] to attach any trailing trivia. +//! +//! Violating this protocol is a logic error. -use crate::lexer::TokenLocation; +use crate::lexer::TokenPosition; -/// Types of trivia tokens, corresponding directly to the matching variants of -/// [`crate::lexer::Token`]. +/// Kinds of trivia tokens corresponding to variants of [`crate::lexer::Token`]. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub(crate) enum TriviaKind { +pub enum TriviaKind { Whitespace, Newline, LineComment, @@ -29,269 +43,215 @@ impl std::convert::TryFrom for TriviaKind { fn try_from(token: crate::lexer::Token) -> Result { use crate::lexer::Token; match token { - Token::Whitespace => Ok(TriviaKind::Whitespace), - Token::Newline => Ok(TriviaKind::Newline), - Token::LineComment => Ok(TriviaKind::LineComment), - Token::BlockComment => Ok(TriviaKind::BlockComment), + Token::Whitespace => Ok(Self::Whitespace), + Token::Newline => Ok(Self::Newline), + Token::LineComment => Ok(Self::LineComment), + Token::BlockComment => Ok(Self::BlockComment), _ => Err(()), } } } -/// Complete description of a trivia token. +/// A recorded trivia token. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] -pub(crate) struct TriviaToken<'src> { - /// Specific type of the trivia. +pub struct TriviaToken<'src> { + /// Kind of trivia token. pub kind: TriviaKind, - /// Actual content of the token. + /// Source text of the token. pub text: &'src str, /// Location of this trivia token in the token stream. 
- pub location: TokenLocation, + pub position: TokenPosition, } -type TriviaRange = std::ops::Range; -type TriviaMap = std::collections::HashMap; +type TriviaRangeMap = std::collections::HashMap>; -/// Immutable index over all recorded trivia. +/// Extends [`TokenPosition`] with start-of-file and end-of-file markers. /// -/// Enables O(1) access to trivia immediately before/after any significant -/// token, plus file-leading and file-trailing trivia. Returned slices alias -/// internal storage and live for `'src`. -#[derive(Clone, Debug, Default)] -#[allow(dead_code)] -pub(crate) struct TriviaIndex<'src> { - /// All trivia tokens, stored contiguously in file order. - tokens: Vec>, - /// Maps token location to the trivia tokens stored right after it. - after_map: TriviaMap, - /// Maps token location to the trivia tokens stored right before it. - before_map: TriviaMap, -} - -/// Extends [`TokenLocation`] with *start of file* value. -/// -/// Regular [`TokenLocation`] does not need this value, but trivia requires -/// a way to express "trivia before any significant token". +/// Regular [`TokenPosition`] values are enough for significant tokens, but +/// trivia also needs to represent content before the first significant token +/// and after the last one. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -enum TriviaLocation { - /// Position before any tokens, trivia or otherwise. +enum BoundaryLocation { StartOfFile, - /// This variant can also express "end of file" through - /// [`TokenLocation::EndOfFile`]. - At(TokenLocation), + Token(TokenPosition), + EndOfFile, } -/// Mutable builder for `TriviaIndex`. +/// Immutable index over recorded trivia. /// -/// Used inside the parser to record trivia between successive significant -/// tokens in file order, then frozen via `into_index`. -#[derive(Debug, Default)] +/// Provides O(1) access to trivia immediately before or after any significant +/// token, as well as file-leading and file-trailing trivia. 
Returned slices +/// borrow the index, and the contained token texts live for `'src`. +#[derive(Clone, Debug, PartialEq, Eq, Default)] #[allow(dead_code)] -pub(crate) struct TriviaComponent<'src> { +pub struct TriviaIndex<'src> { /// All trivia tokens, stored contiguously in file order. tokens: Vec>, - /// Maps token location to the trivia tokens stored right after it. - after_map: TriviaMap, - /// Maps token location to the trivia tokens stored right before it. - before_map: TriviaMap, - /// Location of the last gap's right boundary, - /// for debug-time invariant checks. - #[cfg(debug_assertions)] - last_right_boundary: Option, + /// Maps a trivia boundary location to the trivia tokens stored right + /// after it. + trivia_after_boundary: TriviaRangeMap, + /// Maps a trivia boundary location to the trivia tokens stored right + /// before it. + trivia_before_boundary: TriviaRangeMap, } -impl<'src> TriviaComponent<'src> { - /// Records trivia tokens that lie strictly between - /// `previous_token_location` and `next_token_location`. - /// - /// [`None`] for `previous_token_location` means beginning of file; - /// `next_token_location` may be [`TokenLocation::EndOfFile`]. - /// - /// Empties `gap_trivia` without changing its capacity. - /// - /// Requirements (checked in debug builds): - /// - previous_token_location < next_token_location; - /// - calls are monotonic: each gap starts at or after the last end; - /// - `collected` is nonempty and strictly ordered by `location`; - /// - all `collected` lie strictly inside (prev, next). - pub(crate) fn record_between_locations( - &mut self, - previous_token_location: Option, - next_token_location: TokenLocation, - gap_trivia: &mut Vec>, - ) { - #[cfg(debug_assertions)] - self.debug_assert_valid_recording_batch( - previous_token_location, - next_token_location, - &gap_trivia, - ); +/// Mutable builder for [`TriviaIndex`]. 
+/// +/// Records trivia between successive significant tokens while the caller walks +/// a token stream in file order. Once all tokens have been processed, call +/// [`TriviaIndexBuilder::into_index`] to finalize the index. +#[derive(Debug)] +#[allow(dead_code)] +pub struct TriviaIndexBuilder<'src> { + /// All trivia tokens, stored contiguously in file order. + tokens: Vec>, + /// Maps boundary location to the trivia tokens stored right after it. + trivia_after_boundary: TriviaRangeMap, + /// Maps boundary location to the trivia tokens stored right before it. + trivia_before_boundary: TriviaRangeMap, - if gap_trivia.is_empty() { + /// Trivia collected since the last significant token (or file start), + /// not yet attached to a right boundary. + pending_trivia: Vec>, + + /// Left boundary of the currently open gap. + current_left_boundary: BoundaryLocation, +} + +impl Default for TriviaIndexBuilder<'_> { + fn default() -> Self { + Self { + tokens: Vec::new(), + trivia_after_boundary: TriviaRangeMap::default(), + trivia_before_boundary: TriviaRangeMap::default(), + pending_trivia: Vec::new(), + current_left_boundary: BoundaryLocation::StartOfFile, + } + } +} + +impl<'src> TriviaIndexBuilder<'src> { + /// Records `token` as trivia. + /// + /// Tokens must be recorded in file order. + pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) { + #[cfg(debug_assertions)] + self.debug_assert_position_is_in_order(token.position); + + self.pending_trivia.push(token); + } + + /// Records a significant token at `position`. + /// + /// Positions must be recorded in file order. 
+ pub(crate) fn record_significant_token(&mut self, position: TokenPosition) { + let right_boundary = BoundaryLocation::Token(position); + + #[cfg(debug_assertions)] + self.debug_assert_position_is_in_order(position); + + self.flush_pending_trivia_to_boundary(right_boundary); + self.current_left_boundary = right_boundary; + } + + // Stores one trivia range under both neighboring boundaries so lookups + // from either side return the same slice. + fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) { + if self.pending_trivia.is_empty() { return; } - - let previous_token_location = previous_token_location - .map(TriviaLocation::At) - .unwrap_or(TriviaLocation::StartOfFile); - let next_token_location = TriviaLocation::At(next_token_location); - let trivia_start = self.tokens.len(); - self.tokens.append(gap_trivia); + self.tokens.append(&mut self.pending_trivia); let trivia_end = self.tokens.len(); - - self.after_map - .insert(previous_token_location, trivia_start..trivia_end); - self.before_map - .insert(next_token_location, trivia_start..trivia_end); + self.trivia_after_boundary + .insert(self.current_left_boundary, trivia_start..trivia_end); + self.trivia_before_boundary + .insert(right_boundary, trivia_start..trivia_end); } - /// Freezes into an immutable, shareable index. + /// Finalizes the builder and returns the completed trivia index. + /// + /// Any pending trivia is recorded as trailing trivia. #[must_use] #[allow(dead_code)] - pub(crate) fn into_index(self) -> TriviaIndex<'src> { + pub(crate) fn into_index(mut self) -> TriviaIndex<'src> { + self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile); + TriviaIndex { tokens: self.tokens, - after_map: self.after_map, - before_map: self.before_map, + trivia_after_boundary: self.trivia_after_boundary, + trivia_before_boundary: self.trivia_before_boundary, } } - /// Trivia immediately after the significant token at `location`. 
- /// - /// Returns an empty slice if `location` is not pointing at - /// a significant token or if no trivia was recorded after it. - #[must_use] - #[allow(dead_code)] - pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] { - self.slice_for(TriviaLocation::At(location), &self.after_map) - } - - /// Trivia immediately before the significant token at `location`. - /// - /// Returns an empty slice if `location` is not pointing at - /// a significant token or if no trivia was recorded before it. - #[must_use] - #[allow(dead_code)] - pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] { - self.slice_for(TriviaLocation::At(location), &self.before_map) - } - - /// Trivia before any significant token. - #[must_use] - #[allow(dead_code)] - pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] { - self.slice_for(TriviaLocation::StartOfFile, &self.after_map) - } - - /// Trivia after the last significant token. - #[must_use] - #[allow(dead_code)] - pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] { - self.slice_for( - TriviaLocation::At(TokenLocation::EndOfFile), - &self.before_map, - ) - } - - // Helper: return the recorded slice or an empty slice if none. - #[track_caller] - #[allow(dead_code)] - fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] { - if let Some(range) = map.get(&key) { - // Ranges are guaranteed to be valid by construction - &self.tokens[range.start..range.end] - } else { - &[] - } - } - - /// Debug-only validation for `record_between_locations`'s contract. + // Catches out-of-order recording during development; the builder relies + // on this ordering invariant. 
#[cfg(debug_assertions)] - fn debug_assert_valid_recording_batch( - &mut self, - previous_token_location: Option, - next_token_location: TokenLocation, - collected: &[TriviaToken<'src>], - ) { - // Prevent zero-width or reversed gaps - debug_assert!(previous_token_location < Some(next_token_location)); - let previous_token_location = previous_token_location - .map(TriviaLocation::At) - .unwrap_or(TriviaLocation::StartOfFile); - let next_token_location = TriviaLocation::At(next_token_location); - // Enforce monotonic gaps: we record in file order - if let Some(last_right) = self.last_right_boundary { - debug_assert!(previous_token_location >= last_right); + fn debug_assert_position_is_in_order(&self, position: TokenPosition) { + let location = BoundaryLocation::Token(position); + debug_assert!(location > self.current_left_boundary); + if let Some(last) = self.pending_trivia.last() { + debug_assert!(last.position < position); } - self.last_right_boundary = Some(next_token_location); - let first_trivia_location = collected - .first() - .map(|token| TriviaLocation::At(token.location)) - .expect("Provided trivia tokens array should not be empty."); - let last_trivia_location = collected - .last() - .map(|token| TriviaLocation::At(token.location)) - .expect("Provided trivia tokens array should not be empty."); - // Ensure trivia lies strictly inside the gap - debug_assert!(previous_token_location < first_trivia_location); - debug_assert!(next_token_location > last_trivia_location); - // Ensure trivia locations are strictly increasing - debug_assert!( - collected - .windows(2) - .all(|window| window[0].location < window[1].location) - ); } } impl<'src> TriviaIndex<'src> { - /// Trivia immediately after the significant token at `location`. + /// Returns the trivia immediately after the significant token at + /// `position`. /// - /// Returns an empty slice if `location` is not pointing at - /// a significant token or if no trivia was recorded after it. 
+ /// Returns an empty slice if `position` does not identify a recorded + /// significant token or if no trivia was recorded after it. #[must_use] #[allow(dead_code)] - pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] { - self.slice_for(TriviaLocation::At(location), &self.after_map) - } - - /// Trivia immediately before the significant token at `location`. - /// - /// Returns an empty slice if `location` is not pointing at - /// a significant token or if no trivia was recorded before it. - #[must_use] - #[allow(dead_code)] - pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] { - self.slice_for(TriviaLocation::At(location), &self.before_map) - } - - /// Trivia before any significant token. - #[must_use] - #[allow(dead_code)] - pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] { - self.slice_for(TriviaLocation::StartOfFile, &self.after_map) - } - - /// Trivia after the last significant token. - #[must_use] - #[allow(dead_code)] - pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] { + pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] { self.slice_for( - TriviaLocation::At(TokenLocation::EndOfFile), - &self.before_map, + BoundaryLocation::Token(position), + &self.trivia_after_boundary, ) } - // Helper: return the recorded slice or an empty slice if none. - #[track_caller] + /// Returns the trivia immediately before the significant token at `position`. + /// + /// Returns an empty slice if `position` does not identify a recorded + /// significant token or if no trivia was recorded before it. 
+ #[must_use] #[allow(dead_code)] - fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] { - if let Some(range) = map.get(&key) { - // Ranges are guaranteed to be valid by construction - &self.tokens[range.start..range.end] - } else { - &[] + pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] { + self.slice_for( + BoundaryLocation::Token(position), + &self.trivia_before_boundary, + ) + } + + /// Returns the trivia before the first significant token. + /// + /// If no significant tokens were recorded, returns all recorded trivia. + #[must_use] + #[allow(dead_code)] + pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] { + self.slice_for(BoundaryLocation::StartOfFile, &self.trivia_after_boundary) + } + + /// Returns the trivia after the last significant token. + /// + /// If no significant tokens were recorded, returns all recorded trivia. + #[must_use] + #[allow(dead_code)] + pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] { + self.slice_for(BoundaryLocation::EndOfFile, &self.trivia_before_boundary) + } + + #[allow(dead_code)] + fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] { + match map.get(&key) { + Some(range) => { + // Ranges are guaranteed to be valid by construction + debug_assert!(range.start <= range.end); + debug_assert!(range.end <= self.tokens.len()); + self.tokens.get(range.clone()).unwrap_or(&[]) + } + None => &[], } } } diff --git a/rottlib/tests/common.rs b/rottlib/tests/common.rs new file mode 100644 index 0000000..d467771 --- /dev/null +++ b/rottlib/tests/common.rs @@ -0,0 +1,63 @@ +use std::path::{Path, PathBuf}; + +use rottlib::lexer::{Token, TokenData, TokenPosition, TokenizedFile}; + +pub fn fixture_path(name: &str) -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +pub fn read_fixture(name: &str) -> String { + let path = fixture_path(name); + 
std::fs::read_to_string(&path) + .unwrap_or_else(|e| panic!("failed to read fixture {}: {e}", path.display())) +} + +pub fn with_fixture(name: &str, f: impl for<'src> FnOnce(&'src str, TokenizedFile<'src>)) { + let source = read_fixture(name); + let file = TokenizedFile::tokenize(&source); + f(&source, file); +} + +pub fn line_lexemes<'file, 'src>(file: &'file TokenizedFile<'src>, line: usize) -> Vec<&'src str> { + file.line_tokens(line).map(|(_, t)| t.lexeme).collect() +} + +pub fn line_tokens<'src>(file: &TokenizedFile<'src>, line: usize) -> Vec { + file.line_tokens(line).map(|(_, t)| t.token).collect() +} + +pub fn line_positions<'src>(file: &TokenizedFile<'src>, line: usize) -> Vec { + file.line_tokens(line).map(|(pos, _)| pos).collect() +} + +pub fn line_pairs<'file, 'src>( + file: &'file TokenizedFile<'src>, + line: usize, +) -> Vec<(Token, &'src str)> { + file.line_tokens(line) + .map(|(_, t)| (t.token, t.lexeme)) + .collect() +} + +pub fn all_lexemes<'file, 'src>(file: &'file TokenizedFile<'src>) -> Vec<&'src str> { + file.iter().map(|(_, t)| t.lexeme).collect() +} + +pub fn all_tokens<'src>(file: &TokenizedFile<'src>) -> Vec { + file.iter().map(|(_, t)| t.token).collect() +} + +pub fn token_at<'src>(file: &TokenizedFile<'src>, index: usize) -> Option> { + file.token_at(TokenPosition(index)) +} + +pub fn reconstruct_source<'file, 'src>(file: &'file TokenizedFile<'src>) -> String { + file.iter().map(|(_, t)| t.lexeme).collect() +} + +pub fn find_line<'src>(file: &TokenizedFile<'src>, needle: &str) -> Option { + (0..file.line_count()).find(|&line| file.line_text(line).as_deref() == Some(needle)) +} diff --git a/rottlib/tests/fixtures/CommandAPI.uc b/rottlib/tests/fixtures/CommandAPI.uc new file mode 100644 index 0000000..2b6e403 --- /dev/null +++ b/rottlib/tests/fixtures/CommandAPI.uc @@ -0,0 +1,1578 @@ +/** + * Author: dkanus + * Home repo: https://www.insultplayers.ru/git/AcediaFramework/AcediaCore + * License: GPL + * Copyright 2023 Anton Tarasenko + 
*------------------------------------------------------------------------------ + * This file is part of Acedia. + * + * Acedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License, or + * (at your option) any later version. + * + * Acedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Acedia. If not, see . + */ +class CommandAPI extends AcediaObject; + +/// Possible statuses of command locks. +/// +/// Command locks can prevent certain commands from being used. +/// A good example is having a "cheat" lock that commands can check to before +/// doing anything that can be considered cheating. +/// Their purpose is to track the "purity" of the game matches +/// (e.g. whether cheats were used). +enum LockStatus { + /// Lock is currently closed, preventing commands from doing actions that + /// might require it opened. + /// Lock can still be opened. + LS_Closed, + /// Lock is currently opened, allowing commands to perform actions that + /// might require it opened. + /// Lock can still be closed. + LS_Open, + /// Lock is currently closed, preventing commands from doing actions that + /// might require it opened. + /// Lock can no longer be opened during the game session. + LS_ClosedFixed, + /// Lock is currently opened, allowing commands to perform actions that + /// might require it opened. + /// Lock can no longer be closed during the game session. + /// For "cheat" lock, for example, it can happen if someone has already + /// used cheats. 
+ LS_OpenFixed +}; + +/// Describes possible outcomes of starting a voting by its name +enum StartVotingResult { + /// Voting was successfully started. + SVR_Success, + /// There is no people that are allowed to vote. + SVR_NoVoters, + /// Voting itself decided to reject being started, most likely because of + /// the specified arguments. + SVR_Rejected, + /// Voting wasn't started because another one was still in progress. + SVR_AlreadyInProgress, + /// Voting wasn't started because voting with that name hasn't been + /// registered. + SVR_UnknownVoting, + /// `CommandAPI` isn't functioning properly. + SVR_InvalidState +}; + +/// Struct for storing the result of "resolving" [`Command`] access for +/// some user. +struct CommandConfigInfo { + /// Instance of the command. If it equals to `none`, then [`configName`] + /// will also equal `none`. + var public Command instance; + /// Config that determines permissions for the command [`instance`]. + /// Can equal to `none` if: + /// + /// * [`Command`] class didn't setup a custom permissions config. + /// * Using provided instance is forbidden + /// (in this case `usageForbidden` will be set to `true`). + var public CommandPermissions config; + /// Set to `true` in case using provided command if forbidden. + var public bool usageForbidden; +}; + +/// Struct for storing the result of "resolving" [`Voting`] access for +/// some user. +struct VotingConfigInfo { + /// [`Voting`]'s class. If it equals to `none`, then [`configName`] + /// will also equal `none`. + var public class votingClass; + /// Config that determines permissions for the [`votingClass`]. + /// Can equal to `none` if using provided instance is forbidden + /// (in this case `usageForbidden` will be set to `true`). + var public VotingPermissions config; + /// Set to `true` in case using provided voting if forbidden. + var public bool usageForbidden; +}; + +/// Internal enum that describes all types of scheduled jobs that this API can +/// produce. 
+enum AsyncJobType { + CAJT_AddCommand, + CAJT_AuthorizeCommand, + CAJT_AddVoting, + CAJT_AuthorizeVoting +}; + +/// Internal struct that can describe data for any scheduled job that this API +/// can produce. +struct AsyncTask { + var public AsyncJobType type; + // For all jobs + var public Text entityName; + // For `CAJT_Add...` + var public class entityClass; + // For `CAJT_Authorize` + var public Text userGroup; + // For `CAJT_Authorize...` + var public Text configName; +}; + +/// Internal struct for storing all the utility objects that `Commands_Feature` +/// uses. +struct CommandFeatureTools { + var public CommandsTool commands; + var public VotingsTool votings; +}; + +/// Internal struct for representing the result of the resolve command by either +/// [`CommandsTool`] or [`VotingsTool`]. +/// +/// Defined in `CommandAPI` instead of their base [`CmdItemsTool`] to avoid +/// compiler issues with resolving `dependson`-graph. +struct ItemConfigInfo { + var public AcediaObject instance; + var public class class; + // Name of config that determines permissions for the item [`instance`] + var public Text configName; +}; + +/// Classes registered to be added in async way +var private array pendingAsyncJobs; +/// Job that is supposed to register pending commands, will be asking for new +/// jobs from [`pendingAsyncJobs`] once it completes its current task. +var private CommandRegistrationJob registeringJob; + +/// Saves `HashTable` with command locks. +/// Locks are simply boolean switches that mark for commands whether they +/// can be executed. +/// +/// Lock is considered "unlocked" if this `HashTable` stores `true` at the key +/// with its name and `false` otherwise. 
+var private HashTable commandLocks; + +var private CommandsTool commandsTool; +var private VotingsTool votingsTool; +var private int commandsToolLifeVersion; +var private int votingsToolLifeVersion; + +var protected CommandsAPI_OnCommandAdded_Signal onCommandAddedSignal; +var protected CommandsAPI_OnCommandRemoved_Signal onCommandRemovedSignal; +var protected CommandsAPI_OnVotingAdded_Signal onVotingAddedSignal; +var protected CommandsAPI_OnVotingRemoved_Signal onVotingRemovedSignal; + +protected function Constructor() { + onCommandAddedSignal = CommandsAPI_OnCommandAdded_Signal( + _.memory.Allocate(class'CommandsAPI_OnCommandAdded_Signal')); + onCommandRemovedSignal = CommandsAPI_OnCommandRemoved_Signal( + _.memory.Allocate(class'CommandsAPI_OnCommandRemoved_Signal')); + onVotingAddedSignal = CommandsAPI_OnVotingAdded_Signal( + _.memory.Allocate(class'CommandsAPI_OnVotingAdded_Signal')); + onVotingRemovedSignal = CommandsAPI_OnVotingRemoved_Signal( + _.memory.Allocate(class'CommandsAPI_OnVotingRemoved_Signal')); +} + +protected function Finalizer() { + _.memory.Free(onCommandAddedSignal); + _.memory.Free(onCommandRemovedSignal); + _.memory.Free(onVotingAddedSignal); + _.memory.Free(onVotingRemovedSignal); + onCommandAddedSignal = none; + onCommandRemovedSignal = none; + onVotingAddedSignal = none; + onVotingRemovedSignal = none; +} + +/// Signal that will be emitted when a new [`Command`] class is successfully +/// added through this API. +/// +/// # Slot description +/// +/// bool (class addedClass, Text usedName) +/// +/// ## Parameters +/// +/// * [`addedClass`]: Class of the command that got added. +/// * [`usedName`]: Name, under which command class was added. +public /*signal*/ function CommandsAPI_OnCommandAdded_Slot OnCommandAdded(AcediaObject receiver) { + return CommandsAPI_OnCommandAdded_Slot(onCommandAddedSignal.NewSlot(receiver)); +} + +/// Signal that will be emitted when a [`Command`] class is removed through +/// this API. 
+/// +/// # Slot description +/// +/// bool (class removedClass) +/// +/// ## Parameters +/// +/// * [`removedClass`]: Class of the command that got removed. +public /*signal*/ function CommandsAPI_OnCommandRemoved_Slot OnCommandRemoved( + AcediaObject receiver +) { + return CommandsAPI_OnCommandRemoved_Slot(onCommandRemovedSignal.NewSlot(receiver)); +} + +/// Signal that will be emitted when a new [`Voting`] class is successfully +/// added through this API. +/// +/// # Slot description +/// +/// bool (class addedClass, Text usedName) +/// +/// ## Parameters +/// +/// * [`addedClass`]: Class of the voting that got added. +/// * [`usedName`]: Name, under which voting class was added. +public /*signal*/ function CommandsAPI_OnVotingAdded_Slot OnVotingAdded(AcediaObject receiver) { + return CommandsAPI_OnVotingAdded_Slot(onVotingAddedSignal.NewSlot(receiver)); +} + +/// Signal that will be emitted when a [`Voting`] class is removed through +/// this API. +/// +/// # Slot description +/// +/// bool (class removedClass) +/// +/// ## Parameters +/// +/// * [`removedClass`]: Class of the voting that got removed. +public /*signal*/ function CommandsAPI_OnVotingRemoved_Slot OnVotingRemoved(AcediaObject receiver) { + return CommandsAPI_OnVotingRemoved_Slot(onVotingRemovedSignal.NewSlot(receiver)); +} + +/// Checks if `Commands_Feature` is enabled, which is required for this API +/// to be functional. +public final function bool AreCommandsEnabled() { + local Commands_Feature feature; + + feature = Commands_Feature(class'Commands_Feature'.static.GetEnabledInstance()); + _.memory.Free(feature); + // We can still compare to `none` even after deallocation to see if + // `GetEnabledInstance()` returned a valid instance. + return (feature != none); +} + +/// Returns array of names of all available commands. +/// +/// Resulting array cannot contain duplicates. 
public final function array<Text> GetAllCommandNames() {
    local array<Text> emptyResult;

    if (VerifyCommandsTool()) {
        return commandsTool.GetItemsNames();
    }
    return emptyResult;
}

/// Registers given command class, making it available via [`Execute()`].
///
/// Optionally a [`BaseText`] can be specified to be used as this command's name
/// (the main name, that command aliases are to be resolved into).
/// If name parameter is omitted (specified as `none`) or constitutes an empty
/// [`BaseText`], a command's preferred name will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Command`] from being registered.
///
/// # Errors
///
/// Returns `true` if command was successfully registered and `false`
/// otherwise.
///
/// If [`commandClass`] provides command with a name that is already taken
/// (comparison is case-insensitive) by a different command - a warning will be
/// logged and command class won't be registered.
///
/// If [`commandClass`] provides command with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and command class
/// won't be registered.
public final function bool AddCommand(
    class<Command> commandClass,
    optional BaseText commandName
) {
    local bool result;
    local Text immutableCommandName;

    if (commandClass == none) {
        return false;
    }
    if (VerifyCommandsTool()) {
        if (commandName != none) {
            immutableCommandName = commandName.Copy();
        } else {
            immutableCommandName = commandClass.static.GetPreferredName();
        }
        result = commandsTool.AddItemClass(commandClass, immutableCommandName);
        if (result) {
            onCommandAddedSignal.Emit(commandClass, immutableCommandName);
        }
        _.memory.Free(immutableCommandName);
    }
    // FIX: method previously always returned `false`, discarding `result`
    // even after a successful registration.
    return result;
}

/// Registers given command class, making it available via [`Execute()`].
///
/// Optionally a [`string`] can be specified to be used as this command's name
/// (the main name, that command aliases are to be resolved into).
/// If name parameter is omitted (specified as empty [`string`]), a command's
/// default name (defined via its [`CommandDataBuilder`]) will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Command`] from being registered.
///
/// # Errors
///
/// Returns `true` if command was successfully registered and
/// `false` otherwise.
///
/// If [`commandClass`] provides command with a name that is already taken
/// (comparison is case-insensitive) by a different command - a warning will be
/// logged and command class won't be registered.
///
/// If [`commandClass`] provides command with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and command class
/// won't be registered.
public final function bool AddCommand_S(
    class<Command> commandClass,
    optional string commandName
) {
    local bool result;
    local MutableText wrapper;

    if (commandName != "") {
        wrapper = _.text.FromStringM(commandName);
    }
    // `AddCommand()` treats `none` as "use the command's preferred name"
    result = AddCommand(commandClass, wrapper);
    _.memory.Free(wrapper);
    return result;
}

/// Registers given command class asynchronously, making it available
/// via [`Execute()`].
///
/// Doesn't register commands immediately, instead scheduling it to be done at
/// a later moment in time. This can help to reduce amount of work we do every
/// tick during server startup, therefore avoiding crashes due to the faulty
/// infinite loop detection. Different async calls from [`CommandAPI`] are
/// guaranteed to be handled in the order they were called.
///
/// Optionally a [`BaseText`] can be specified to be used as this command's name
/// (the main name, that command aliases are to be resolved into).
/// If name parameter is omitted (specified as `none`) or constitutes an empty
/// [`BaseText`], a command's default name (defined via its
/// [`CommandDataBuilder`]) will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Command`] from being registered.
///
/// # Errors
///
/// Returns `true` if command was successfully registered and
/// `false` otherwise.
///
/// If [`commandClass`] provides command with a name that is already taken
/// (comparison is case-insensitive) by a different command - a warning will be
/// logged and command class won't be registered.
///
/// If [`commandClass`] provides command with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and command class
/// won't be registered.
///
/// Warnings might be logged *after* the call itself, when the command class is
/// actually added.
public final function AddCommandAsync(
    class<Command> commandClass,
    optional BaseText commandName
) {
    local AsyncTask newJob;

    if (!VerifyCommandsTool()) {
        return;
    }
    // NOTE(review): unlike `AddCommand()`, a `none` class is not rejected
    // here; it is scheduled and presumably reported when the job runs -
    // confirm intended behavior.
    newJob.type = CAJT_AddCommand;
    newJob.entityClass = commandClass;
    if (commandName != none) {
        newJob.entityName = commandName.Copy();
    }
    pendingAsyncJobs[pendingAsyncJobs.length] = newJob;
    // (Re)start the scheduler-driven registration job if none is active
    if (registeringJob == none || registeringJob.IsCompleted()) {
        _.memory.Free(registeringJob);
        registeringJob = CommandRegistrationJob(_.memory.Allocate(class'CommandRegistrationJob'));
        _.scheduler.AddJob(registeringJob);
    }
}

/// Registers given command class asynchronously, making it available
/// via [`Execute()`].
///
/// Doesn't register commands immediately, instead scheduling it to be done at
/// a later moment in time. This can help to reduce amount of work we do every
/// tick during server startup, therefore avoiding crashes due to the faulty
/// infinite loop detection. Different async calls from [`CommandAPI`] are
/// guaranteed to be handled in the order they were called.
///
/// Optionally a [`string`] can be specified to be used as this command's name
/// (the main name, that command aliases are to be resolved into).
/// If name parameter is omitted (specified as empty [`string`]), a command's
/// default name (defined via its [`CommandDataBuilder`]) will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Command`] from being registered.
///
/// # Errors
///
/// If [`commandClass`] provides command with a name that is already taken
/// (comparison is case-insensitive) by a different command - a warning will be
/// logged and command class won't be registered.
///
/// If [`commandClass`] provides command with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and command class
/// won't be registered.
///
/// Warnings might be logged *after* the call itself, when the command class is
/// actually added.
public final function AddCommandAsync_S(
    class<Command> commandClass,
    optional string commandName
) {
    local MutableText wrapper;

    if (commandName != "") {
        wrapper = _.text.FromStringM(commandName);
    }
    AddCommandAsync(commandClass, wrapper);
    _.memory.Free(wrapper);
}

/// Authorizes new user group to use the specified command, optionally
/// specifying name of the config (config's class is determined by the
/// [`Command`]'s class) that describes permissions of that group.
///
/// Method must be called after [`Command`] with a given name is added.
///
/// If config name is specified as `none`, then "default" will be
/// used instead.
/// For non-`none` values, an invalid name (according to
/// [`BaseText::IsValidName()`] method) will also prevent the group from being
/// authorized.
///
/// Function will return `true` if group was successfully authorized and
/// `false` otherwise (either group already authorized or no command with
/// specified name was added in the caller tool).
///
/// # Errors
///
/// If specified group was already authorized to use a command, then it
/// will log a warning message about it.
public function bool AuthorizeCommandUsage(
    BaseText commandName,
    BaseText groupName,
    optional BaseText configName
) {
    // Authorization is delegated entirely to the commands tool
    if (!VerifyCommandsTool()) {
        return false;
    }
    return commandsTool.AuthorizeUsage(commandName, groupName, configName);
}

/// Authorizes new user group to use the specified command, optionally
/// specifying name of the config (config's class is determined by the
/// [`Command`]'s class) that describes permissions of that group.
///
/// Method must be called after [`Command`] with a given name is added.
///
/// If this config name is specified as empty, then "default" will be
/// used instead. For non-empty values, an invalid name (according to
/// [`BaseText::IsValidName()`] method) will prevent the group from being
/// authorized.
///
/// # Errors
///
/// If specified group was already authorized to use a command, then it
/// will log a warning message about it.
public function bool AuthorizeCommandUsage_S(
    string commandName,
    string userGroupName,
    optional string configName
) {
    local bool authorized;
    local MutableText wrapperCommandName, wrapperGroupName, wrapperConfigName;

    // Wrap plain strings into text objects for the `BaseText`-based overload
    wrapperCommandName = _.text.FromStringM(commandName);
    wrapperGroupName = _.text.FromStringM(userGroupName);
    wrapperConfigName = _.text.FromStringM(configName);
    authorized = AuthorizeCommandUsage(wrapperCommandName, wrapperGroupName, wrapperConfigName);
    _.memory.Free3(wrapperCommandName, wrapperGroupName, wrapperConfigName);
    return authorized;
}

/// Authorizes new user group to use the specified command, optionally
/// specifying name of the config (config's class is determined by the
/// [`Command`]'s class) that describes permissions of that group.
///
/// Method must be called after [`Command`] with a given name is added.
///
/// Doesn't authorize commands immediately, instead scheduling it to be done at
/// a later moment in time. This can help to reduce amount of work we do every
/// tick during server startup, therefore avoiding crashes due to the faulty
/// infinite loop detection. Different async calls from [`CommandAPI`] are
/// guaranteed to be handled in the order they were called.
///
/// If config name is specified as `none`, then "default" will be
/// used instead. For non-`none` values, an invalid name (according to
/// [`BaseText::IsValidName()`] method) will also prevent the group from being
/// authorized.
///
/// Function will return `true` if group was successfully authorized and
/// `false` otherwise (either group already authorized or no command with
/// specified name was added in the caller tool).
///
/// # Errors
///
/// If specified group was already authorized to use a command, then it
/// will log a warning message about it.
public function AuthorizeCommandUsageAsync(
    BaseText commandName,
    BaseText userGroupName,
    optional BaseText configName
) {
    local AsyncTask newTask;

    if (!VerifyCommandsTool()) {
        return;
    }
    // Copy all names so the scheduled task owns its own text objects
    newTask.type = CAJT_AuthorizeCommand;
    if (commandName != none) {
        newTask.entityName = commandName.Copy();
    }
    if (userGroupName != none) {
        newTask.userGroup = userGroupName.Copy();
    }
    if (configName != none) {
        newTask.configName = configName.Copy();
    }
    pendingAsyncJobs[pendingAsyncJobs.length] = newTask;
    // (Re)start the scheduler-driven registration job if none is active
    if (registeringJob == none || registeringJob.IsCompleted()) {
        _.memory.Free(registeringJob);
        registeringJob = CommandRegistrationJob(_.memory.Allocate(class'CommandRegistrationJob'));
        _.scheduler.AddJob(registeringJob);
    }
}

/// Authorizes new user group to use the specified command, optionally
/// specifying name of the config (config's class is determined by the
/// [`Command`]'s class) that describes permissions of that group.
///
/// Method must be called after [`Command`] with a given name is added.
///
/// Doesn't authorize commands immediately, instead scheduling it to be done at
/// a later moment in time. This can help to reduce amount of work we do every
/// tick during server startup, therefore avoiding crashes due to the faulty
/// infinite loop detection. Different async calls from [`CommandAPI`] are
/// guaranteed to be handled in the order they were called.
///
/// If this config name is specified as empty, then "default" will be
/// used instead. For non-empty values, an invalid name (according to
/// [`BaseText::IsValidName()`] method) will prevent the group from being
/// authorized.
///
/// # Errors
///
/// If specified group was already authorized to use a command, then it
/// will log a warning message about it.
public function AuthorizeCommandUsageAsync_S(
    string commandName,
    string userGroupName,
    optional string configName
) {
    local MutableText wrapperCommandName, wrapperGroupName, wrapperConfigName;

    wrapperCommandName = _.text.FromStringM(commandName);
    wrapperGroupName = _.text.FromStringM(userGroupName);
    wrapperConfigName = _.text.FromStringM(configName);
    AuthorizeCommandUsageAsync(wrapperCommandName, wrapperGroupName, wrapperConfigName);
    _.memory.Free3(wrapperCommandName, wrapperGroupName, wrapperConfigName);
}

/// Removes command of given class from the list of registered commands.
///
/// Removing once registered commands is not an action that is expected to
/// be performed under normal circumstances and does not have an efficient
/// implementation (it is linear on the current amount of commands).
///
/// Returns `true` if successfully removed registered [`Command`] class and
/// `false` otherwise (command wasn't registered).
public final function bool RemoveCommand(class<Command> commandClass) {
    local bool result;

    if (VerifyCommandsTool()) {
        result = commandsTool.RemoveItemClass(commandClass);
        if (result) {
            onCommandRemovedSignal.Emit(commandClass);
        }
    }
    return result;
}

/// Returns instance of the [`Command`] that was added under a specified name.
public final function Command GetCommand(BaseText commandName) {
    local ItemConfigInfo intermediaryResult;

    if (VerifyCommandsTool()) {
        // `none` config argument means we'll get instance + class, but without
        // config, so nothing to deallocate in `intermediaryResult`
        intermediaryResult = commandsTool.ResolveItem(commandName, none);
        return Command(intermediaryResult.instance);
    }
    return none;
}

/// Returns instance of the [`Command`] that was added under a specified name.
public final function Command GetCommand_S(string commandName) {
    local Command foundCommand;
    local MutableText nameAsText;

    // Wrap the plain string and delegate to the `BaseText`-based overload
    nameAsText = _.text.FromStringM(commandName);
    foundCommand = GetCommand(nameAsText);
    _.memory.Free(nameAsText);
    return foundCommand;
}

/// Returns pair of [`Command`] and config name based on a given
/// case-insensitive name and text ID of the caller player.
///
/// Function only returns `none` for [`Command`] instance if [`Command`] with
/// a given name wasn't found.
public final function CommandConfigInfo ResolveCommandForTextID(BaseText itemName, BaseText id) {
    local ItemConfigInfo resolved;
    local CommandConfigInfo result;

    if (!VerifyCommandsTool()) {
        return result;
    }
    resolved = commandsTool.ResolveItem(itemName, id);
    result.instance = Command(resolved.instance);
    if (result.instance == none) {
        // Unknown command name - nothing else to fill in
        return result;
    }
    if (resolved.configName == none) {
        // Command exists, but the caller has no config for it
        result.usageForbidden = true;
    } else {
        result.config = result.instance.LoadConfig(resolved.configName);
        _.memory.Free(resolved.configName);
    }
    return result;
}

/// Returns pair of [`Command`] and config name based on a given
/// case-insensitive name and text ID of the caller player.
///
/// Function only returns `none` for [`Command`] instance if [`Command`] with
/// a given name wasn't found.
public final function CommandConfigInfo ResolveCommandForTextID_S(string itemName, string id) {
    local CommandConfigInfo result;
    local MutableText itemNameAsText, idAsText;

    itemNameAsText = _.text.FromStringM(itemName);
    idAsText = _.text.FromStringM(id);
    result = ResolveCommandForTextID(itemNameAsText, idAsText);
    _.memory.Free2(itemNameAsText, idAsText);
    return result;
}

/// Returns pair of [`Command`] and config name based on a given
/// case-insensitive name and [`UserID`] of the caller player.
///
/// Function only returns `none` for [`Command`] instance if [`Command`] with
/// a given name wasn't found or provided id was `none`.
public final function CommandConfigInfo ResolveCommandForUserID(BaseText itemName, UserID id) {
    local CommandConfigInfo result;
    local Text textID;

    // Bail out early on missing arguments, returning an empty struct
    if (itemName == none || id == none) {
        return result;
    }
    textID = id.GetUniqueID();
    if (textID == none) {
        return result;
    }
    result = ResolveCommandForTextID(itemName, textID);
    textID.FreeSelf();
    return result;
}

/// Returns pair of [`Command`] and config name based on a given
/// case-insensitive name and [`UserID`] of the caller player.
///
/// Function only returns `none` for [`Command`] instance if [`Command`] with
/// a given name wasn't found or provided id was `none`.
public final function CommandConfigInfo ResolveCommandForUserID_S(string itemName, UserID id) {
    local CommandConfigInfo result;
    local MutableText itemNameAsText;

    itemNameAsText = _.text.FromStringM(itemName);
    result = ResolveCommandForUserID(itemNameAsText, id);
    _.memory.Free(itemNameAsText);
    return result;
}

/// Returns pair of [`Command`] and config name based on a given
/// case-insensitive name and [`User`] of the caller player.
///
/// Function only returns `none` for [`Command`] instance if [`Command`] with
/// a given name wasn't found or provided id was `none`.
public final function CommandConfigInfo ResolveCommandForUser(BaseText itemName, User user) {
    local CommandConfigInfo result;
    local UserID userID;

    if (itemName == none || user == none) {
        return result;
    }
    userID = user.GetID();
    result = ResolveCommandForUserID(itemName, userID);
    _.memory.Free(userID);
    return result;
}

/// Returns pair of [`Command`] and config name based on a given
/// case-insensitive name and [`User`] of the caller player.
///
/// Function only returns `none` for [`Command`] instance if [`Command`] with
/// a given name wasn't found or provided id was `none`.
public final function CommandConfigInfo ResolveCommandForUser_S(string itemName, User user) {
    local CommandConfigInfo result;
    local MutableText wrapper;

    wrapper = _.text.FromStringM(itemName);
    result = ResolveCommandForUser(wrapper, user);
    _.memory.Free(wrapper);
    return result;
}

/// Returns all available command groups' names.
public final function array<Text> GetGroupsNames() {
    local array<Text> emptyResult;

    if (VerifyCommandsTool()) {
        return commandsTool.GetGroupsNames();
    }
    return emptyResult;
}

/// Returns all available command groups' names.
public final function array<string> GetGroupsNames_S() {
    local array<string> emptyResult;

    if (VerifyCommandsTool()) {
        return _.text.IntoStrings(commandsTool.GetGroupsNames());
    }
    return emptyResult;
}

/// Returns array of names of all available commands belonging to the specified
/// command group.
public final function array<Text> GetCommandNamesInGroup(BaseText groupName) {
    local array<Text> emptyResult;

    if (VerifyCommandsTool()) {
        return commandsTool.GetCommandNamesInGroup(groupName);
    }
    return emptyResult;
}

/// Returns array of names of all available commands belonging to the specified
/// command group.
public final function array<string> GetCommandNamesInGroup_S(string groupName) {
    local array<string> result;
    local MutableText wrapper;

    if (VerifyCommandsTool()) {
        wrapper = _.text.FromStringM(groupName);
        result = _.text.IntoStrings(commandsTool.GetCommandNamesInGroup(wrapper));
        _.memory.Free(wrapper);
    }
    return result;
}

/// Executes command based on the textual input with a given instigator.
///
/// Input should be provided in a form that players are expected to use.
/// For example, "mutate inventory @all" or "say !inventory @all" will both
/// translate into calling this method with "inventory @all" argument.
///
/// Command's instigator will receive appropriate result/error messages.
///
/// # Errors
///
/// Doesn't log any errors, but can complain about errors in name or parameters
/// to the [`instigator`].
public final function Execute(BaseText commandLine, EPlayer instigator) {
    local Commands_Feature commandsFeature;

    commandsFeature = Commands_Feature(class'Commands_Feature'.static.GetEnabledInstance());
    if (commandsFeature == none) {
        // Feature disabled - nothing to execute, nothing to release
        return;
    }
    commandsFeature.HandleInput(commandLine, instigator);
    _.memory.Free(commandsFeature);
}

/// Executes command based on the textual input with a given instigator.
///
/// Input should be provided in a form that players are expected to use.
/// For example, "mutate inventory @all" or "say !inventory @all" will both
/// translate into calling this method with "inventory @all" argument.
///
/// Command's instigator will receive appropriate result/error messages.
///
/// # Errors
///
/// Doesn't log any errors, but can complain about errors in name or parameters
/// to the [`instigator`].
public final function Execute_S(string commandLine, EPlayer instigator) {
    local MutableText commandLineAsText;
    local Commands_Feature commandsFeature;

    commandsFeature = Commands_Feature(class'Commands_Feature'.static.GetEnabledInstance());
    if (commandsFeature != none) {
        commandLineAsText = _.text.FromStringM(commandLine);
        commandsFeature.HandleInput(commandLineAsText, instigator);
        _.memory.Free(commandLineAsText);
    }
    _.memory.Free(commandsFeature);
}

/// Returns current status of a lock with a given name.
public final function LockStatus GetLockStatus(BaseText lockName) {
    local LockStatus status;
    local Text lowerName;

    if (lockName == none) {
        return LS_ClosedFixed;
    }
    // Lazily create the lock storage on first access
    if (commandLocks == none) {
        commandLocks = _.collections.EmptyHashTable();
    }
    // Lock names are case-insensitive, so keys are stored in lower case
    lowerName = lockName.LowerCopy();
    status = LockStatus(commandLocks.GetInt(lowerName));
    lowerName.FreeSelf();
    return status;
}

/// Returns current status of a lock with a given name.
public final function LockStatus GetLockStatus_S(string lockName) {
    local LockStatus status;
    local MutableText nameAsText;

    nameAsText = _.text.FromStringM(lockName);
    status = GetLockStatus(nameAsText);
    _.memory.Free(nameAsText);
    return status;
}

/// Sets new status for a lock with a given name.
///
/// Can fail and return `false` in case lock was already fixed in the opposite
/// state. Otherwise returns `true`.
public final function bool SetLockStatus(BaseText lockName, LockStatus newStatus) {
    local bool changeAllowed;
    local Text lowerName;

    if (lockName == none) {
        return false;
    }
    if (commandLocks == none) {
        commandLocks = _.collections.EmptyHashTable();
    }
    lowerName = lockName.LowerCopy();
    // NOTE(review): only `LS_OpenFixed` blocks further changes here; a lock in
    // `LS_ClosedFixed` state can still be overwritten (e.g. by `Unlock()`),
    // which seems to contradict the documentation - confirm intended
    // semantics before changing.
    changeAllowed = (LockStatus(commandLocks.GetInt(lowerName)) != LS_OpenFixed);
    if (changeAllowed) {
        commandLocks.SetInt(lowerName, int(newStatus));
    }
    lowerName.FreeSelf();
    return changeAllowed;
}

/// Sets new status for a lock with a given name.
///
/// Can fail and return `false` in case lock was already fixed in the opposite
/// state. Otherwise returns `true`.
public final function bool SetLockStatus_S(string lockName, LockStatus newStatus) {
    local bool changed;
    local MutableText nameAsText;

    nameAsText = _.text.FromStringM(lockName);
    changed = SetLockStatus(nameAsText, newStatus);
    _.memory.Free(nameAsText);
    return changed;
}

/// Closes a command lock with a given case-insensitive name, preventing it from
/// being opened again.
///
/// Can fail and return `false` in case lock was already fixed in the opened
/// state. Otherwise returns `true`.
public final function bool Lock(BaseText lockName) {
    return SetLockStatus(lockName, LS_ClosedFixed);
}

/// Closes a command lock with a given case-insensitive name, preventing it from
/// being opened again.
///
/// Can fail and return `false` in case lock was already fixed in the opened
/// state. Otherwise returns `true`.
public final function bool Lock_S(string lockName) {
    return SetLockStatus_S(lockName, LS_ClosedFixed);
}

/// Opens a command lock with a given case-insensitive name.
///
/// Lock can still be closed after successful execution of this command.
///
/// Can fail and return `false` in case lock was already fixed in the closed
/// state. Otherwise returns `true`.
public final function bool Unlock(BaseText lockName) {
    return SetLockStatus(lockName, LS_Open);
}

/// Opens a command lock with a given case-insensitive name.
///
/// Lock can still be closed after successful execution of this command.
///
/// Can fail and return `false` in case lock was already fixed in the closed
/// state. Otherwise returns `true`.
public final function bool Unlock_S(string lockName) {
    return SetLockStatus_S(lockName, LS_Open);
}

/// Opens a command lock with a given case-insensitive name, preventing it from
/// being closed again.
///
/// Can fail and return `false` in case lock was already fixed in the closed
/// state. Otherwise returns `true`.
public final function bool BreakOpenLock(BaseText lockName) {
    return SetLockStatus(lockName, LS_OpenFixed);
}

/// Opens a command lock with a given case-insensitive name, preventing it from
/// being closed again.
///
/// Can fail and return `false` in case lock was already fixed in the closed
/// state. Otherwise returns `true`.
public final function bool BreakOpenLock_S(string lockName) {
    return SetLockStatus_S(lockName, LS_OpenFixed);
}

/// Checks if a command lock with a given case-insensitive name is closed.
///
/// If lock is closed, method returns `true` (regardless of whether or not it is
/// fixed in closed state) and `false` if it's currently open.
public final function bool IsLocked(BaseText lockName) {
    local LockStatus currentStatus;

    currentStatus = GetLockStatus(lockName);
    return (currentStatus == LS_ClosedFixed || currentStatus == LS_Closed);
}

/// Checks if a command lock with a given case-insensitive name is closed.
///
/// If lock is closed, method returns `true` (regardless of whether or not it is
/// fixed in closed state) and `false` if it's currently open.
public final function bool IsLocked_S(string lockName) {
    local LockStatus currentStatus;

    currentStatus = GetLockStatus_S(lockName);
    return (currentStatus == LS_ClosedFixed || currentStatus == LS_Closed);
}

/// Returns array of all registered [`Voting`] classes.
///
/// Resulting array can contain duplicates, but only if the same voting class
/// was registered by several different names.
public final function array< class<Voting> > GetAllVotingClasses() {
    local int i;
    // TODO(review): element type of `GetAllItemClasses()`'s result was lost in
    // extraction - confirm `class<Object>` matches the tool's declaration.
    local array< class<Object> > intermediaryResult;
    local array< class<Voting> > result;

    if (VerifyVotingsTool()) {
        intermediaryResult = votingsTool.GetAllItemClasses();
        for (i = 0; i < intermediaryResult.length; i += 1) {
            result[result.length] = class<Voting>(intermediaryResult[i]);
        }
    }
    return result;
}

/// Returns array of names of all available votings.
///
/// Resulting array cannot contain duplicates.
public final function array<Text> GetAllVotingsNames() {
    local array<Text> emptyResult;

    if (VerifyVotingsTool()) {
        return votingsTool.GetItemsNames();
    }
    return emptyResult;
}

/// Registers given voting class, making it available via [`StartVoting()`].
///
/// Optionally a [`BaseText`] can be specified to be used as this voting's name.
/// If name parameter is omitted (specified as `none`) or constitutes an empty
/// [`BaseText`], a voting's preferred name will be used.
///
/// If voting name is specified as `none`, then "default" will be
/// used instead.
/// For non-`none` values, invalid name (according to
/// [`BaseText::IsValidName()`] method) will also prevent the voting from being
/// registered.
///
/// # Errors
///
/// Returns `true` if voting was successfully registered and `false`
/// otherwise.
///
/// If [`votingClass`] provides voting with a name that is already taken
/// (comparison is case-insensitive) by a different voting - a warning will be
/// logged and voting class won't be registered.
///
/// If [`votingClass`] provides voting with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and voting class
/// won't be registered.
public final function bool AddVoting(
    class<Voting> votingClass,
    optional BaseText votingName
) {
    local bool result;
    local Text immutableVotingName;

    if (votingClass == none) {
        return false;
    }
    if (VerifyVotingsTool()) {
        if (votingName != none) {
            immutableVotingName = votingName.Copy();
        } else {
            immutableVotingName = votingClass.static.GetPreferredName();
        }
        result = votingsTool.AddItemClass(votingClass, immutableVotingName);
        if (result) {
            onVotingAddedSignal.Emit(votingClass, immutableVotingName);
        }
        _.memory.Free(immutableVotingName);
    }
    // FIX: method previously always returned `false`, discarding `result`
    // even after a successful registration.
    return result;
}

/// Registers given voting class, making it available via [`StartVoting()`].
///
/// Optionally a [`string`] can be specified to be used as this voting's name.
/// If name parameter is omitted (specified as empty [`string`]), a voting's
/// preferred name will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Voting`] from being registered.
///
/// # Errors
///
/// Returns `true` if voting was successfully registered and `false`
/// otherwise.
///
/// If [`votingClass`] provides voting with a name that is already taken
/// (comparison is case-insensitive) by a different voting - a warning will be
/// logged and voting class won't be registered.
///
/// If [`votingClass`] provides voting with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and voting class
/// won't be registered.
public final function bool AddVoting_S(
    class<Voting> votingClass,
    optional string votingName
) {
    local bool result;
    local MutableText wrapper;

    if (votingName != "") {
        wrapper = _.text.FromStringM(votingName);
    }
    // `AddVoting()` treats `none` as "use the voting's preferred name"
    result = AddVoting(votingClass, wrapper);
    _.memory.Free(wrapper);
    return result;
}

/// Registers given voting class, making it available via [`StartVoting()`].
///
/// Doesn't register voting immediately, instead scheduling it to be done at
/// a later moment in time. This can help to reduce amount of work we do every
/// tick during server startup, therefore avoiding crashes due to the faulty
/// infinite loop detection.
///
/// Optionally a [`BaseText`] can be specified to be used as this voting's name.
/// If name parameter is omitted (specified as `none`) or constitutes an empty
/// [`BaseText`], a voting's preferred name will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Voting`] from being registered.
///
/// # Errors
///
/// Returns `true` if voting was successfully registered and `false`
/// otherwise.
///
/// If [`votingClass`] provides voting with a name that is already taken
/// (comparison is case-insensitive) by a different voting - a warning will be
/// logged and voting class won't be registered.
///
/// If [`votingClass`] provides voting with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and voting class
/// won't be registered.
public final function AddVotingAsync(
    class<Voting> votingClass,
    optional BaseText votingName
) {
    local AsyncTask newTask;

    if (!VerifyVotingsTool()) {
        return;
    }
    // NOTE(review): unlike `AddVoting()`, a `none` class is not rejected
    // here; it is scheduled and presumably reported when the task runs -
    // confirm intended behavior.
    newTask.type = CAJT_AddVoting;
    newTask.entityClass = votingClass;
    if (votingName != none) {
        newTask.entityName = votingName.Copy();
    }
    pendingAsyncJobs[pendingAsyncJobs.length] = newTask;
    // (Re)start the scheduler-driven registration job if none is active
    if (registeringJob == none || registeringJob.IsCompleted()) {
        _.memory.Free(registeringJob);
        registeringJob = CommandRegistrationJob(_.memory.Allocate(class'CommandRegistrationJob'));
        _.scheduler.AddJob(registeringJob);
    }
}

/// Registers given voting class, making it available via [`StartVoting()`].
///
/// Doesn't register votings immediately, instead scheduling it to be done at
/// a later moment in time. This can help to reduce amount of work we do every
/// tick during server startup, therefore avoiding crashes due to the faulty
/// infinite loop detection. Different async calls from [`CommandAPI`] are
/// guaranteed to be handled in the order they were called.
///
/// Optionally a [`string`] can be specified to be used as this voting's name.
/// If name parameter is omitted (specified as empty [`string`]), a voting's
/// preferred name will be used.
///
/// Invalid name (according to [`BaseText::IsValidName()`] method) will prevent
/// the [`Voting`] from being registered.
///
/// # Errors
///
/// If [`votingClass`] provides voting with a name that is already taken
/// (comparison is case-insensitive) by a different voting - a warning will be
/// logged and voting class won't be registered.
///
/// If [`votingClass`] provides voting with an empty name (and it isn't
/// overridden by the argument) - a warning will be logged and voting class
/// won't be registered.
public final function AddVotingAsync_S(
    class<Voting> votingClass,
    optional string votingName
) {
    local MutableText wrapper;

    if (votingName != "") {
        wrapper = _.text.FromStringM(votingName);
    }
    AddVotingAsync(votingClass, wrapper);
    _.memory.Free(wrapper);
}

/// Authorizes new user group to use the specified voting, optionally
/// specifying name of the config (config's class is determined by the
/// [`Voting`]'s class) that describes permissions of that group.
///
/// Method must be called after [`Voting`] with a given name is added.
///
/// If config name is specified as `none`, then "default" will be
/// used instead. For non-`none` values, invalid name (according to
/// [`BaseText::IsValidName()`] method) will also prevent the group from being
/// authorized.
///
/// Function will return `true` if group was successfully authorized and
/// `false` otherwise (either group already authorized or no voting with
/// specified name was added in the caller tool).
///
/// # Errors
///
/// If specified group was already authorized to use a voting, then it
/// will log a warning message about it.
public function bool AuthorizeVotingUsage(
    BaseText votingName,
    BaseText groupName,
    optional BaseText configName
) {
    if (VerifyVotingsTool()) {
        return votingsTool.AuthorizeUsage(votingName, groupName, configName);
    }
    return false;
}

/// Authorizes new user group to use the specified voting, optionally
/// specifying name of the config (config's class is determined by the
/// [`Voting`]'s class) that describes permissions of that group.
///
/// Method must be called after [`Voting`] with a given name is added.
///
/// If this config name is specified as empty, then "default" will be
/// used instead. For non-empty values, an invalid name (according to
/// [`BaseText::IsValidName()`] method) will prevent the group from being
/// authorized.
+/// +/// Function will return `true` if group was successfully authorized and +/// `false` otherwise (either group already authorized or no voting with +/// specified name was added in the caller tool). +/// +/// # Errors +/// +/// If specified group was already authorized to use a voting, then it +/// will log a warning message about it. +public function bool AuthorizeVotingUsage_S( + string votingName, + string userGroupName, + optional string configName +) { + local bool result; + local MutableText wrapperVotingName, wrapperGroupName, wrapperConfigName; + + wrapperVotingName = _.text.FromStringM(votingName); + wrapperGroupName = _.text.FromStringM(userGroupName); + wrapperConfigName = _.text.FromStringM(configName); + result = AuthorizeVotingUsage(wrapperVotingName, wrapperGroupName, wrapperConfigName); + _.memory.Free3(wrapperVotingName, wrapperGroupName, wrapperConfigName); + return result; +} + +/// Authorizes new user group to use the specified voting, optionally +/// specifying name of the config (config's class is determined by the +/// [`Voting`]'s class) that describes permissions of that group. +/// +/// Method must be called after [`Voting`] with a given name is added. +/// +/// Doesn't authorize votings immediately, instead scheduling it to be done at +/// a later moment in time. This can help to reduce amount of work we do every +/// tick during server startup, therefore avoiding crashed due to the faulty +/// infinite loop detection. Different async calls from [`CommandAPI`] are +/// guaranteed to be handled in the order they were called. +/// +/// If config name is specified as `none`, then "default" will be +/// used instead. For non-`none` values, invalid name (according to +/// [`BaseText::IsValidName()`] method) will also prevent the group from being +/// authorized. 
+/// +/// Function will return `true` if group was successfully authorized and +/// `false` otherwise (either group already authorized or no voting with +/// specified name was added in the caller tool). +/// +/// # Errors +/// +/// If specified group was already authorized to use a voting, then it +/// will log a warning message about it. +public function AuthorizeVotingUsageAsync( + BaseText votingName, + BaseText userGroupName, + optional BaseText configName +) { + local AsyncTask newTask; + + if (!VerifyVotingsTool()) { + return; + } + newTask.type = CAJT_AuthorizeVoting; + if (votingName != none) { + newTask.entityName = votingName.Copy(); + } + if (userGroupName != none) { + newTask.userGroup = userGroupName.Copy(); + } + if (configName != none) { + newTask.configName = configName.Copy(); + } + pendingAsyncJobs[pendingAsyncJobs.length] = newTask; + if (registeringJob == none || registeringJob.IsCompleted()) { + _.memory.Free(registeringJob); + registeringJob = CommandRegistrationJob(_.memory.Allocate(class'CommandRegistrationJob')); + _.scheduler.AddJob(registeringJob); + } +} + +/// Authorizes new user group to use the specified voting, optionally +/// specifying name of the config (config's class is determined by the +/// [`Voting`]'s class) that describes permissions of that group. +/// +/// Method must be called after [`Voting`] with a given name is added. +/// +/// Doesn't authorize votings immediately, instead scheduling it to be done at +/// a later moment in time. This can help to reduce amount of work we do every +/// tick during server startup, therefore avoiding crashed due to the faulty +/// infinite loop detection. Different async calls from [`CommandAPI`] are +/// guaranteed to be handled in the order they were called. +/// +/// If this config name is specified as empty, then "default" will be +/// used instead. 
For non-empty values, an invalid name (according to +/// [`BaseText::IsValidName()`] method) will prevent the group from being +/// authorized. +/// +/// Function will return `true` if group was successfully authorized and +/// `false` otherwise (either group already authorized or no voting with +/// specified name was added in the caller tool). +/// +/// # Errors +/// +/// If specified group was already authorized to use a voting, then it +/// will log a warning message about it. +public function AuthorizeVotingUsageAsync_S( + string votingName, + string userGroupName, + optional string configName +) { + local MutableText wrapperVotingName, wrapperGroupName, wrapperConfigName; + + wrapperVotingName = _.text.FromStringM(votingName); + wrapperGroupName = _.text.FromStringM(userGroupName); + wrapperConfigName = _.text.FromStringM(configName); + AuthorizeVotingUsageAsync(wrapperVotingName, wrapperGroupName, wrapperConfigName); + _.memory.Free3(wrapperVotingName, wrapperGroupName, wrapperConfigName); +} + +/// Removes voting of given class from the list of registered votings. +/// +/// Removing once registered votings is not an action that is expected to +/// be performed under normal circumstances and does not have an efficient +/// implementation (it is linear on the current amount of votings). +/// +/// Returns `true` if successfully removed registered [`Voting`] class and +/// `false` otherwise (voting wasn't registered). +public final function bool RemoveVoting(class votingClass) { + local bool result; + + if (VerifyVotingsTool()) { + result = votingsTool.RemoveItemClass(votingClass); + if (result) { + onVotingRemovedSignal.Emit(votingClass); + } + } + return result; +} + +/// Returns class of the [`Voting`] that was added under a specified name. 
+public final function class GetVotingClass(BaseText itemName) { + local ItemConfigInfo intermediaryResult; + + if (VerifyVotingsTool()) { + intermediaryResult = votingsTool.ResolveItem(itemName, none); + return class(intermediaryResult.class); + } + return none; +} + +/// Returns pair of [`Voting`] and config name based on a given +/// case-insensitive name and text ID of the caller player. +/// +/// Function only returns `none` for [`Voting`] instance if [`Voting`] with +/// a given name wasn't found. +public final function VotingConfigInfo ResolveVotingForTextID(BaseText itemName, BaseText id) { + local ItemConfigInfo intermediaryResult; + local VotingConfigInfo result; + + if (VerifyVotingsTool()) { + intermediaryResult = votingsTool.ResolveItem(itemName, id); + result.votingClass = class(intermediaryResult.class); + if (result.votingClass == none) { + return result; + } + if (intermediaryResult.configName == none) { + result.usageForbidden = true; + } else { + result.config = result.votingClass.static.LoadConfig(intermediaryResult.configName); + _.memory.Free(intermediaryResult.configName); + } + } + return result; +} + +/// Returns pair of [`Voting`] and config name based on a given +/// case-insensitive name and text ID of the caller player. +/// +/// Function only returns `none` for [`Voting`] instance if [`Voting`] with +/// a given name wasn't found. +public final function VotingConfigInfo ResolveVotingForTextID_S(string itemName, string id) { + local VotingConfigInfo result; + local MutableText wrapperItemname, wrapperID; + + wrapperItemname = _.text.FromStringM(itemName); + wrapperID = _.text.FromStringM(id); + result = ResolveVotingForTextID(wrapperItemname, wrapperID); + _.memory.Free2(wrapperItemname, wrapperID); + return result; +} + +/// Returns pair of [`Voting`] and config name based on a given +/// case-insensitive name and [`UserID`] of the caller player. 
+/// +/// Function only returns `none` for [`Voting`] instance if [`Voting`] with +/// a given name wasn't found or provided id was `none`. +public final function VotingConfigInfo ResolveVotingForUserID(BaseText itemName, UserID id) { + local VotingConfigInfo result; + local Text textID; + + if (itemName == none) return result; + if (id == none) return result; + textID = id.GetUniqueID(); + if (textID == none) return result; + + result = ResolveVotingForTextID(itemName, textID); + textID.FreeSelf(); + return result; +} + +/// Returns pair of [`Voting`] and config name based on a given +/// case-insensitive name and [`UserID`] of the caller player. +/// +/// Function only returns `none` for [`Voting`] instance if [`Voting`] with +/// a given name wasn't found or provided id was `none`. +public final function VotingConfigInfo ResolveVotingForUserID_S(string itemName, UserID id) { + local VotingConfigInfo result; + local MutableText wrapper; + + wrapper = _.text.FromStringM(itemName); + result = ResolveVotingForUserID(wrapper, id); + _.memory.Free(wrapper); + return result; +} + +/// Returns pair of [`Voting`] and config name based on a given +/// case-insensitive name and [`User`] of the caller player. +/// +/// Function only returns `none` for [`Voting`] instance if [`Voting`] with +/// a given name wasn't found or provided id was `none`. +public final function VotingConfigInfo ResolveVotingForUser(BaseText itemName, User user) { + local VotingConfigInfo result; + local UserID id; + + if (itemName == none) return result; + if (user == none) return result; + + id = user.GetID(); + result = ResolveVotingForUserID(itemName, id); + _.memory.Free(id); + return result; +} + +/// Returns pair of [`Voting`] and config name based on a given +/// case-insensitive name and [`User`] of the caller player. +/// +/// Function only returns `none` for [`Voting`] instance if [`Voting`] with +/// a given name wasn't found or provided id was `none`. 
+public final function VotingConfigInfo ResolveVotingForUser_S(string itemName, User user) { + local VotingConfigInfo result; + local MutableText wrapper; + + wrapper = _.text.FromStringM(itemName); + result = ResolveVotingForUser(wrapper, user); + _.memory.Free(wrapper); + return result; +} + +/// Starts a voting process with a given name and arguments. +public final function StartVotingResult StartVoting( + VotingConfigInfo votingData, + optional HashTable arguments) { + if (VerifyVotingsTool()) { + return votingsTool.StartVoting(votingData, arguments); + } + return SVR_InvalidState; +} + +/// Returns instance of the active voting. +/// +/// `none` iff no voting is currently active. +public final function Voting GetCurrentVoting() { + if (VerifyVotingsTool()) { + return votingsTool.GetCurrentVoting(); + } + return none; +} + +// DO NOT CALL MANUALLY +public final /*internal*/ function AsyncTask _popPending() { + local AsyncTask result; + + if (pendingAsyncJobs.length == 0) { + return result; + } + result = pendingAsyncJobs[0]; + pendingAsyncJobs.Remove(0, 1); + return result; +} + +// DO NOT CALL MANUALLY +public final /*internal*/ function _reloadFeature() { + local Commands_Feature commandsFeature; + local CommandFeatureTools toolsBundle; + + commandsToolLifeVersion = -1; + votingsToolLifeVersion = -1; + commandsFeature = Commands_Feature(class'Commands_Feature'.static.GetEnabledInstance()); + if (commandsFeature != none) { + toolsBundle = commandsFeature._borrowTools(); + commandsTool = toolsBundle.commands; + if (commandsTool != none) { + commandsToolLifeVersion = commandsTool.GetLifeVersion(); + } + votingsTool = toolsBundle.votings; + if (votingsTool != none) { + votingsToolLifeVersion = votingsTool.GetLifeVersion(); + } + } + _.memory.Free(commandsFeature); +} + +private final function bool VerifyCommandsTool() { + if (commandsTool == none) { + return false; + } + if (!commandsTool.IsAllocated() || commandsTool.GetLifeVersion() != commandsToolLifeVersion) 
{ + commandsTool = none; + return false; + } + return true; +} + +private final function bool VerifyVotingsTool() { + if (votingsTool == none) { + return false; + } + if (!votingsTool.IsAllocated() || votingsTool.GetLifeVersion() != votingsToolLifeVersion) { + votingsTool = none; + return false; + } + return true; +} + +defaultproperties { +} \ No newline at end of file diff --git a/rottlib/tests/fixtures/DBRecord.uc b/rottlib/tests/fixtures/DBRecord.uc new file mode 100644 index 0000000..c9b9688 --- /dev/null +++ b/rottlib/tests/fixtures/DBRecord.uc @@ -0,0 +1,1199 @@ +/** + * This should be considered an internal class and a detail of + * implementation. + * This is a data object that is used to store JSON data inside + * Unreal Engine's save packages (see `GameInfo` class, starting from + * `CreateDataObject()` method). + * Auxiliary data object that can store either a JSON array or an object in + * the local Acedia database. It is supposed to be saved and loaded + * to / from packages. + * Copyright 2021-2023 Anton Tarasenko + *------------------------------------------------------------------------------ + * This file is part of Acedia. + * + * Acedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License, or + * (at your option) any later version. + * + * Acedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Acedia. If not, see . + */ +class DBRecord extends Object + perobjectconfig + config(AcediaDB); + +/** + * # How can JSON information be stored in per-config-objects? 
+ * Standard way to store information locally would be by simply recording + * it inside a config file. This is enough for almost anything. + * Even something like ServerPerks' player database is implemented with just + * a per-config-objects: since it just stores a particular data per player - + * it can do with simply creating one storage object per player. + * We, however, want to store an arbitrary JSON object inside our database + * that can contain any other kind of JSON data and not just player's + * numeric stats. With some additional work this again can also be done with + * per-config-objects. For example, if we want to store a JSON object inside + * another JSON object - we can create both of them separately, give them some + * different arbitrary names and then make the first one refer to the second + * one by it's given name. + * This way we can create a config object for each JSON array/object and + * then store it's data as an array of plain types (same as ServerPerks' one): + * null, boolean, number and string can be stored as is and other + * JSON arrays/objects can be stored by their references. + * + * # Why are we using data objects instead of per-object-configs? + * Despite everything described above, Acedia's local databases DO NOT use + * per-object-configs to store their data, opting for data objects and + * Unreal Engine's save packages instead. + * Data objects can be created, loaded and saved inside Unreal Engine's + * binary packages with methods available from `GameInfo` class (look them up + * starting from `CreateDataObject()` or browsing through + * [wiki](https://wiki.beyondunreal.com/Legacy:DataObject)). + * They can essentially act the same as per-object-configs, but have + * an advantage of allowing us to cheaply (execution time-wise) create/delete + * as many objects as we need and then update their package on the disk instead + * of calling `SaveConfig()` or `ClearConfig()` on them one-by-one. 
This both + * simplifies and speed up a bunch of necessary operations. + * They also seem to behave more predictably. + * + * # Some terminology + * Acedia's objects (representing JSON values) that are getting loaded + * into the `DBRecord`s are called "objects". We then refer to their + * representation within `DBRecord`s as "items". For example, this class has + * two methods for conversion between the two: `ConvertObjectToItem()` and + * `ConvertItemToObject()`. + * Most other methods are: + * 1. either methods that actually perform Acedia's database queries; + * 2. or methods that provide safe and easy access to the `DBRecord`'s + * items array (like making sure to remove unneeded data objects). + * All of the methods that perform database query rely on the + * `ConvertPointer()` method that take `JSONPointer` and convert it into + * internal pointer representation that immediately points at `DBRecord` that + * represents referred data (or contains it). + */ + +// Name of the database package this object belongs to +var private string package; +// Does this record store a JSON array (`true`) or object (`false`)? +var private bool isJSONArray; + +// `ToCollection()` and `EraseSelf()` methods make recursive calls on their +// "sub-objects" (referred via name). If database was somehow damaged - a loop +// of references can occur, leading to infinite recursive calls (which results +// in a crash). These variable help to avoid that by preventing re-entry into +// these methods for the same object. +var private bool lockToCollection; +var private bool lockEraseSelf; + +/** + * We pack as much information into the type of the record: + * whether it's 'null', 'boolean', 'number', 'string' or reference to another + * `DBRecord`. + * If it's 'boolean', then record value in the type + * (`DBAT_False` / `DBAT_True`), if `number` record whether it's `int` or + * `float`. 
+ * While JSON does not distinguish between `int` and `float`, we still + * have to pick one of these type when transferring JSON numeric value into + * UnrealScript, plus it's easier for us to store it in one of these types. +*/ +enum DBDataType +{ + DBAT_Null, + DBAT_False, + DBAT_True, + DBAT_Int, + DBAT_Float, + DBAT_String, + // We actually store the name of another `DBRecord` that represents either + // sub-array or sub-object. + DBAT_Reference, + // Some integer values we might want to store won't fit into `int`, so we + // store them as `BigIntData` + DBAT_BigInt, +}; + +/** + * Store JSON array / object as a bunch of values. + * Which variable is used to store value depends on the type `t`. + */ +struct StorageItem +{ + // Determines whether variable's value is stored in `i`, `f` or `s`. + var DBDataType t; + // For JSON objects only (`isJSONArray == false`), stores the key of + // corresponding value. + var string k; + var int i; + var float f; + // For both `DBRecord` references and JSON strings + var string s; + // For storing `BigInt`'s `BigIntData` - last `byte` stores `negative` + // value + var array b; +}; +var private config array storage; + +var private const int LATIN_LETTERS_AMOUNT; +var private const int LOWER_A_CODEPOINT, UPPER_A_CODEPOINT; +var private const string JSONPOINTER_NEW_ARRAY_ELEMENT; + +/** + * Since `DBRecord` represents JSON array or object, we can use + * JSON pointers to refer to any sub-value inside it. + * However, JSON pointers are not convenient or efficient enough for that, + * so internally we use this struct that provides quick and easy access to + * any sub-value. + */ +struct DBRecordPointer +{ + // `DBRecord` inside which referred value is directly stored. + // `record == none` automatically makes `DBRecordPointer` invalid. + var DBRecord record; + // Index in `record`'s `storage` variable that corresponds to + // referred (simple) value. + // Negative `index` values mean `record` itself is pointed at. 
+ // To point at JSON array / object represented by a `DBRecord`, always set + // `record` to that record and `index` to negative value (e.g. `-1`). + var int index; +}; + +private final function bool IsValidPointer(DBRecordPointer pointer) +{ + return pointer.record != none; +} + +private final function bool IsPointerToRecord(DBRecordPointer pointer) +{ + return (pointer.record != none && pointer.index < 0); +} + +// Auxiliary method serving as a simple constructor. +private final function DBRecordPointer MakeRecordPointer( + DBRecord record, + optional int index) +{ + local DBRecordPointer pointer; + pointer.record = record; + pointer.index = index; + return pointer; +} + +// Converts `JSONPointer` into our internal representation. +private final function DBRecordPointer ConvertPointer(BaseJSONPointer jsonPointer) +{ + if (jsonPointer == none) { + return MakeRecordPointer(none); + } + return ConvertPointerPath(jsonPointer, 0, jsonPointer.GetLength()); +} + +// Produces our internal pointer representation `DBRecordPointer` to +// the container that stores the object referred to by a given `JSONPointer`. +private final function DBRecordPointer ConvertContainerPointer(BaseJSONPointer jsonPointer) +{ + local DBRecordPointer pointer; + if (jsonPointer == none) { + return MakeRecordPointer(none); + } + pointer = ConvertPointerPath(jsonPointer, 0, jsonPointer.GetLength() - 1); + if (!IsPointerToRecord(pointer)) { + pointer.record = none; // invalidate pointer + } + return pointer; +} + +// Converts `JSONPointer` into internal `DBRecordPointer`. +// Only uses sub-pointer: components from `startIndex` to `endIndex`. +private final function DBRecordPointer ConvertPointerPath( + BaseJSONPointer pointer, + int startIndex, + int endIndex) +{ + local int index; + local StorageItem nextElement; + local DBRecord nextRecord; + local string nextComponent; + if (pointer == none) { + return MakeRecordPointer(none); + } + // We are done! 
+ if (startIndex >= endIndex) { + return MakeRecordPointer(self, -1); + } + // Use first available to us component to find next sub-object + if (isJSONArray) + { + index = pointer.GetNumericComponent(startIndex); + if (index < 0 || index >= storage.length) { + return MakeRecordPointer(none); // fail: out-of-bounds index + } + } + else + { + nextComponent = __().text.IntoString(pointer.GetComponent(startIndex)); + index = FindItem(nextComponent); + } + if (index < 0) { + return MakeRecordPointer(none); // fail: missing key for component + } + nextElement = storage[index]; + if (nextElement.t != DBAT_Reference) + { + if (startIndex + 1 >= endIndex) { + return MakeRecordPointer(self, index); + } + // fail: found value cannot contain sub-values, + // but pointer is not exhausted + return MakeRecordPointer(none); + } + nextRecord = LoadRecordFor(nextElement.s, package); + if (nextRecord == none) { + return MakeRecordPointer(none); // fail: bad database + } + // Success for the component, do recursive call + startIndex += 1; + return nextRecord.ConvertPointerPath(pointer, startIndex, endIndex); +} + +public static final function Global __() +{ + return class'Global'.static.GetInstance(); +} + +public static final function CoreGlobal __level() +{ + return class'CoreGlobal'.static.GetGenericInstance(); +} + +/** + * Method for creating a new `DBRecord` in a package named `dbPackageName`, + * picking an appropriate and unique name for it. + * + * @param dbPackageName Name of the package new `DBRecord` must belong to. + * @return New `DBRecord`, created in specified package. + * `none` iff `dbPackageName == none`. + */ +public final static function DBRecord NewRecord(BaseText dbPackageName) +{ + if (dbPackageName == none) { + return none; + } + return NewRecordFor(dbPackageName.ToString()); +} + +// Auxiliary method that does what `NewRecord()` does, but for `string` +// parameter. This makes it cheaper to call for internal use. 
+private final static function DBRecord NewRecordFor(string dbPackageName) +{ + local string nextName; + local DBRecord recordCandidate; + // Try to generate new random name. + // This cycle can in theory be infinite. However in practice it will + // only run for one iteration (unless user messed with settings and + // set length of randomized names too low), since by default there is + // 26^20 == 19,928,148,895,209,409,152,340,197,376 different + // random names and the chance of duplicate in infinitesimal. + while (true) + { + nextName = GetRandomName(); + recordCandidate = LoadRecordFor(nextName, dbPackageName); + if (recordCandidate != none) { + continue; + } + recordCandidate = __level().unreal_api().GetGameType() + .CreateDataObject(class'DBRecord', nextName, dbPackageName); + recordCandidate.package = dbPackageName; + return recordCandidate; + } + // We cannot actually reach here + return none; +} + +public final static function DBRecord LoadRecord( + BaseText recordName, + BaseText dbPackageName) +{ + if (dbPackageName == none) return none; + if (recordName == none) return none; + + return LoadRecordFor( recordName.ToString(), + dbPackageName.ToString()); +} + +// Auxiliary method that does what `LoadRecord()` does, but for `string` +// parameter. This makes it cheaper to call for internal use. 
+private final static function DBRecord LoadRecordFor( + string name, + string package) +{ + return __level().unreal_api().GetGameType() + .LoadDataObject(class'DBRecord', name, package); +} + +private final static function string GetRandomName() +{ + local int i; + local int length; + local string result; + length = Max(1, class'LocalDBSettings'.default.randomNameLength); + for (i = 0; i < length; i += 1) { + result = result $ GetRandomLetter(); + } + return result; +} + +private final static function string GetRandomLetter() +{ + return Chr(Rand(default.LATIN_LETTERS_AMOUNT) + default.LOWER_A_CODEPOINT); +} + +/** + * Loads Acedia's representation of JSON value stored at `pointer` inside + * the JSON object/array represented by the caller `DBRecord`. + * + * @param jsonPointer JSON pointer to the value to load + * (either simple, array or object one). + * @param result Loaded value will be recorded inside this variable. + * Set to `none` on failure. + * @param makeMutable `false` if you want simple value to be recorded as + * immutable "boxes" (and `Text` for JSON strings) and `true` if you want + * them to be recorded as mutable "references" + * (`MutableText` for JSON strings). + * @return `true` if method successfully loaded JSON value and + * `false` otherwise. Failure can happen if passed `pointer` is invalid + * (either does not point at any existing value or is equal to `none`). 
+ */ +public final function bool LoadObject( + BaseJSONPointer jsonPointer, + out AcediaObject result, + bool makeMutable) +{ + local int itemIndex; + local DBRecord container; + local DBRecordPointer pointer; + if (jsonPointer == none) return false; + pointer = ConvertPointer(jsonPointer); + if (!IsValidPointer(pointer)) return false; + + if (IsPointerToRecord(pointer)) { + result = pointer.record.ToCollection(makeMutable); + } + else + { + itemIndex = pointer.index; + container = pointer.record; + result = ConvertItemToObject(container.GetItem(itemIndex), makeMutable); + } + return true; +} + +/** + * Saves Acedia's representation of JSON value at a `pointer` inside + * the JSON object/array represented by the caller `DBRecord`. + * + * @param jsonPointer JSON pointer to location at which to save the value. + * Only the last segment of the path will be created (if missing), the rest + * must already exist and will not be automatically created. + * If another value is already recorded at `pointer` - it will be erased. + * @param newItem New value to save at `pointer` inside + * the caller `DBRecord`. + * @return `true` if method successfully saved new JSON value and + * `false` otherwise. Failure can happen if passed `pointer` is invalid + * (either missing some necessary segments or is equal to `none`). 
+ */ +public final function bool SaveObject( + BaseJSONPointer jsonPointer, + AcediaObject newItem) +{ + local int index; + local string itemKey; + local DBRecord directContainer; + local Collection newItemAsCollection; + local DBRecordPointer pointer; + if (jsonPointer == none) { + return false; + } + if (jsonPointer.IsEmpty()) + { + // Special case - rewriting caller `DBRecord` itself + newItemAsCollection = Collection(newItem); + if (newItemAsCollection == none) { + return false; + } + EmptySelf(); + isJSONArray = (newItemAsCollection.class == class'ArrayList'); + FromCollection(newItemAsCollection); + return true; + } + pointer = ConvertContainerPointer(jsonPointer); + if (!IsValidPointer(pointer)) { + return false; + } + directContainer = pointer.record; + itemKey = __().text.IntoString(jsonPointer.Peek()); + if (directContainer.isJSONArray) + { + index = jsonPointer.PeekNumeric(); + if (index < 0 && itemKey == JSONPOINTER_NEW_ARRAY_ELEMENT) { + index = directContainer.GetStorageLength(); + } + if (index < 0) { + return false; + } + } + else { + index = directContainer.FindItem(itemKey); + } + directContainer.SetItem(index, ConvertObjectToItem(newItem), itemKey); + return true; +} + +/** + * Removes Acedia's values stored in the database at `pointer` inside + * the JSON object/array represented by the caller `DBRecord`. + * + * @param jsonPointer JSON pointer to the value to remove + * (either simple, array or object one). + * @return `true` if method successfully removed JSON value and + * `false` otherwise. Failure can happen if passed `pointer` is invalid + * (either does not point at any existing value or equal to `none`). 
+ */ +public final function bool RemoveObject(BaseJSONPointer jsonPointer) +{ + local int itemIndex; + local string itemKey; + local DBRecord directContainer; + local DBRecordPointer containerPointer; + if (jsonPointer == none) return false; + containerPointer = ConvertContainerPointer(jsonPointer); + if (!IsValidPointer(containerPointer)) return false; + + directContainer = containerPointer.record; + if (directContainer.isJSONArray) { + itemIndex = jsonPointer.PeekNumeric(); + } + else + { + itemKey = __().text.IntoString(jsonPointer.Peek()); + itemIndex = directContainer.FindItem(itemKey); + } + if (itemIndex >= 0) + { + directContainer.RemoveItem(itemIndex); + return true; + } + return false; +} + +/** + * Checks type of the JSON value stored at `pointer` inside + * the JSON object/array represented by the caller `DBRecord`. + * + * @param jsonPointer JSON pointer to the value for which type + * should be checked. + * @return `Database.DataType` that corresponds to the type of referred value. + * `JSON_Undefined` if value is missing or passed pointer is invalid. 
+ */ +public final function LocalDatabaseInstance.DataType GetObjectType( + BaseJSONPointer jsonPointer) +{ + local DBRecord directContainer; + local DBRecordPointer pointer; + if (jsonPointer == none) return JSON_Undefined; + pointer = ConvertPointer(jsonPointer); + if (!IsValidPointer(pointer)) return JSON_Undefined; + + if (IsPointerToRecord(pointer)) + { + if (pointer.record.isJSONArray) { + return JSON_Array; + } + else { + return JSON_Object; + } + } + directContainer = pointer.record; + switch (directContainer.GetItem(pointer.index).t) + { + case DBAT_Null: + return JSON_Null; + case DBAT_False: + case DBAT_True: + return JSON_Boolean; + case DBAT_Int: + case DBAT_Float: + return JSON_Number; + case DBAT_String: + return JSON_String; + } + // We should not reach here + return JSON_Undefined; +} + +/** + * Returns "size" of the JSON value stored at `pointer` inside + * the JSON object/array represented by the caller `DBRecord`. + * + * For JSON arrays and objects it's the amount of stored elements. + * For other values it's considered undefined and method returns negative + * value instead. + * + * @param jsonPointer JSON pointer to the value for which method should + * return size. + * @return If `pointer` refers to the JSON array or object - amount of it's + * elements is returned. Otherwise returns `-1`. + */ +public final function int GetObjectSize(BaseJSONPointer jsonPointer) +{ + local DBRecordPointer pointer; + if (jsonPointer == none) { + return -1; + } + pointer = ConvertPointer(jsonPointer); + if (IsPointerToRecord(pointer)) { + return pointer.record.GetStorageLength(); + } + return -1; +} + +/** + * Returns keys of the JSON object stored at `pointer` inside + * the JSON object/array represented by the caller `DBRecord`. + * + * @param jsonPointer JSON pointer to the value for which method should + * return size. + * @return If `pointer` refers to the JSON object - all available keys. + * `none` otherwise (including case of JSON arrays). 
+ */ +public final function ArrayList GetObjectKeys(BaseJSONPointer jsonPointer) +{ + local int i; + local ArrayList resultKeys; + local array items; + local DBRecord referredObject; + local DBRecordPointer pointer; + if (jsonPointer == none) return none; + pointer = ConvertPointer(jsonPointer); + if (!IsValidPointer(pointer)) return none; + if (!IsPointerToRecord(pointer)) return none; + referredObject = pointer.record; + if (referredObject.isJSONArray) return none; + + resultKeys = __().collections.EmptyArrayList(); + items = referredObject.storage; + for (i = 0; i < items.length; i += 1) { + resultKeys.AddString(items[i].k); + } + return resultKeys; +} + +/** + * Increments JSON value at a `pointer` inside the JSON object/array + * represented by the caller `DBRecord` by a given Acedia's value. + * + * For "increment" operation description refer to `Database.IncrementData()`. + * + * @param jsonPointer JSON pointer to location at which to save the value. + * Only the last segment of the path might be created (if missing), + * the rest must already exist and will not be automatically created. + * If another value is already recorded at `pointer` - it will be erased. + * @param object Value by which to increment another value, stored at + * `pointer` inside the caller `DBRecord`. + * @return Returns query result that is appropriate for "increment" operation, + * according to `Database.IncrementData()` specification. 
+ */ +public final function Database.DBQueryResult IncrementObject( + BaseJSONPointer jsonPointer, + AcediaObject object) +{ + local int index; + local string itemKey; + local DBRecord directContainer; + local HashTable objectAsHashTable; + local DBRecordPointer pointer; + if (jsonPointer == none) { + return DBR_InvalidPointer; + } + if (jsonPointer.IsEmpty()) + { + // Special case - incrementing caller `DBRecord` itself + objectAsHashTable = HashTable(object); + if (objectAsHashTable == none) { + return DBR_InvalidData; + } + FromCollection(objectAsHashTable); + return DBR_Success; + } + // All the work will be done by the separate `IncrementItem()` method; + // But it is applied to the `DBRecord` that contains referred item, + // so we have to find it. + pointer = ConvertContainerPointer(jsonPointer); + if (!IsValidPointer(pointer)) { + return DBR_InvalidPointer; + } + directContainer = pointer.record; + itemKey = __().text.IntoString(jsonPointer.Peek()); + if (directContainer.isJSONArray) + { + index = jsonPointer.PeekNumeric(); + if (index < 0 && itemKey == JSONPOINTER_NEW_ARRAY_ELEMENT) { + index = directContainer.GetStorageLength(); + } + if (index < 0) { + return DBR_InvalidPointer; + } + } + else { + index = directContainer.FindItem(itemKey); + } + if (directContainer.IncrementItem(index, object, itemKey)) { + return DBR_Success; + } + return DBR_InvalidData; +} + +private final function StorageItem GetItem(int index) +{ + local StorageItem emptyResult; + if (index < 0) return emptyResult; + if (index >= storage.length) return emptyResult; + + return storage[index]; +} + +// Negative `index` means that value will need to be appended to the end +// of the `storage`. +// Optionally lets you specify item's key (via `itemName`) for +// JSON objects. 
+private final function SetItem( + int index, + StorageItem newItem, + optional string itemName) +{ + local DBRecord oldRecord; + local StorageItem oldItem; + if (index < 0) { + index = storage.length; + } + if (index < storage.length) + { + // Clean up old value + oldItem = storage[index]; + if (oldItem.t == DBAT_Reference) + { + oldRecord = LoadRecordFor(oldItem.s, package); + if (oldRecord != none) { + oldRecord.EmptySelf(); + } + __level().unreal_api().GetGameType() + .DeleteDataObject(class'DBRecord', oldItem.s, package); + } + } + storage[index] = newItem; + storage[index].k = itemName; +} + +// Auxiliary getter that helps us avoid referring to `storage` array +// directly from `DBRecord` reference, which would cause unnecessary copying of +// it's data. +private final function int GetStorageLength() +{ + return storage.length; +} + +// Auxiliary method for removing items from `storage` array that helps us +// avoid referring to it directly from `DBRecord` reference, which would cause +// unnecessary copying of it's data. +private final function RemoveItem(int index) +{ + local DBRecord oldRecord; + local StorageItem oldItem; + if (index >= storage.length) return; + if (index < 0) return; + + // Clean up old value + oldItem = storage[index]; + if (oldItem.t == DBAT_Reference) + { + oldRecord = LoadRecordFor(oldItem.s, package); + if (oldRecord != none) { + oldRecord.EmptySelf(); + } + __level().unreal_api().GetGameType() + .DeleteDataObject(class'DBRecord', oldItem.s, package); + } + storage.Remove(index, 1); +} + +private final function int FindItem(string itemName) +{ + local int index; + if (isJSONArray) { + return -1; + } + for (index = 0; index < storage.length; index += 1) + { + if (storage[index].k == itemName) { + return index; + } + } + return -1; +} + +// Negative `index` means that `object` value needs to be appended to the +// end of the `storage`, instead of incrementing an existing value. 
+// Returns `true` if changes were successfully made and `false` otherwise. +private final function bool IncrementItem( + int index, + AcediaObject object, + optional string itemName) +{ + local StorageItem itemToIncrement; + if (index < 0) + { + index = storage.length; + // `itemToIncrement` is blank at this point and has type `DBAT_Null`, + // which will simply be rewritten by `IncrementItemByObject()` + // call later + storage[index] = itemToIncrement; + } + else if (index < storage.length) { + itemToIncrement = storage[index]; + } + if (IncrementItemByObject(itemToIncrement, object)) + { + // Increment object cannot overwrite existing `DBRecord` with + // other value, so it's safe to skip cleaning check + storage[index] = itemToIncrement; + storage[index].k = itemName; + return true; + } + return false; +} + +/** + * Extracts JSON object or array data from caller `DBRecord` as either + * `HashTable` (for JSON objects) or `ArrayList` (for JSON arrays). + * + * Type conversion rules in immutable case: + * 1. 'null' -> `none`; + * 2. 'boolean' -> `BoolBox`; + * 3. 'number' -> either `IntBox` or `FloatBox`, depending on + * what seems to fit better; + * 4. 'string' -> `Text`; + * 5. 'array' -> `ArrayList`; + * 6. 'object' -> `HashTable`. + * + * Type conversion rules in mutable case: + * 1. 'null' -> `none`; + * 2. 'boolean' -> `BoolRef`; + * 3. 'number' -> either `IntRef` or `FloatRef`, depending on + * what seems to fit better; + * 4. 'string' -> `MutableText`; + * 5. 'array' -> `ArrayList`; + * 6. 'object' -> `HashTable`. + * + * @param makeMutable `false` if you want this method to produce + * immutable types and `true` otherwise. + * @return `HashTable` if caller `DBRecord` represents a JSON object + * and `ArrayList` if it represents JSON array. + * Returned collection must have all of it's keys deallocated before being + * discarded. + * `none` iff caller `DBRecord` was not initialized as either. 
+ */ +public final function Collection ToCollection(bool makeMutable) +{ + local Collection result; + if (lockToCollection) { + return none; + } + lockToCollection = true; + if (isJSONArray) { + result = ToArrayList(makeMutable); + } + else { + result = ToHashTable(makeMutable); + } + lockToCollection = false; + return result; +} + +// Does not do any validation check, assumes caller `DBRecord` +// represents an array. +private final function Collection ToArrayList(bool makeMutable) +{ + local int i; + local ArrayList result; + local AcediaObject nextObject; + result = __().collections.EmptyArrayList(); + for (i = 0; i < storage.length; i += 1) + { + nextObject = ConvertItemToObject(storage[i], makeMutable); + result.AddItem(nextObject); + __().memory.Free(nextObject); + } + return result; +} + +// Does not do any validation check, assumes caller `DBRecord` +// represents an object. +private final function Collection ToHashTable(bool makeMutable) +{ + local int i; + local HashTable result; + local Text nextKey; + local AcediaObject nextObject; + result = __().collections.EmptyHashTable(); + for (i = 0; i < storage.length; i += 1) + { + nextKey = __().text.FromString(storage[i].k); + nextObject = ConvertItemToObject(storage[i], makeMutable); + result.SetItem(nextKey, nextObject); + __().memory.Free(nextKey); + __().memory.Free(nextObject); + } + return result; +} + +/** + * Completely erases all data inside a caller `DBRecord`, recursively deleting + * all referred `DBRecord`. 
+ */ +public final function EmptySelf() +{ + local int i; + local GameInfo game; + local DBRecord subRecord; + if (lockEraseSelf) { + return; + } + lockEraseSelf = true; + game = __level().unreal_api().GetGameType(); + for (i = 0; i < storage.length; i += 1) + { + if (storage[i].t != DBAT_Reference) continue; + subRecord = LoadRecordFor(storage[i].s, package); + if (subRecord == none) continue; + + subRecord.EmptySelf(); + game.DeleteDataObject(class'DBRecord', string(subRecord.name), package); + } + storage.length = 0; + lockEraseSelf = false; +} + +/** + * Takes all available values from `source` and records them into caller + * `DBRecord`. Does not erase untouched old values, but will overwrite them + * in case of the conflict. + * + * Can only convert items in passed collection that return `true` for + * `__().json.IsCompatible()` check. Any other values will be treated as `none`. + * + * Only works as long as caller `DBRecord` has the same container type as + * `source`. `isJSONArray` iff `source.class == class'ArrayList` and + * `!isJSONArray` iff `source.class == class'HashTable`. + * + * Values that cannot be converted into JSON will be replaced with `none`. + * + * @param source `Collection` to write into the caller `DBRecord`. + */ +public final function FromCollection(Collection source) +{ + local ArrayList asArrayList; + local HashTable asHashTable; + asArrayList = ArrayList(source); + asHashTable = HashTable(source); + if (asArrayList != none && isJSONArray) { + FromArrayList(asArrayList); + } + if (asHashTable != none && !isJSONArray) { + FromHashTable(asHashTable); + } +} + +// Does not do any validation check. 
+private final function FromArrayList(ArrayList source) +{ + local int i, length; + local AcediaObject nextObject; + length = source.GetLength(); + for (i = 0; i < length; i += 1) + { + nextObject = source.GetItem(i); + storage[storage.length] = ConvertObjectToItem(nextObject); + __().memory.Free(nextObject); + } +} + +// Does not do any validation check. +private final function FromHashTable(HashTable source) +{ + local int i, originalStorageLength; + local CollectionIterator iter; + local string nextKey; + local bool isNewKey; + local AcediaObject nextObject; + originalStorageLength = storage.length; + for (iter = source.Iterate(); !iter.HasFinished(); iter.Next()) + { + if (iter.GetKey() == none) { + continue; + } + nextKey = __().text.IntoString(BaseText(iter.GetKey())); + isNewKey = true; + for (i = 0; i < originalStorageLength; i += 1) + { + if (storage[i].k == nextKey) + { + isNewKey = false; + break; + } + } + if (isNewKey) + { + nextObject = iter.Get(); + SetItem(storage.length, ConvertObjectToItem(nextObject), nextKey); + __().memory.Free(nextObject); + } + } + iter.FreeSelf(); +} + +// Converts `AcediaObject` into it's internal representation. 
+private final function StorageItem ConvertObjectToItem(AcediaObject data) +{ + local StorageItem result; + local DBRecord newDBRecord; + if (Text(data) != none) + { + result.t = DBAT_String; + result.s = Text(data).ToString(); + } + else if(Collection(data) != none) + { + result.t = DBAT_Reference; + newDBRecord = NewRecordFor(package); + newDBRecord.isJSONArray = (data.class == class'ArrayList'); + newDBRecord.FromCollection(Collection(data)); + result.s = string(newDBRecord.name); + } + else if (FloatBox(data) != none || FloatRef(data) != none) + { + result.t = DBAT_Float; + if (FloatBox(data) != none) { + result.f = FloatBox(data).Get(); + } + else { + result.f = FloatRef(data).Get(); + } + } + else if (IntBox(data) != none || IntRef(data) != none) + { + result.t = DBAT_Int; + if (IntBox(data) != none) { + result.i = IntBox(data).Get(); + } + else { + result.i = IntRef(data).Get(); + } + } + else if (BoolBox(data) != none || BoolRef(data) != none) + { + result.t = DBAT_False; + if (BoolBox(data) != none && BoolBox(data).Get()) { + result.t = DBAT_True; + } + if (BoolRef(data) != none && BoolRef(data).Get()) { + result.t = DBAT_True; + } + } + return result; +} + +// Converts internal data representation into `AcediaObject`. 
+private final function AcediaObject ConvertItemToObject( + StorageItem item, + bool makeMutable) +{ + local DBRecord subRecord; + switch (item.t) { + case DBAT_False: + case DBAT_True: + if (makeMutable) { + return __().ref.bool(item.t == DBAT_True); + } + else { + return __().box.bool(item.t == DBAT_True); + } + case DBAT_Int: + if (makeMutable) { + return __().ref.int(item.i); + } + else { + return __().box.int(item.i); + } + case DBAT_Float: + if (makeMutable) { + return __().ref.float(item.f); + } + else { + return __().box.float(item.f); + } + case DBAT_String: + if (makeMutable) { + return __().text.FromStringM(item.s); + } + else { + return __().text.FromString(item.s); + } + case DBAT_Reference: + subRecord = LoadRecordFor(item.s, package); + if (subRecord != none) { + return subRecord.ToCollection(makeMutable); + } + default: + } + return none; +} + +// "Increments" internal data representation by value inside given +// `AcediaObject`. +// See `IncrementObject()` method for details. 
+private final function bool IncrementItemByObject( + out StorageItem item, + AcediaObject object) +{ + local DBRecord itemRecord; + if (object == none) { + return (item.t == DBAT_Null); + } + if (item.t == DBAT_Null) + { + item = ConvertObjectToItem(object); + return true; + } + else if (item.t == DBAT_String && Text(object) != none) + { + item.s $= Text(object).ToString(); + return true; + } + else if(item.t == DBAT_Reference && Collection(object) != none) + { + itemRecord = LoadRecordFor(item.s, package); + if (itemRecord == none) + { + itemRecord = NewRecordFor(package); // DB was broken somehow + item.s = string(itemRecord.name); + itemRecord.isJSONArray = (object.class == class'ArrayList'); + } + if ( (itemRecord.isJSONArray && object.class != class'ArrayList') + || ( !itemRecord.isJSONArray + && object.class != class'HashTable')) + { + return false; + } + itemRecord.FromCollection(Collection(object)); + return true; + } + else if ( (item.t == DBAT_False || item.t == DBAT_True) + && (BoolBox(object) != none || BoolRef(object) != none)) + { + if (BoolBox(object) != none && BoolBox(object).Get()) { + item.t = DBAT_True; + } + if (BoolRef(object) != none && BoolRef(object).Get()) { + item.t = DBAT_True; + } + return true; + } + return IncrementNumericItemByObject(item, object); +} + +private final function bool IncrementNumericItemByObject( + out StorageItem item, + AcediaObject object) +{ + local int storedValueAsInteger, incrementAsInteger; + local float storedValueAsFloat, incrementAsFloat; + if (item.t != DBAT_Float && item.t != DBAT_Int) { + return false; + } + if (!ReadNumericObjectInto(object, incrementAsInteger, incrementAsFloat)) { + return false; + } + if (item.t == DBAT_Float) + { + storedValueAsInteger = int(item.f); + storedValueAsFloat = item.f; + } + else + { + storedValueAsInteger = item.i; + storedValueAsFloat = float(item.i); + } + // Later we want to implement arbitrary precision arithmetic for storage, + // but for now let's just assume that 
if either value is a float - + // then user wants a float precision. + if ( item.t == DBAT_Float || FloatBox(object) != none + || FloatRef(object) != none) + { + item.t = DBAT_Float; + item.f = storedValueAsFloat + incrementAsFloat; + item.i = 0; + } + else + { + item.t = DBAT_Int; + item.i = storedValueAsInteger + incrementAsInteger; + item.f = 0; + } + return true; +} + +private final function bool ReadNumericObjectInto( + AcediaObject object, + out int valueAsInt, + out float valueAsFloat) +{ + if (IntBox(object) != none || IntRef(object) != none) + { + if (IntBox(object) != none) { + valueAsInt = IntBox(object).Get(); + } + else { + valueAsInt = IntRef(object).Get(); + } + valueAsFloat = float(valueAsInt); + return true; + } + if (FloatBox(object) != none || FloatRef(object) != none) + { + if (FloatBox(object) != none) { + valueAsFloat = FloatBox(object).Get(); + } + else { + valueAsFloat = FloatRef(object).Get(); + } + valueAsInt = int(valueAsFloat); + return true; + } + return false; +} + +// Add storing bytes +defaultproperties +{ + LATIN_LETTERS_AMOUNT = 26 + LOWER_A_CODEPOINT = 97 + UPPER_A_CODEPOINT = 65 + // JSON Pointers allow using "-" as an indicator that element must be + // added at the end of the array + JSONPOINTER_NEW_ARRAY_ELEMENT = "-" +} \ No newline at end of file diff --git a/rottlib/tests/fixtures/KVehicle.uc b/rottlib/tests/fixtures/KVehicle.uc new file mode 100644 index 0000000..cb68bd8 --- /dev/null +++ b/rottlib/tests/fixtures/KVehicle.uc @@ -0,0 +1,326 @@ +// Generic 'Karma Vehicle' base class that can be controlled by a Pawn. 
+ +class KVehicle extends Vehicle + native + abstract; + +cpptext +{ +#ifdef WITH_KARMA + virtual void PostNetReceive(); + virtual void PostEditChange(); + virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV); + virtual void TickSimulated( FLOAT DeltaSeconds ); + virtual void TickAuthoritative( FLOAT DeltaSeconds ); +#endif + +} + +// Effect spawned when vehicle is destroyed +var (KVehicle) class DestroyEffectClass; + +// Simple 'driving-in-rings' logic. +var (KVehicle) bool bAutoDrive; + +// The factory that created this vehicle. +//var KVehicleFactory ParentFactory; + +// Weapon system +var bool bVehicleIsFiring, bVehicleIsAltFiring; + +const FilterFrames = 5; +var vector CameraHistory[FilterFrames]; +var int NextHistorySlot; +var bool bHistoryWarmup; + +// Useful function for plotting data to real-time graph on screen. +native final function GraphData(string DataName, float DataValue); + +// if _RO_ +function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation, + vector momentum, class damageType, optional int HitIndex) +// else UT +//function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation, +// vector momentum, class damageType) +{ + Super.TakeDamage(Damage,instigatedBy,HitLocation,Momentum,DamageType); +} + +// You got some new info from the server (ie. VehicleState has some new info). +event VehicleStateReceived(); + +// Called when a parameter of the overall articulated actor has changed (like PostEditChange) +// The script must then call KUpdateConstraintParams or Actor Karma mutators as appropriate. 
+simulated event KVehicleUpdateParams(); + +// The pawn Driver has tried to take control of this vehicle +function bool TryToDrive(Pawn P) +{ + if ( P.bIsCrouched || (P.Controller == None) || (Driver != None) || !P.Controller.bIsPlayer ) + return false; + + if ( !P.IsHumanControlled() || !P.Controller.IsInState('PlayerDriving') ) + { + KDriverEnter(P); + return true; + } + + return false; +} + +// Events called on driver entering/leaving vehicle + +simulated function ClientKDriverEnter(PlayerController pc) +{ + pc.myHUD.bCrosshairShow = false; + pc.myHUD.bShowWeaponInfo = false; + pc.myHUD.bShowPoints = false; + + pc.bBehindView = true; + pc.bFreeCamera = true; + + pc.SetRotation(rotator( vect(-1, 0, 0) >> Rotation )); +} + +function KDriverEnter(Pawn P) +{ + local PlayerController PC; + local Controller C; + + // Set pawns current controller to control the vehicle pawn instead + Driver = P; + + // Move the driver into position, and attach to car. + Driver.SetCollision(false, false); + Driver.bCollideWorld = false; + Driver.bPhysicsAnimUpdate = false; + Driver.Velocity = vect(0,0,0); + Driver.SetPhysics(PHYS_None); + Driver.SetBase(self); + + // Disconnect PlayerController from Driver and connect to KVehicle. + C = P.Controller; + p.Controller.Unpossess(); + Driver.SetOwner(C); // This keeps the driver relevant. + C.Possess(self); + + PC = PlayerController(C); + if ( PC != None ) + { + PC.ClientSetViewTarget(self); // Set playercontroller to view the vehicle + + // Change controller state to driver + PC.GotoState('PlayerDriving'); + + ClientKDriverEnter(PC); + } +} + +simulated function ClientKDriverLeave(PlayerController pc) +{ + pc.bBehindView = false; + pc.bFreeCamera = false; + // This removes any 'roll' from the look direction. 
+ //exitLookDir = Vector(pc.Rotation); + //pc.SetRotation(Rotator(exitLookDir)); + + pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow; + pc.myHUD.bShowWeaponInfo = pc.myHUD.default.bShowWeaponInfo; + pc.myHUD.bShowPoints = pc.myHUD.default.bShowPoints; + + // Reset the view-smoothing + NextHistorySlot = 0; + bHistoryWarmup = true; +} + +// Called from the PlayerController when player wants to get out. +function bool KDriverLeave(bool bForceLeave) +{ + local PlayerController pc; + local int i; + local bool havePlaced; + local vector HitLocation, HitNormal, tryPlace; + + // Do nothing if we're not being driven + if(Driver == None) + return false; + + // Before we can exit, we need to find a place to put the driver. + // Iterate over array of possible exit locations. + + if (!bRemoteControlled) + { + + Driver.bCollideWorld = true; + Driver.SetCollision(true, true); + + havePlaced = false; + for(i=0; i < ExitPositions.Length && havePlaced == false; i++) + { + //Log("Trying Exit:"$i); + + tryPlace = Location + (ExitPositions[i] >> Rotation); + + // First, do a line check (stops us passing through things on exit). + if( Trace(HitLocation, HitNormal, tryPlace, Location, false) != None ) + continue; + + // Then see if we can place the player there. + if( !Driver.SetLocation(tryPlace) ) + continue; + + havePlaced = true; + } + + // If we could not find a place to put the driver, leave driver inside as before. + if(!havePlaced && !bForceLeave) + { + Log("Could not place driver."); + + Driver.bCollideWorld = false; + Driver.SetCollision(false, false); + + return false; + } + } + + pc = PlayerController(Controller); + ClientKDriverLeave(pc); + + // Reconnect PlayerController to Driver. 
+ pc.Unpossess(); + pc.Possess(Driver); + + pc.ClientSetViewTarget(Driver); // Set playercontroller to view the persone that got out + + Controller = None; + + Driver.PlayWaiting(); + Driver.bPhysicsAnimUpdate = Driver.Default.bPhysicsAnimUpdate; + + // Do stuff on client + //pc.ClientSetBehindView(false); + //pc.ClientSetFixedCamera(true); + + if (!bRemoteControlled) + { + + Driver.Acceleration = vect(0, 0, 24000); + Driver.SetPhysics(PHYS_Falling); + Driver.SetBase(None); + } + + // Car now has no driver + Driver = None; + + // Put brakes on before you get out :) + Throttle=0; + Steering=0; + + // Stop firing when you get out! + bVehicleIsFiring = false; + bVehicleIsAltFiring = false; + + return true; +} + +// Special calc-view for vehicles +simulated function bool SpecialCalcView(out actor ViewActor, out vector CameraLocation, out rotator CameraRotation ) +{ + local vector CamLookAt, HitLocation, HitNormal; + local PlayerController pc; + local int i, averageOver; + + pc = PlayerController(Controller); + + // Only do this mode we have a playercontroller viewing this vehicle + if(pc == None || pc.ViewTarget != self) + return false; + + ViewActor = self; + CamLookAt = Location + (vect(-100, 0, 100) >> Rotation); + + ////////////////////////////////////////////////////// + // Smooth lookat position over a few frames. 
+ CameraHistory[NextHistorySlot] = CamLookAt; + NextHistorySlot++; + + if(bHistoryWarmup) + averageOver = NextHistorySlot; + else + averageOver = FilterFrames; + + CamLookAt = vect(0, 0, 0); + for(i=0; i> CameraRotation); + + if( Trace( HitLocation, HitNormal, CameraLocation, CamLookAt, false, vect(10, 10, 10) ) != None ) + { + CameraLocation = HitLocation; + } + + return true; +} + +simulated function Destroyed() +{ + // If there was a driver in the vehicle, destroy him too + if ( Driver != None ) + Driver.Destroy(); + + // Trigger any effects for destruction + if(DestroyEffectClass != None) + spawn(DestroyEffectClass, , , Location, Rotation); + + Super.Destroyed(); +} + +simulated event Tick(float deltaSeconds) +{ +} + +// Includes properties from KActor +defaultproperties +{ + Steering=0 + Throttle=0 + + ExitPositions(0)=(X=0,Y=0,Z=0) + + DrivePos=(X=0,Y=0,Z=0) + DriveRot=() + + bHistoryWarmup = true; + + + Physics=PHYS_Karma + bEdShouldSnap=True + bStatic=False + bShadowCast=False + bCollideActors=True + bCollideWorld=False + bProjTarget=True + bBlockActors=True + bBlockNonZeroExtentTraces=True + bBlockZeroExtentTraces=True + bWorldGeometry=False + bBlockKarma=True + bAcceptsProjectors=True + bCanBeBaseForPawns=True + bAlwaysRelevant=True + RemoteRole=ROLE_SimulatedProxy + bNetInitialRotation=True + bSpecialCalcView=True + //bSpecialHUD=true +} diff --git a/rottlib/tests/fixtures_tokenization.rs b/rottlib/tests/fixtures_tokenization.rs new file mode 100644 index 0000000..37384c9 --- /dev/null +++ b/rottlib/tests/fixtures_tokenization.rs @@ -0,0 +1,135 @@ +use std::{fs, path::PathBuf}; + +use rottlib::lexer::{Keyword, Token, TokenizedFile}; + +/// Returns the path to a fixture file in `tests/fixtures/`. +fn fixture_file_path(name: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(name) +} + +/// Loads a fixture source file as UTF-8 text. 
+fn read_fixture_source(name: &str) -> String { + fs::read_to_string(fixture_file_path(name)) + .unwrap_or_else(|e| panic!("failed to read fixture {name}: {e}")) +} + +/// Returns the token at the given token index on a physical line. +/// +/// Here `line` is 1-based, to match human line numbers in fixture files. +/// `token_index` is 0-based within `TokenizedFile::line_tokens`. +fn token_on_line(file: &TokenizedFile<'_>, line: usize, token_index: usize) -> Option { + file.line_tokens(line - 1) + .nth(token_index) + .map(|(_, token_data)| token_data.token) +} + +/// Returns reconstructed visible text for a physical line. +/// +/// Here `line` is 1-based, to match human line numbers in fixture files. +fn line_text(file: &TokenizedFile<'_>, line: usize) -> Option { + file.line_text(line - 1) +} + +#[test] +fn command_api_fixture_queries() { + let source = read_fixture_source("CommandAPI.uc"); + let file = TokenizedFile::tokenize(&source); + assert_eq!(file.line_count(), 1578); + + assert_eq!( + line_text(&file, 704).as_deref(), + Some( + "public final function CommandConfigInfo ResolveCommandForUserID(BaseText itemName, UserID id) {" + ) + ); + assert_eq!( + line_text(&file, 806).as_deref(), + Some(" _.memory.Free(wrapper);") + ); + assert_eq!( + line_text(&file, 1274).as_deref(), + Some("/// Method must be called after [`Voting`] with a given name is added.") + ); + assert_eq!( + line_text(&file, 14).as_deref(), + Some(" * Acedia is distributed in the hope that it will be useful,") + ); + + let token = token_on_line(&file, 22, 0).unwrap(); + assert_eq!(token, Token::Keyword(Keyword::Class)); + + let token = token_on_line(&file, 1577, 0).unwrap(); + assert_eq!(token, Token::Keyword(Keyword::DefaultProperties)); + + let token = token_on_line(&file, 649, 4).unwrap(); + assert_eq!(token, Token::Whitespace); +} + +#[test] +fn dbrecord_fixture_queries() { + let source = read_fixture_source("DBRecord.uc"); + let file = TokenizedFile::tokenize(&source); + 
assert_eq!(file.line_count(), 1199); + + assert_eq!( + line_text(&file, 149).as_deref(), + Some(" * However, JSON pointers are not convenient or efficient enough for that,") + ); + assert_eq!( + line_text(&file, 787).as_deref(), + Some(" * 3. 'number' -> either `IntBox` or `FloatBox`, depending on") + ); + assert_eq!( + line_text(&file, 1023).as_deref(), + Some(" bool makeMutable)") + ); + assert_eq!( + line_text(&file, 29).as_deref(), + Some(" config(AcediaDB);") + ); + + let token = token_on_line(&file, 565, 0).unwrap(); + assert_eq!(token, Token::BlockComment); + + let token = token_on_line(&file, 467, 10).unwrap(); + assert_eq!(token, Token::Identifier); + + let token = token_on_line(&file, 467, 9).unwrap(); + assert_eq!(token, Token::LeftParenthesis); +} + +#[test] +fn kvehicle_fixture_queries() { + let source = read_fixture_source("KVehicle.uc"); + let file = TokenizedFile::tokenize(&source); + assert_eq!(file.line_count(), 326); + + assert_eq!( + line_text(&file, 12).as_deref(), + Some(" virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);") + ); + assert_eq!( + line_text(&file, 127).as_deref(), + Some(" pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;") + ); + assert_eq!( + line_text(&file, 264).as_deref(), + Some(" //////////////////////////////////////////////////////") + ); + assert_eq!( + line_text(&file, 299).as_deref(), + Some(" ExitPositions(0)=(X=0,Y=0,Z=0)") + ); + + let token = token_on_line(&file, 17, 0).unwrap(); + assert_eq!(token, Token::Newline); + + let token = token_on_line(&file, 20, 7).unwrap(); + assert_eq!(token, Token::Less); + + let token = token_on_line(&file, 246, 2).unwrap(); + assert_eq!(token, Token::Increment); +} diff --git a/rottlsp/src/main.rs b/rottlsp/src/main.rs index e82bab5..ed419e9 100644 --- a/rottlsp/src/main.rs +++ b/rottlsp/src/main.rs @@ -37,7 +37,7 @@ impl tower_lsp::LanguageServer for RottLanguageServer { // Measure lexing performance to track parser responsiveness. 
let start_time = std::time::Instant::now(); let has_errors = - rottlib::lexer::TokenizedFile::from_str(¶ms.text_document.text).has_errors(); + rottlib::lexer::TokenizedFile::tokenize(¶ms.text_document.text).has_errors(); let elapsed_time = start_time.elapsed(); self.client diff --git a/test.uc b/test.uc new file mode 100644 index 0000000..6f5a9d6 --- /dev/null +++ b/test.uc @@ -0,0 +1,97 @@ +/// BOF line comment +/* BOF block comment */ + +class TestClass extends Actor + abstract + native; + //nativereplication; + +/* One blank line follows to test has_blank_line_after() */ + +function int fuck_you(int a, float b, string c) +{ + // ---- locals with an error to trigger recovery to comma/semicolon + local int i, /* oops */ , k; + local int a, b, c; + + // ---- builtins: valid + error + various initializers + int a = 1, b, , c = 3; + float f = (1.0 + 2.0) * 0.5; + bool flag = true; + string s = "hi\n\"ok\""; + name tag; + array nums; + + // ---- label + goto + start: + goto start2; + + // ---- if / else with tail-as-value and missing semicolons inside + if (a + c > 0) { + while (a < 5) { + if (flag) { + break; + } + a + 1; // ok + continue + } // missing ';' before '}' should be fine (SelfTerminating) + } else { + { + a + 2; + b // tail expression (no ';') becomes block tail + } + } + + // ---- for with header pieces using statement-as-value + for (i; i < 10; i += 1) { + j + i; + i + j // no semicolon, next is '}' so this is a tail + } + + // ---- assert with a block-as-value (statement-as-value) + assert { + i = i + 1; + i // tail is the value of the block + }; + + // ---- foreach (paren and no-paren forms) + foreach (nums) { + i++ + } + foreach nums { + --i; // prefix and postfix in play + j-- + } + + // ---- do ... 
until (paren and no-paren) + semicolon handling + do { + i = i + 1 + } until (i > 3); + do i = i + 1; until i > 5; + + // ---- switch with multi-label case, recovery, and default + switch (a + c) { + case 0: + case 1: + a = a + 10 + // missing ';' here forces recovery to next boundary (case/default/}) + case 2: + assert (a > 0); // regular statement + break; + case 3, 4: + break; + default: + // some stray token sequence to poke "unexpected token in switch body" + /* block comment with + newlines */ + a + ; // malformed expr; recover to boundary + continue; // legal statement after recovery + } + + // ---- second label target for goto + start2: + return a; // final return +} + +// EOF trailing line comment +/* EOF trailing block comment */ diff --git a/test_full.uc b/test_full.uc new file mode 100644 index 0000000..19dd535 --- /dev/null +++ b/test_full.uc @@ -0,0 +1,10 @@ +// #[config(JustConfig)] +abstract class NewWay extends AcediaObject { + // #[config(MaxWavesAmount)] + var int _value; +} + +class TestClass extends Actor + abstract + native + nativereplication; \ No newline at end of file