Compare commits
11 Commits
933722bd42 ... 47693fc5a5

SHA1:
47693fc5a5
bb54c6a124
688121c5a1
c79e552f09
d2e1913c63
0fa8140644
7ed934e2b8
2a31ed08b8
41672a7125
d9923fd762
d519ecab2e
@@ -11,6 +11,10 @@ path = "src/dump_tokens.rs"
 name = "uc_lexer_verify"
 path = "src/uc_lexer_verify.rs"
 
+[[bin]]
+name = "temp"
+path = "src/temp.rs"
+
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
 
@@ -70,7 +70,7 @@ fn main() {
         let (decoded_str, _, _) = encoding.decode(&raw_bytes);
 
         let source_text = decoded_str.to_string();
-        let tokenized_file = TokenizedFile::from_source(&source_text);
+        let tokenized_file = TokenizedFile::from_str(&source_text);
 
         tokenized_file.dump_debug_layout();
     }
 
129  dev_tests/src/temp.rs  Normal file
@@ -0,0 +1,129 @@
//! src/main.rs
//! --------------------------------------------
//! Build & run:
//!     cargo run
//! --------------------------------------------

use std::env;
use std::fs;
use std::io::{self, Read, Write};
use std::path::Path;

use rottlib::arena::Arena;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::{ParseError, Parser, pretty::ExprTree};

/*
- Convenient array definitions: [1, 3, 5, 2, 4]
- Boolean dynamic arrays
- Structures in default properties
- Auto conversion of arrays into strings
- Making 'var' and 'local' unnecessary
- Allowing variable creation in 'for' loops
- Allowing variable creation at any place inside a function
- Default parameters for functions
- Function overloading?
- repeat/until
- The syntax of the default properties block is pretty strict for an arcane
  reason. In particular, adding spaces before or after the "=" will lead to
  errors in pre-UT2003 versions.
- Scopes
- Different names for variables in code and in the config file
- Anonymous pairs (objects?) and value destructuring
>>> AST > HIR > MIR > byte code
*/

/// Closest plan:
/// - Add top-level declaration parsing
/// - Sort out pretty.rs somehow
/// - COMMITS
/// ---------------------------------------
/// - Add fancy error reporting
/// - Make a fancy REPL
/// - Add evaluation
///
/// WARNINGS:
/// - Empty code/switch blocks

fn parse_and_print(src: &str) -> Result<(), ParseError> {
    let tokenized = TokenizedFile::from_str(src);
    let arena = Arena::new();
    let mut parser = Parser::new(&tokenized, &arena);

    let expr = parser.parse_expression(); // ArenaNode<Expression>
    println!("{}", ExprTree(&*expr)); // works because `ArenaNode` implements `Deref`
    // or: println!("{}", ExprTree(expr.as_ref())); // if no Deref
    Ok(())
}

fn repl_once() -> Result<(), ParseError> {
    print!("Enter a statement > ");
    io::stdout().flush().unwrap();

    let mut input = String::new();
    if io::stdin().read_line(&mut input).is_err() {
        eprintln!("failed to read input");
        return Ok(());
    }

    if input.trim().is_empty() {
        return Ok(());
    }

    parse_and_print(&input)
}

fn read_stdin_all() -> io::Result<String> {
    let mut buf = String::new();
    io::stdin().read_to_string(&mut buf)?;
    Ok(buf)
}

fn read_file_to_string(path: &Path) -> io::Result<String> {
    fs::read_to_string(path)
}

fn main() -> Result<(), ParseError> {
    // Accept a single positional arg as the input path.
    // "-" means read all of stdin.
    let mut args = env::args().skip(1);

    if let Some(arg1) = args.next() {
        if arg1 == "-h" || arg1 == "--help" {
            println!("Usage:");
            println!(
                " {} # REPL",
                env::args().next().unwrap_or_else(|| "prog".into())
            );
            println!(
                " {} <file> # parse file",
                env::args().next().unwrap_or_else(|| "prog".into())
            );
            println!(
                " {} - # read source from stdin",
                env::args().next().unwrap_or_else(|| "prog".into())
            );
            return Ok(());
        }

        if arg1 == "-" {
            match read_stdin_all() {
                Ok(src) => return parse_and_print(&src),
                Err(e) => {
                    eprintln!("stdin read error: {}", e);
                    return Ok(());
                }
            }
        } else {
            let path = Path::new(&arg1);
            match read_file_to_string(path) {
                Ok(src) => return parse_and_print(&src),
                Err(e) => {
                    eprintln!("file read error ({}): {}", path.display(), e);
                    return Ok(());
                }
            }
        }
    }

    // No filename provided -> keep REPL behavior
    repl_once()
}
@@ -75,7 +75,7 @@ fn main() {
         let path = entry.path();
         match fs::read(path) {
             Ok(raw_bytes) => {
-                // Auto‑detect encoding for old Unreal script sources
+                // Auto-detect encoding for old Unreal script sources
                 let (encoding_label, _, _) = chardet::detect(&raw_bytes);
                 let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes())
                     .unwrap_or(encoding_rs::UTF_8);
@@ -95,8 +95,8 @@ fn main() {
     let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files
         .iter()
         .map(|(path, source_code)| {
-            let tokenized_file = TokenizedFile::from_source(source_code);
-            if tokenized_file.had_errors() {
+            let tokenized_file = TokenizedFile::from_str(source_code);
+            if tokenized_file.has_errors() {
                 println!("TK: {}", path.display());
             }
             (path.clone(), tokenized_file)
@@ -109,7 +109,7 @@ fn main() {
         elapsed_time
     );
 
-    // Round‑trip check
+    // Round-trip check
     for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) {
         let reconstructed = tokenized_file.reconstruct_source();
         if original != &reconstructed {
@@ -8,4 +8,5 @@ default = []
 debug = []
 
 [dependencies]
 logos = "0.15"
+bumpalo = { version = "3", features = ["boxed", "collections"] }
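
The two `bumpalo` features matter here: `boxed` and `collections` provide the arena-backed `Box`, `Vec`, and `String` that the new `rottlib/src/arena.rs` below wraps. A minimal standalone sketch of what the dependency gives us (not project code):

```rust
use bumpalo::Bump;

fn main() {
    let bump = Bump::new();
    // `boxed` feature: a Box whose storage lives in the bump arena.
    let answer = bumpalo::boxed::Box::new_in(42_i32, &bump);
    // `collections` feature: Vec/String allocated in the bump arena.
    let mut numbers = bumpalo::collections::Vec::new_in(&bump);
    numbers.push(*answer);
    assert_eq!(numbers[0], 42);
} // dropping `bump` frees all of the above at once
```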
277  rottlib/src/arena.rs  Normal file
@@ -0,0 +1,277 @@
//! Arena submodule defining types that exist in their own memory space and
//! allow multiple cheap allocations (both performance- and fragmentation-wise).
//!
//! ## Memory safety
//!
//! Dropping the [`Arena`] frees all its memory at once and does not run
//! [`Drop`] for values allocated within it. Avoid storing types that implement
//! [`Drop`] or own external resources inside [`ArenaNode`], [`ArenaVec`], or
//! [`ArenaString`]. If you must, arrange an explicit "drain/drop" pass before
//! the arena is dropped.

use core::fmt::{Debug, Display, Formatter, Result};
use core::ops::{Deref, DerefMut};

use bumpalo::{Bump, boxed, collections};

use crate::ast::AstSpan;
use crate::lexer::TokenLocation;

/// Object that manages a separate memory space, which can be deallocated all
/// at once after use.
///
/// All allocations borrow the arena immutably.
///
/// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it
/// (including values contained in [`ArenaNode`], [`ArenaVec`]
/// and [`ArenaString`]).
///
/// This arena is not thread-safe (`!Send`, `!Sync`). Values borrow the arena
/// and therefore cannot be sent across threads independently.
#[derive(Debug)]
pub struct Arena {
    bump: Bump,
}

impl Arena {
    /// Creates a new, empty arena.
    #[must_use]
    pub fn new() -> Self {
        Self { bump: Bump::new() }
    }

    /// Constructs an empty [`ArenaVec`] allocated in this arena.
    ///
    /// The returned vector borrows this arena and cannot outlive it.
    #[must_use]
    pub fn vec<T>(&self) -> ArenaVec<'_, T> {
        ArenaVec(collections::Vec::new_in(&self.bump))
    }

    /// Allocates a copy of `string` in this arena and returns
    /// an [`ArenaString`].
    #[must_use]
    pub fn string(&self, string: &str) -> ArenaString<'_> {
        ArenaString(collections::String::from_str_in(string, &self.bump))
    }

    /// Allocates `value` in this arena with the given `span`,
    /// returning an [`ArenaNode`].
    ///
    /// The node's storage borrows this arena and cannot outlive it.
    ///
    /// Note: `T`'s [`Drop`] is not run when the arena is dropped.
    #[must_use]
    pub fn alloc<T>(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> {
        ArenaNode {
            inner: boxed::Box::new_in(value, &self.bump),
            span,
        }
    }

    /// Allocates `value` with a span covering `from..=to`.
    pub fn alloc_between<T>(
        &self,
        value: T,
        from: TokenLocation,
        to: TokenLocation,
    ) -> ArenaNode<'_, T> {
        self.alloc(value, AstSpan { from, to })
    }

    /// Allocates `value` with a span covering the single token at `at`.
    pub fn alloc_at<T>(&self, value: T, at: TokenLocation) -> ArenaNode<'_, T> {
        self.alloc(value, AstSpan { from: at, to: at })
    }
}

impl Default for Arena {
    fn default() -> Self {
        Self::new()
    }
}

/// An arena-allocated box with an attached source span.
///
/// Equality and hashing take into account both the contained `T` and the `span`
/// (when `T: Eq + Hash`).
///
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
#[derive(Hash, PartialEq, Eq)]
pub struct ArenaNode<'arena, T> {
    /// Value allocated in the arena; this node owns it.
    inner: boxed::Box<'arena, T>,
    /// Token range covered by the value.
    span: AstSpan,
}

impl<'arena, T> ArenaNode<'arena, T> {
    /// Creates a new [`ArenaNode`] by allocating `value` in `arena`.
    #[must_use]
    pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self {
        Self {
            inner: boxed::Box::new_in(value, &arena.bump),
            span,
        }
    }

    /// Creates a new [`ArenaNode`] for an AST node that spans a single token.
    pub fn from_token_location(
        value: T,
        token_location: crate::lexer::TokenLocation,
        arena: &'arena Arena,
    ) -> Self {
        Self {
            inner: boxed::Box::new_in(value, &arena.bump),
            span: AstSpan {
                from: token_location,
                to: token_location,
            },
        }
    }

    /// Returns a mutable reference to the node's span.
    pub fn span_mut(&mut self) -> &mut AstSpan {
        &mut self.span
    }

    /// Moves the right edge of the span to `to`.
    pub fn extend_to(&mut self, to: TokenLocation) {
        self.span.to = to;
    }

    /// Moves the left edge of the span to `from`.
    pub fn extend_from(&mut self, from: TokenLocation) {
        self.span.from = from;
    }

    /// Returns the token span covered by this node.
    pub fn span(&self) -> &AstSpan {
        &self.span
    }
}

impl<'arena, T> Deref for ArenaNode<'arena, T> {
    type Target = T;

    fn deref(&self) -> &T {
        &self.inner
    }
}

impl<'arena, T> DerefMut for ArenaNode<'arena, T> {
    fn deref_mut(&mut self) -> &mut T {
        &mut self.inner
    }
}

impl<'arena, T: Debug> Debug for ArenaNode<'arena, T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        f.debug_struct("ArenaNode")
            .field("inner", &**self)
            .field("span", &self.span())
            .finish()
    }
}

/// Version of [`Vec`] that can be safely used inside a memory arena.
///
/// Elements do not have their destructors run when the arena is dropped.
///
/// This type dereferences to `[T]` and supports iteration by reference
/// (`&ArenaVec` and `&mut ArenaVec` implement [`IntoIterator`]).
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>);

impl<'arena, T> ArenaVec<'arena, T> {
    /// Creates an empty `ArenaVec` allocated in `arena`.
    #[must_use]
    pub fn new_in(arena: &'arena Arena) -> Self {
        Self(collections::Vec::new_in(&arena.bump))
    }

    /// Appends an element to the end of the vector.
    ///
    /// Growth is backed by the arena; increasing capacity allocates new space
    /// in the arena and never frees previous blocks.
    pub fn push(&mut self, value: T) {
        self.0.push(value)
    }

    /// Reserves capacity for at least `additional` more elements.
    pub fn reserve(&mut self, additional: usize) {
        self.0.reserve(additional)
    }

    /// Appends every element yielded by `it`.
    pub fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
        self.0.extend(it)
    }
}

impl<'arena, T> Deref for ArenaVec<'arena, T> {
    type Target = [T];

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<'arena, T> DerefMut for ArenaVec<'arena, T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl<'arena, 's, T> IntoIterator for &'s ArenaVec<'arena, T> {
    type Item = &'s T;
    type IntoIter = core::slice::Iter<'s, T>;
    fn into_iter(self) -> Self::IntoIter {
        self.0.iter()
    }
}

impl<'arena, 's, T> IntoIterator for &'s mut ArenaVec<'arena, T> {
    type Item = &'s mut T;
    type IntoIter = core::slice::IterMut<'s, T>;
    fn into_iter(self) -> Self::IntoIter {
        self.0.iter_mut()
    }
}

/// Version of [`String`] that can be safely used inside a memory arena.
///
/// This type dereferences to [`str`] and implements [`AsRef<str>`] and
/// [`core::borrow::Borrow<str>`] for ergonomic use with APIs expecting string
/// slices.
///
/// The string borrows the arena and cannot outlive it. Dropping the arena
/// frees its memory without running `Drop` for the string contents.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct ArenaString<'arena>(collections::String<'arena>);

impl<'arena> ArenaString<'arena> {
    /// Allocates a copy of `string` in `arena` and returns an [`ArenaString`].
    #[must_use]
    pub fn from_str_in(string: &str, arena: &'arena Arena) -> Self {
        Self(collections::String::from_str_in(string, &arena.bump))
    }
}

impl<'arena> Deref for ArenaString<'arena> {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<'arena> AsRef<str> for ArenaString<'arena> {
    fn as_ref(&self) -> &str {
        &self.0
    }
}

impl<'arena> core::borrow::Borrow<str> for ArenaString<'arena> {
    fn borrow(&self) -> &str {
        &self.0
    }
}

impl<'arena> Display for ArenaString<'arena> {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        Display::fmt(&self.0, f)
    }
}
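
Taken together, a minimal usage sketch of this new arena API, using only items defined in this diff (`TokenLocation::Position` comes from the lexer changes further down):

```rust
use rottlib::arena::Arena;
use rottlib::lexer::TokenLocation;

fn demo() {
    let arena = Arena::new();
    let at = TokenLocation::Position { line: 0, column: 0 };

    // Values live as long as `arena`; no per-value `Drop` runs on teardown.
    let node = arena.alloc_at(42_i32, at);
    assert_eq!(*node, 42); // `ArenaNode` derefs to its payload

    let mut items = arena.vec();
    items.push(*node);
    assert_eq!(items.len(), 1);

    let name = arena.string("PlayerController");
    assert_eq!(name.as_ref(), "PlayerController");
} // dropping `arena` frees the node, vector, and string at once
```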
376  rottlib/src/ast.rs  Normal file
@@ -0,0 +1,376 @@
use core::fmt;

use crate::arena::{Arena, ArenaNode, ArenaString, ArenaVec};
use crate::lexer::TokenLocation;

// Both endpoints are inclusive!
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AstSpan {
    pub from: TokenLocation,
    pub to: TokenLocation,
}

impl AstSpan {
    pub fn merge(left_span: &AstSpan, right_span: &AstSpan) -> AstSpan {
        AstSpan {
            from: left_span.from,
            to: right_span.to,
        }
    }

    pub fn new(single_location: TokenLocation) -> AstSpan {
        AstSpan {
            from: single_location,
            to: single_location,
        }
    }

    pub fn range(from: TokenLocation, to: TokenLocation) -> AstSpan {
        AstSpan { from, to }
    }

    pub fn extend_to(&mut self, right_most_location: TokenLocation) {
        if right_most_location > self.to {
            self.to = right_most_location
        }
    }
}

#[derive(Clone, Copy, Debug)]
pub enum PrefixOperator {
    Not,
    Minus,
    BitwiseNot,
    Increment,
    Decrement,
}

#[derive(Clone, Copy, Debug)]
pub enum PostfixOperator {
    Increment,
    Decrement,
}

#[derive(Clone, Copy, Debug)]
pub enum InfixOperator {
    // Assignments
    Assign,
    MultiplyAssign,
    DivideAssign,
    ModuloAssign,
    PlusAssign,
    MinusAssign,
    ConcatAssign,
    ConcatSpaceAssign,
    // String operations
    ConcatSpace,
    Concat,
    // Logical
    And,
    Xor,
    Or,
    // Bitwise
    BitwiseAnd,
    BitwiseOr,
    BitwiseXor,
    // Not-equal
    NotEqual,
    // Comparison
    Equal,
    ApproximatelyEqual,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,
    ClockwiseFrom,
    // Shifts
    LeftShift,
    LogicalRightShift,
    RightShift,
    // Terms
    Plus,
    Minus,
    // Modulo
    Modulo,
    // Factor
    Multiply,
    Divide,
    Dot,
    Cross,
    // Exponentiation
    Exponentiation,
}

#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
pub enum Expression<'src, 'arena> {
    Binary(
        ExpressionRef<'src, 'arena>,
        InfixOperator,
        ExpressionRef<'src, 'arena>,
    ),
    LeftUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
    RightUnary(ExpressionRef<'src, 'arena>, PostfixOperator),

    Identifier(&'src str),
    String(ArenaString<'arena>),
    Integer(i128),
    Float(f64),

    Bool(bool),
    None,
    Parentheses(ExpressionRef<'src, 'arena>),

    Block {
        // All of these end with `;`
        statements: ArenaVec<'arena, StatementRef<'src, 'arena>>,
        // Last expression, but only if it doesn't end with `;`
        tail: Option<ExpressionRef<'src, 'arena>>,
    },
    If {
        condition: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
        else_body: Option<ExpressionRef<'src, 'arena>>,
    },
    While {
        condition: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
    },
    DoUntil {
        condition: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
    },
    ForEach {
        iterator: ExpressionRef<'src, 'arena>,
        body: ExpressionRef<'src, 'arena>,
    },
    For {
        init: Option<ExpressionRef<'src, 'arena>>,
        condition: Option<ExpressionRef<'src, 'arena>>,
        step: Option<ExpressionRef<'src, 'arena>>,
        body: ExpressionRef<'src, 'arena>,
    },
    Switch {
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, CaseRef<'src, 'arena>>,
        // Default case
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
        // Last statement of the case block
        tail: Option<ExpressionRef<'src, 'arena>>,
    },
    Goto(ArenaString<'arena>),
    Continue,
    Break(Option<ExpressionRef<'src, 'arena>>),
    Return(Option<ExpressionRef<'src, 'arena>>),
    // For injecting in place of parts that couldn't be parsed
    // (along with the text that couldn't be parsed)
    Error,
}

pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;

#[derive(Debug)]
pub struct VariableDeclarator<'src, 'arena> {
    pub name: ArenaString<'arena>,
    pub initializer: Option<ExpressionRef<'src, 'arena>>,
}

#[derive(Debug)]
pub struct SwitchCase<'src, 'arena> {
    // UnrealScript allows expressions here; multiple labels are ok
    pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>,
    // Fallthrough is allowed unless a Break/Goto ends the body
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
}

pub type CaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;

#[derive(Debug)]
pub enum Statement<'src, 'arena> {
    // For cases where the user just used too many semicolons: `;;;;`
    Empty,
    Expression(ExpressionRef<'src, 'arena>),
    // Just declarations without assignment:
    // `local int i, j, k`
    LocalVariableDeclaration {
        type_name: ArenaString<'arena>,
        identifiers: ArenaVec<'arena, ArenaString<'arena>>,
    },
    // Just `int i, j = 3, k = 0`
    VariableDeclaration {
        type_name: ArenaString<'arena>,
        declarations: ArenaVec<'arena, VariableDeclarator<'src, 'arena>>,
    },
    Label(ArenaString<'arena>),
    // For injecting in place of parts that couldn't be parsed
    // (along with the text that couldn't be parsed)
    Error,
}

pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;

impl<'src, 'arena> Expression<'src, 'arena> {
    pub fn new_prefix(
        arena: &'arena Arena,
        op_position: TokenLocation,
        op: PrefixOperator,
        rhs: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        let span = AstSpan {
            from: op_position,
            to: rhs.span().to,
        };
        ArenaNode::new_in(Self::LeftUnary(op, rhs), span, arena)
    }

    pub fn new_postfix(
        arena: &'arena Arena,
        lhs: ArenaNode<'arena, Self>,
        op: PostfixOperator,
        op_position: TokenLocation,
    ) -> ArenaNode<'arena, Self> {
        let span = AstSpan {
            from: lhs.span().from,
            to: op_position,
        };
        ArenaNode::new_in(Self::RightUnary(lhs, op), span, arena)
    }

    pub fn new_binary(
        arena: &'arena Arena,
        lhs: ArenaNode<'arena, Self>,
        op: InfixOperator,
        rhs: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        let span = AstSpan::merge(lhs.span(), rhs.span());
        ArenaNode::new_in(Self::Binary(lhs, op, rhs), span, arena)
    }
}

/// Returns `true` for expressions that require `;` when used as a statement
/// (i.e., everything except blocky control-flow forms).
pub trait NeedsSemi {
    fn needs_semicolon(&self) -> bool;
}

impl<'src, 'arena> NeedsSemi for Expression<'src, 'arena> {
    #[inline]
    fn needs_semicolon(&self) -> bool {
        match self {
            Expression::Block { .. }
            | Expression::If { .. }
            | Expression::While { .. }
            | Expression::DoUntil { .. }
            | Expression::ForEach { .. }
            | Expression::For { .. }
            | Expression::Error => false,

            // All other expressions require `;` when used as a statement.
            _ => true,
        }
    }
}

// `ArenaNode<T>` derefs to `T`, so delegation through `(**self)` works.
impl<'src, 'arena> NeedsSemi for ExpressionRef<'src, 'arena> {
    #[inline]
    fn needs_semicolon(&self) -> bool {
        (**self).needs_semicolon()
    }
}

impl<'src, 'arena> NeedsSemi for Statement<'src, 'arena> {
    #[inline]
    fn needs_semicolon(&self) -> bool {
        match self {
            Statement::Empty | Statement::Label { .. } | Statement::Error => false,
            // All other statements require `;`.
            _ => true,
        }
    }
}

// `ArenaNode<T>` derefs to `T`, so delegation through `(**self)` works.
impl<'src, 'arena> NeedsSemi for StatementRef<'src, 'arena> {
    #[inline]
    fn needs_semicolon(&self) -> bool {
        (**self).needs_semicolon()
    }
}

impl fmt::Display for PrefixOperator {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = match self {
            PrefixOperator::Not => "!",
            PrefixOperator::Minus => "-",
            PrefixOperator::BitwiseNot => "~",
            PrefixOperator::Increment => "++.",
            PrefixOperator::Decrement => "--.",
        };
        write!(f, "{s}")
    }
}

impl fmt::Display for PostfixOperator {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let s = match self {
            PostfixOperator::Increment => ".++",
            PostfixOperator::Decrement => ".--",
        };
        write!(f, "{s}")
    }
}

impl fmt::Display for InfixOperator {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use InfixOperator::*;
        let s = match self {
            // Assignments
            Assign => "=",
            MultiplyAssign => "*=",
            DivideAssign => "/=",
            ModuloAssign => "%=",
            PlusAssign => "+=",
            MinusAssign => "-=",
            ConcatAssign => "$=",
            ConcatSpaceAssign => "@=",
            // String operations
            ConcatSpace => "@",
            Concat => "$",
            // Logical
            And => "&&",
            Xor => "^^",
            Or => "||",
            // Bitwise
            BitwiseAnd => "&",
            BitwiseOr => "|",
            BitwiseXor => "^",
            // Not equal
            NotEqual => "!=",
            // Comparison
            Equal => "==",
            ApproximatelyEqual => "~=",
            Less => "<",
            LessEqual => "<=",
            Greater => ">",
            GreaterEqual => ">=",
            ClockwiseFrom => "ClockwiseFrom",
            // Shifts
            LeftShift => "<<",
            LogicalRightShift => ">>>",
            RightShift => ">>",
            // Terms
            Plus => "+",
            Minus => "-",
            // Modulo
            Modulo => "%",
            // Factor
            Multiply => "*",
            Divide => "/",
            Dot => "Dot",
            Cross => "Cross",
            // Exponentiation
            Exponentiation => "**",
        };
        write!(f, "{s}")
    }
}
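
A small sketch of how these constructors and `NeedsSemi` compose — building the AST for `a + 2` out of the pieces above (all names come from this file and `arena.rs`):

```rust
use rottlib::arena::Arena;
use rottlib::ast::{Expression, InfixOperator, NeedsSemi};
use rottlib::lexer::TokenLocation;

fn demo(arena: &Arena) {
    let loc = |column| TokenLocation::Position { line: 0, column };

    // `a + 2`: the merged span runs from `a` (column 0) to `2` (column 2).
    let lhs = arena.alloc_at(Expression::Identifier("a"), loc(0));
    let rhs = arena.alloc_at(Expression::Integer(2), loc(2));
    let sum = Expression::new_binary(arena, lhs, InfixOperator::Plus, rhs);

    // A plain binary expression needs `;` when used as a statement;
    // blocky forms like `If`/`While` do not.
    assert!(sum.needs_semicolon());
}
```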
251  rottlib/src/diagnostics.rs  Normal file
@@ -0,0 +1,251 @@
//! Diagnostics primitives for all stages of the compiler and frontend code.
//!
//! These types describe what to show the user when something goes wrong while
//! parsing or doing lightweight frontend checks. They are intentionally small,
//! depend only on [`AstSpan`], and are easy to construct and store.

use crate::ast::AstSpan;

/// Classification of a diagnostic by its impact.
///
/// Choose the most restrictive level that reflects the state of the source and
/// the compiler's ability to continue.
///
/// - `Error`: use when the source is invalid according to the language rules or
///   the parser cannot make a sound interpretation. Errors typically prevent
///   code generation. Examples: mismatched delimiters,
///   missing required tokens, invalid escapes, unrecoverable ambiguity.
/// - `Warning`: use when the source is valid but likely unintended, obsolete,
///   or suboptimal. Warnings should not change program semantics if ignored
///   and must not block compilation. Examples: deprecated syntax, shadowing
///   that is allowed but suspicious, unreachable code after a return.
///   Do not use warnings to paper over true syntax errors. If the construct
///   is invalid, prefer [`Severity::Error`] even if recovery is possible.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[non_exhaustive]
pub enum Severity {
    /// Fatal to the current compilation unit or requires recovery.
    Error,
    /// Non-fatal advisory about suspicious but valid code.
    Warning,
}

/// A labeled source span with a short inline message.
///
/// The message should be one sentence, start lowercase, and omit the final
/// period.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct Label {
    /// Span to highlight in source coordinates.
    pub span: AstSpan,
    /// Short inline text shown next to the caret line.
    pub message: String,
}

/// A single pure-data diagnostic message with optional structured context.
#[derive(Clone, Debug, PartialEq, Eq)]
#[must_use]
pub struct Diagnostic {
    /// Headline, e.g. "Mismatched closing delimiter: `}`".
    headline: String,
    /// Impact of the diagnostic. See [`Severity`] for guidance.
    severity: Severity,
    /// Optional stable identifier, e.g. "P0007" or "L0001".
    ///
    /// Codes must match `^[LPTSXD][0-9]{4}$` where the prefix is the domain:
    /// `L` lexer, `P` parser, `T` type check, `S` semantics, `X` lints,
    /// `D` deprecations.
    ///
    /// Codes help users search documentation and suppress or elevate specific
    /// diagnostics. Keep codes stable across releases once published.
    code: Option<String>,
    /// Marks the main location the user should look at first.
    ///
    /// Typically the exact token or span that triggered the diagnostic.
    primary_label: Option<Label>,
    /// Supplemental locations that add context or cross-reference the primary
    /// site.
    ///
    /// Examples: "matching delimiter was opened here", "declared here",
    /// "previous use here". Secondary labels should not compete with
    /// the primary.
    secondary_labels: Vec<Label>,
    /// A single actionable suggestion aimed at a quick fix. Keep it concise.
    help: Option<String>,
    /// Additional free-form lines not intended as a fix suggestion.
    ///
    /// Use for technical details, references, or rationale. Keep each string to
    /// a single paragraph.
    notes: Vec<String>,
}

impl Diagnostic {
    /// Constructs a new error diagnostic with the given headline.
    ///
    /// Use for invalid constructs that prevent a sound interpretation.
    pub fn error(headline: impl Into<String>) -> Self {
        Self {
            headline: headline.into(),
            severity: Severity::Error,
            code: None,
            primary_label: None,
            secondary_labels: Vec::new(),
            notes: Vec::new(),
            help: None,
        }
    }

    /// Constructs a new warning diagnostic with the given headline.
    ///
    /// Use for valid but suspicious or suboptimal constructs.
    pub fn warning(headline: impl Into<String>) -> Self {
        Self {
            headline: headline.into(),
            severity: Severity::Warning,
            code: None,
            primary_label: None,
            secondary_labels: Vec::new(),
            notes: Vec::new(),
            help: None,
        }
    }

    /// Returns `true` iff severity is [`Severity::Error`].
    pub fn stops_compilation(&self) -> bool {
        self.severity == Severity::Error
    }

    /// Returns the diagnostic code if present.
    ///
    /// See [`DiagnosticBuilder::code`] for the code scheme.
    pub fn code(&self) -> Option<&str> {
        self.code.as_deref()
    }

    /// Returns the primary label, if any.
    pub fn primary_label(&self) -> Option<&Label> {
        self.primary_label.as_ref()
    }

    /// Returns the secondary labels in insertion order.
    pub fn secondary_labels(&self) -> &[Label] {
        &self.secondary_labels
    }

    /// Returns the headline.
    pub fn headline(&self) -> &str {
        &self.headline
    }

    /// Returns the severity.
    pub fn severity(&self) -> Severity {
        self.severity
    }

    /// Returns the notes.
    pub fn notes(&self) -> &[String] {
        &self.notes
    }

    /// Returns the help message, if any.
    pub fn help(&self) -> Option<&str> {
        self.help.as_deref()
    }
}

/// A convenient diagnostic builder.
#[derive(Debug)]
#[must_use]
pub struct DiagnosticBuilder {
    diagnostic: Diagnostic,
}

impl DiagnosticBuilder {
    /// Creates a new builder for an error diagnostic with a given headline.
    pub fn error(headline: impl Into<String>) -> Self {
        Self {
            diagnostic: Diagnostic::error(headline),
        }
    }

    /// Creates a new builder for a warning diagnostic with a given headline.
    pub fn warning(headline: impl Into<String>) -> Self {
        Self {
            diagnostic: Diagnostic::warning(headline),
        }
    }

    /// Attaches or replaces the stable diagnostic code.
    ///
    /// Codes identify classes of diagnostics across versions.
    /// Keep them short, ASCII-only, and stable. Prefer the scheme:
    ///
    /// 1. Prefix = domain:
    ///    - `L`: lexer (invalid char, unterminated string);
    ///    - `P`: parser (mismatched delimiters, expected X found Y);
    ///    - `T`: type check;
    ///    - `S`: semantic analysis (name resolution, visibility);
    ///    - `X`: style/lints (shadowing, dead code);
    ///    - `D`: deprecations.
    ///
    /// 2. Suffix = 4 digits, zero-padded: `0001`..`9999`.
    ///    Example codes: `L0001`, `P0007`, `T0123`.
    ///
    /// Codes are optional, but once published should not change.
    pub fn code(mut self, code: impl Into<String>) -> Self {
        self.diagnostic.code = Some(code.into());
        self
    }

    /// Attaches or replaces the primary label.
    ///
    /// One sentence, starting with a lowercase letter, no period at the end.
    /// Since only one primary label can be specified, the previous primary is
    /// replaced.
    pub fn primary_label(mut self, span: AstSpan, message: impl Into<String>) -> Self {
        self.diagnostic.primary_label = Some(Label {
            span,
            message: message.into(),
        });
        self
    }

    /// Adds a secondary label.
    ///
    /// One sentence, starting with a lowercase letter, no period at the end.
    pub fn secondary_label(mut self, span: AstSpan, message: impl Into<String>) -> Self {
        self.diagnostic.secondary_labels.push(Label {
            span,
            message: message.into(),
        });
        self
    }

    /// Adds a free-form note line.
    ///
    /// Can be several sentences, starting with an uppercase letter and with a
    /// period at the end.
    pub fn note(mut self, message: impl Into<String>) -> Self {
        self.diagnostic.notes.push(message.into());
        self
    }

    /// Sets the help message.
    ///
    /// Can be several sentences, starting with an uppercase letter and with a
    /// period at the end.
    pub fn help(mut self, message: impl Into<String>) -> Self {
        self.diagnostic.help = Some(message.into());
        self
    }

    /// Finishes building and returns the diagnostic.
    pub fn build(self) -> Diagnostic {
        self.diagnostic
    }
}

impl From<DiagnosticBuilder> for Diagnostic {
    fn from(diagnostic_builder: DiagnosticBuilder) -> Self {
        diagnostic_builder.build()
    }
}
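
How the builder is meant to be chained in practice, reusing the example strings from the doc comments above:

```rust
use rottlib::ast::AstSpan;
use rottlib::diagnostics::{Diagnostic, DiagnosticBuilder};

fn mismatched_delimiter(open_span: AstSpan, close_span: AstSpan) -> Diagnostic {
    DiagnosticBuilder::error("Mismatched closing delimiter: `}`")
        .code("P0007") // parser domain, per the `^[LPTSXD][0-9]{4}$` scheme
        .primary_label(close_span, "unexpected closing delimiter")
        .secondary_label(open_span, "matching delimiter was opened here")
        .help("Remove the stray `}` or add a matching `{`.")
        .build()
}
```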
@@ -47,7 +47,7 @@ impl<'src> DebugTools for super::TokenizedFile<'src> {
             // the remainder of a multi-line token that started earlier.
             (Some(origin_row), None) => {
                 println!(
-                    "\t[Continued from line {} – no new tokens here]",
+                    "\t[Continued from line {} - no new tokens here]",
                     origin_row + 1
                 );
             }
@@ -69,13 +69,13 @@ impl<'src> DebugTools for super::TokenizedFile<'src> {
 
 /// Helper that prints every span in `spans` together with its UTF-16
 /// column boundaries.
-fn dump_spans<'a>(spans: &[super::TokenPiece<'a>]) {
+fn dump_spans<'src>(spans: &[super::TokenPiece<'src>]) {
     let mut col_utf16 = 0usize;
     for span in spans {
         let start = col_utf16;
         let end = start + span.length_utf16;
         println!(
-            "\t\t{:?} @ {}–{}: {:?}",
+            "\t\t{:?} @ {}-{}: {:?}",
             span.token, start, end, span.lexeme
         );
         col_utf16 = end;
@@ -28,9 +28,7 @@ use super::{TokenLocation, TokenPiece, TokenizedFile};
 #[derive(Clone, Debug)]
 pub struct Tokens<'src> {
     /// [`TokenLocation`] of the next token to be returned.
-    ///
-    /// [`None`] means the iterator has been exhausted.
-    cursor: Option<TokenLocation>,
+    cursor: TokenLocation,
     /// [`TokenizedFile`] whose tokens we're iterating over.
     source_file: &'src TokenizedFile<'src>,
     /// When `true`, whitespace tokens are skipped.
@@ -51,50 +49,57 @@ impl<'src> Tokens<'src> {
 
     // Returns the position of the next new token, skipping carried-over pieces
     // and blank lines.
-    fn advance_position(&self, mut position: TokenLocation) -> Option<TokenLocation> {
-        if let Some(current_line) = self.source_file.lines.get(position.line) {
+    fn advance_position(&self, position: TokenLocation) -> TokenLocation {
+        let TokenLocation::Position {
+            mut line,
+            mut column,
+        } = position
+        else {
+            return TokenLocation::EndOfFile;
+        };
+        if let Some(current_line) = self.source_file.lines.get(line) {
             // `Line::len()` also counts a possible token that continued from
             // the previous line.
-            if position.column + 1 < current_line.len() {
-                position.column += 1;
-                return Some(position);
+            if column + 1 < current_line.len() {
+                column += 1;
+                return TokenLocation::Position { line, column };
             }
         }
         // Current line is exhausted: walk downward until we find the first line
         // that **owns local tokens**, because we only want *new* tokens,
         // not ones continued from previous lines (they were already iterated over).
-        position.line += 1;
-        while let Some(next_line) = self.source_file.lines.get(position.line) {
+        line += 1;
+        while let Some(next_line) = self.source_file.lines.get(line) {
             if next_line.local_range().is_some() {
                 // Start at the first *local* token,
                 // skipping any carried-over one
-                column = if next_line.continued_from.is_some() {
+                column = if next_line.continued_from.is_some() {
                     1
                 } else {
                     0
                 };
-                return Some(position);
+                return TokenLocation::Position { line, column };
             }
-            position.line += 1; // keep skipping empty / pure-carried lines
+            line += 1; // keep skipping empty / pure-carried lines
         }
         // No more tokens.
-        None
+        TokenLocation::EndOfFile
     }
 
     // Creates a new iterator.
     fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
         let mut new_iterator = Tokens {
             source_file,
-            cursor: Some(TokenLocation { line: 0, column: 0 }),
+            cursor: TokenLocation::Position { line: 0, column: 0 },
             skip_whitespace: false,
         };
         // We need to land on the first existing token so [`Iterator::next`]
         // can assume cursor is valid.
-        while let Some(token_position) = new_iterator.cursor {
-            if new_iterator.source_file.get(token_position).is_some() {
+        while new_iterator.cursor != TokenLocation::EndOfFile {
+            if new_iterator.source_file.get(new_iterator.cursor).is_some() {
                 break;
             }
-            new_iterator.cursor = new_iterator.advance_position(token_position);
+            new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
         }
         new_iterator
     }
@@ -105,16 +110,17 @@ impl<'src> Iterator for Tokens<'src> {
 
     fn next(&mut self) -> Option<Self::Item> {
         // We only ever loop to discard whitespace when the flag is on
-        loop {
-            let current_cursor = self.cursor?;
-            let token_piece = *self.source_file.get(current_cursor)?;
-            self.cursor = self.advance_position(current_cursor);
+        while self.cursor != TokenLocation::EndOfFile {
+            let token_location = self.cursor;
+            let token_piece = *self.source_file.get(self.cursor)?;
+            self.cursor = self.advance_position(self.cursor);
 
             // Optional whitespace-skip
             if !self.skip_whitespace || !token_piece.token.is_whitespace() {
-                return Some((current_cursor, token_piece));
+                return Some((token_location, token_piece));
             }
         }
         None
     }
 }
 
@@ -139,7 +145,7 @@ impl<'src> TokenizedFile<'src> {
     /// ## Examples
     ///
     /// ```rust
-    /// use mycrate::{TokenizedFile, TokenLocation, Token};
+    /// use super::{TokenizedFile, TokenLocation, Token};
     /// let file = TokenizedFile::from_str("0 / 0");
     /// assert_eq!(
     ///     file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),
@@ -148,8 +154,11 @@ impl<'src> TokenizedFile<'src> {
     /// ```
     #[track_caller]
     pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
-        let line = self.lines.get(position.line)?;
-        let column = position.column;
+        let TokenLocation::Position { line, column } = position else {
+            return None;
+        };
+        let line = self.lines.get(line)?;
+        let column = column;
         if column >= line.len() {
             return None;
         }
 
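
What the iterator change buys at the call site — a sketch (assuming `TokenizedFile::tokens()` constructs this iterator, as the parser cursor later in this diff does):

```rust
use rottlib::lexer::TokenizedFile;

fn demo() {
    let file = TokenizedFile::from_str("local int myValue;");
    // Each item is a `(TokenLocation, TokenPiece)` pair; the cursor is now a
    // plain `TokenLocation`, with `EndOfFile` marking exhaustion internally.
    for (location, piece) in file.tokens() {
        println!("{:?} at {:?}: {:?}", piece.token, location, piece.lexeme);
    }
}
```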
@@ -2,15 +2,15 @@
 //!
 //! ## Notable details
 //!
-//! Lexer for UnrealScript that recognizes inline `cpptext { … }` blocks.
+//! Lexer for UnrealScript that recognizes inline `cpptext { ... }` blocks.
 //!
 //! In UnrealScript, `cpptext` lets authors embed raw C++ between braces.
 //! Because whitespace, newlines, or comments may appear between the
 //! `cpptext` keyword and the opening `{`, the lexer must remember that
 //! it has just seen `cpptext` - hence a state machine.
 //!
-//! Modes
-//! ------
+//! ## Modes
 //!
 //! - **Normal** - ordinary UnrealScript tokens.
 //! - **AwaitingCppBlock** - after `cpptext`, waiting for the next `{`.
 //!
@@ -170,6 +170,8 @@ pub enum Token {
     NativeReplication,
 
     // # Control-flow keywords
+    #[regex("(?i)goto")]
+    Goto,
     #[regex("(?i)if")]
     If,
     #[regex("(?i)else")]
@@ -265,9 +267,9 @@ pub enum Token {
     Minus,
     // ## String manipulation
     #[token("@")]
-    AtChar,
+    ConcatSpace,
     #[token("$")]
-    DollarChar,
+    Concat,
     // ## Shifts
     #[token("<<")]
     LeftShift,
@@ -326,9 +328,9 @@ pub enum Token {
 
     // # Punctuation & delimiters
     #[token("(")]
-    LeftParen,
+    LeftParenthesis,
     #[token(")")]
-    RightParen,
+    RightParenthesis,
     #[token("{", handle_brace)]
     Brace(BraceKind),
     #[token("}")]
@@ -356,7 +358,7 @@ pub enum Token {
     #[regex(r"/\*", handle_block_comment)]
     BlockComment,
     #[regex(r"\r\n|\n|\r")]
-    NewLine,
+    Newline,
     #[regex(r"[ \t]+")]
     Whitespace,
 
@@ -367,7 +369,7 @@ pub enum Token {
 impl Token {
     /// Returns `true` if this token is a newline (`Token::Newline`).
     pub fn is_newline(&self) -> bool {
-        matches!(self, Token::NewLine)
+        matches!(self, Token::Newline)
     }
 
     /// Returns `true` if this token is trivia whitespace
@@ -375,7 +377,7 @@ impl Token {
     ///
     /// Note: comments are **not** considered whitespace.
     pub fn is_whitespace(&self) -> bool {
-        matches!(&self, Token::Whitespace | Token::NewLine)
+        matches!(&self, Token::Whitespace | Token::Newline)
    }
 
     /// Returns `true` if this token may span multiple physical lines
@@ -386,6 +388,22 @@ impl Token {
             Token::BlockComment | Token::Brace(BraceKind::CppBlock) | Token::Error
         )
     }
+
+    /// Returns `true` if this token can appear in type position
+    /// (either a built-in type keyword or an identifier).
+    pub fn is_valid_type_name_token(&self) -> bool {
+        matches!(
+            self,
+            Token::Int
+                | Token::Float
+                | Token::Bool
+                | Token::Byte
+                | Token::String
+                | Token::Array
+                | Token::Name
+                | Token::Identifier
+        )
+    }
 }
 
 /// Consume a `/* ... */` block comment with arbitrary nesting
@@ -476,7 +494,7 @@ fn consume_cpp_block(lexer: &mut Lexer<Token>) {
     }
 }
 
-/// Consume over a C-style `/* … */` comment (without nesting).
+/// Consumes a C-style `/* ... */` comment (without nesting).
 ///
 /// Assumes that the opener `/*` is already consumed.
 fn consume_c_comment(lexer: &mut Lexer<Token>) {
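
To make the `cpptext` state machine concrete, a sketch of what a caller would observe (assuming `from_str` and `tokens()` behave as shown elsewhere in this diff; the `BraceKind` re-export added in `mod.rs` below makes the match possible):

```rust
use rottlib::lexer::{BraceKind, Token, TokenizedFile};

fn demo() {
    // After `cpptext`, the lexer waits for `{` and then consumes the whole
    // raw C++ block as a single `Brace(BraceKind::CppBlock)` token.
    let file = TokenizedFile::from_str("cpptext\n{\n    int32 Counter;\n}\n");
    let has_cpp_block = file
        .tokens()
        .any(|(_, piece)| matches!(piece.token, Token::Brace(BraceKind::CppBlock)));
    assert!(has_cpp_block);
}
```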
@@ -23,11 +23,11 @@
 //! compiled with the `debug` feature enabled. They live in the [`debug_tools`]
 //! extension trait, implemented for [`TokenizedFile`].
 //!
-//! ```
+//! ```rust
 //! // bring the trait into scope
 //! use lexer::DebugTools;
 //!
-//! let file = TokenizedFile::from_str(src);
+//! let file = TokenizedFile::from_str("local int myValue;");
 //! file.debug_dump(); // pretty-print token layout
 //! let text = file.to_source(); // reconstruct original text
 //! ```
@@ -43,7 +43,7 @@ use logos::Logos;
 #[cfg(any(debug_assertions, feature = "debug"))]
 pub use debug_tools::DebugTools;
 pub use iterator::Tokens;
-pub use lexing::Token;
+pub use lexing::{BraceKind, Token};
 
 /// Empirically chosen starting size for token buffer (used during tokenization)
 /// that provides good performance.
@@ -64,15 +64,22 @@ pub struct TokenPiece<'src> {
     pub length_utf16: usize,
 }
 
-/// Defines location of a token inside [`TokenizedFile`] in a way, convenient
+/// Defines the location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-pub struct TokenLocation {
-    /// 0-based line number.
-    pub line: usize,
-    /// 0-based index of a token in the line, possibly including the token that
-    /// has continued from the previous line.
-    pub column: usize,
+pub enum TokenLocation {
+    /// Actual position of some token in the file.
+    Position {
+        /// 0-based line number.
+        line: usize,
+        /// 0-based index of a token in the line, possibly including the token
+        /// that has continued from the previous line.
+        ///
+        /// Columns count tokens, not bytes or chars.
+        column: usize,
+    },
+    /// Position at the end of the file.
+    EndOfFile,
 }
 
 /// A tokenized, lossless representation of an UnrealScript source file.
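
The enum replaces the previous `Option<TokenLocation>` convention with an explicit end-of-file marker; a small sketch of matching on it:

```rust
use rottlib::lexer::TokenLocation;

fn describe(location: TokenLocation) -> String {
    match location {
        TokenLocation::Position { line, column } => {
            // Columns index tokens on the line, not bytes or characters.
            format!("token #{} on line {}", column + 1, line + 1)
        }
        TokenLocation::EndOfFile => "end of file".to_string(),
    }
}
```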
@@ -102,6 +109,10 @@ struct Tokenizer<'src> {
     slice_start_index: usize,
     /// When a multi-line token is being scanned, stores the 0-based line
     /// on which it started; [`None`] otherwise.
+    ///
+    /// `Some(line_idx)` iff the current line is within a multi-line token that
+    /// started on `line_idx`; it is consumed exactly once by
+    /// [`Self::commit_current_line`].
     multi_line_start: Option<usize>,
     /// Set to [`true`] if the lexer reported any error tokens.
     had_errors: bool,
@@ -141,7 +152,7 @@ impl<'src> TokenizedFile<'src> {
     /// ```rust
     /// let tokenized_file = TokenizedFile::from_str("function test() {}");
     /// if tokenized_file.has_errors() {
-    ///     println!("Error while parsing file: {}", path.display());
+    ///     println!("Error while parsing file.");
     /// }
     /// ```
     #[inline]
@@ -170,7 +181,7 @@ type TokenIdx = usize;
 
 /// Representation of a single physical line of the source file.
 ///
-/// [`Range<TokenIndex>`] are used instead of slices to avoid creating
+/// [`Range<TokenIdx>`] are used instead of slices to avoid creating
 /// a self-referential struct (with [`TokenizedFile`]), which Rust forbids.
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 struct Line {
@@ -214,7 +225,7 @@ impl Line {
     /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
     /// on this line.
     ///
-    /// [`None`] means there is no such tokens. Otherwise range is guaranteed
+    /// [`None`] means there are no such tokens. Otherwise the range is guaranteed
     /// to not be empty.
     #[inline]
     fn local_range(&self) -> Option<Range<TokenIdx>> {
@@ -225,7 +236,7 @@ impl Line {
         }
     }
 
-    /// Returns amount of tokens of the line.
+    /// Returns the number of tokens on this line.
     ///
     /// Counts both tokens that started on this line and tokens that continued
     /// from the previous one.
@@ -246,7 +257,8 @@ impl<'src> Tokenizer<'src> {
         }
     }
 
-    /// Handles tokens that never span multiple lines.
+    /// Handles simple tokens that *never* span multiple lines, allowing us to
+    /// skip a lot of work.
     fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) {
         if token_piece.token.is_newline() {
             self.line_number += 1;
@@ -257,7 +269,7 @@ impl<'src> Tokenizer<'src> {
         }
     }
 
-    /// Handles tokens that may contain one or more newline characters.
+    /// Handles tokens that might contain one or more newline characters.
     fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) {
         let start_line = self.line_number;
         let newline_count = count_line_breaks(token_piece.lexeme);
@@ -271,12 +283,15 @@ impl<'src> Tokenizer<'src> {
         // We only need to commit the line if this token actually ended the line
         if newline_count > 0 {
             self.commit_current_line();
-            // We only need to insert one `Line::Spanned(base)` per *interior*
-            // newline, so `newline_count - 1` such lines
-            // (e.g. 2 line breaks in block comment -> it has
-            // exactly `1` interior line)
-            let insert_count = newline_count - 1;
-            for _ in 0..insert_count {
+            // We only need to insert one `Line::spanned(start_line)` per
+            // *interior* line:
+            //
+            // standalone           | local int i = /* Now we start long comment
+            // spanned              | with three line breaks and *exactly* two
+            // spanned              | inner lines that contain nothing but
+            // spanned_with_tokens  | comment bytes! */ = 0;
+            let inner_lines_count = newline_count - 1;
+            for _ in 0..inner_lines_count {
                 self.lines.push(Line::spanned(start_line));
             }
             // This is called *after* `commit_current_line()` cleared previous
@@ -313,7 +328,7 @@ impl<'src> Tokenizer<'src> {
     /// Finishes tokenization, converting accumulated data into
     /// [`TokenizedFile`].
     fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
-        // Commit any trailing tokens
+        // Flush trailing tokens for which `commit` wasn't auto-triggered
         self.commit_current_line();
         // If we still have a `multi_line_start`
         // (i.e. a pure multi-line token with no local tokens on its last line),
@@ -322,7 +337,6 @@ impl<'src> Tokenizer<'src> {
             self.lines.push(Line::spanned(from));
         }
 
-        // Optimize for size
         self.buffer.shrink_to_fit();
         self.lines.shrink_to_fit();
 
@@ -343,7 +357,7 @@ fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> {
     }
 }
 
-/// Counts the number of new lines in given text.
+/// Counts the number of newlines in the given text.
 fn count_line_breaks(text: &str) -> usize {
     let mut bytes_iterator = text.as_bytes().iter().peekable();
     let mut newline_count = 0;
@@ -1,3 +1,7 @@
+#![allow(clippy::doc_overindented_list_items)]
+
+pub mod arena;
+pub mod ast;
+pub mod diagnostics;
 pub mod lexer;
 pub mod parser;
 
230  rottlib/src/parser/cursor.rs  Normal file
@@ -0,0 +1,230 @@
//! Cursor utilities for a token stream.
//!
//! Provides memoized lookahead over significant tokens and attaches
//! trivia to [`TriviaComponent`]. Significant tokens exclude whitespace and
//! comments; see [`crate::parser::TriviaKind`].

use crate::lexer::{Token, TokenLocation};
use crate::parser::trivia::TriviaComponent;

/// Cursor over a token stream with memoized lookahead and trivia attachment.
#[derive(Clone, Debug)]
pub(crate) struct CursorComponent<'src> {
    /// Underlying token stream.
    tokens: crate::lexer::Tokens<'src>,
    /// Significant-token lookahead buffer.
    lookahead_buffer: std::collections::VecDeque<(TokenLocation, crate::lexer::TokenPiece<'src>)>,
    /// Location of the last consumed token.
    previous_location: Option<TokenLocation>,
    /// Location of the last significant token.
    ///
    /// Used to associate following trivia with the correct token.
    last_significant_location: Option<TokenLocation>,
    /// Scratch space for [`CursorComponent::buffer_next_significant_token`],
    /// used to avoid reallocations.
    trivia_buffer: Vec<crate::parser::trivia::TriviaToken<'src>>,
}

impl<'src> CursorComponent<'src> {
    /// Creates a [`CursorComponent`] over the tokens of `tokenized_file`.
    pub(crate) fn new(tokenized_file: &'src crate::lexer::TokenizedFile<'src>) -> Self {
        Self {
            tokens: tokenized_file.tokens(),
            lookahead_buffer: std::collections::VecDeque::new(),
            previous_location: None,
            last_significant_location: None,
            trivia_buffer: Vec::new(),
        }
    }

    /// Ensures the lookahead buffer contains at least `lookahead + 1`
    /// significant tokens.
    ///
    /// May consume trivia from the underlying stream.
    /// Does not consume significant tokens.
    fn ensure_min_lookahead(&mut self, lookahead: usize, trivia: &mut TriviaComponent<'src>) {
        while self.lookahead_buffer.len() <= lookahead {
            if !self.buffer_next_significant_token(trivia) {
                break;
            }
        }
    }

    /// Scans to the next significant token, recording intervening trivia.
    ///
    /// Returns `true` if a significant token was buffered,
    /// `false` on end of file.
    fn buffer_next_significant_token(&mut self, trivia: &mut TriviaComponent<'src>) -> bool {
        self.trivia_buffer.clear();
        while let Some((token_location, token_piece)) = self.tokens.next() {
            if let Ok(trivia_kind) = crate::parser::TriviaKind::try_from(token_piece.token) {
                self.trivia_buffer.push(crate::parser::TriviaToken {
                    kind: trivia_kind,
                    text: token_piece.lexeme,
                    location: token_location,
                });
            } else {
                // Attach trivia found after the previous significant token
                if !self.trivia_buffer.is_empty() {
                    trivia.record_between_locations(
                        self.last_significant_location,
                        token_location,
                        &mut self.trivia_buffer,
                    );
                }
                self.lookahead_buffer
                    .push_back((token_location, token_piece));
                self.last_significant_location = Some(token_location);
                return true;
            }
        }
        // Reached end-of-file: attach trailing trivia
        if !self.trivia_buffer.is_empty() {
            trivia.record_between_locations(
                self.last_significant_location,
                TokenLocation::EndOfFile,
                &mut self.trivia_buffer,
            );
        }
        false
    }
}
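A hypothetical caller, to show why memoized multi-token lookahead matters; since the peek helpers below are `pub(crate)`, it would have to live inside the parser module, and it combines them with `Token::is_valid_type_name_token` from this diff:

```rust
use crate::lexer::Token;

// Two-token lookahead test for a `<type> <identifier>` prefix,
// e.g. `int Health`, without consuming anything.
fn looks_like_declaration(parser: &mut crate::parser::Parser<'_, '_>) -> bool {
    matches!(parser.peek_token_at(0), Some(token) if token.is_valid_type_name_token())
        && matches!(parser.peek_token_at(1), Some(Token::Identifier))
}
```
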
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Returns the next token without consuming it.
    ///
    /// Returns [`None`] if no tokens remain.
    #[must_use]
    pub(crate) fn peek_token(&mut self) -> Option<Token> {
        self.peek_entry().map(|(_, token_piece)| token_piece.token)
    }

    /// Returns the next token, its lexeme, and its location
    /// without consuming it.
    ///
    /// Returns [`None`] if no tokens remain.
    #[must_use]
    pub(crate) fn peek_token_lexeme_and_location(
        &mut self,
    ) -> Option<(Token, &'src str, TokenLocation)> {
        self.peek_entry().map(|(token_location, token_piece)| {
            (token_piece.token, token_piece.lexeme, *token_location)
        })
    }

    /// Returns the next token and its lexeme without consuming it.
    ///
    /// Returns [`None`] if no tokens remain.
    #[must_use]
    pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
        self.peek_entry()
            .map(|(_, token_piece)| (token_piece.token, token_piece.lexeme))
    }

    /// Returns the next token and its location without consuming it.
    ///
    /// Returns [`None`] if no tokens remain.
    #[must_use]
    pub(crate) fn peek_token_and_location(&mut self) -> Option<(Token, TokenLocation)> {
        self.peek_entry()
            .map(|(token_location, token_piece)| (token_piece.token, *token_location))
    }

    /// Returns the location of the next token, or [`TokenLocation::EndOfFile`]
    /// if none remain.
    #[must_use]
    pub(crate) fn peek_location(&mut self) -> TokenLocation {
        self.peek_entry()
            .map(|(token_location, _)| *token_location)
            .unwrap_or(TokenLocation::EndOfFile)
    }

    /// Returns the location of the last token that was actually consumed
    /// by [`crate::parser::Parser::advance`].
    ///
    /// Returns [`None`] if no tokens have been consumed yet.
    #[must_use]
    pub(crate) fn last_consumed_location(&self) -> Option<TokenLocation> {
        self.cursor.previous_location
    }

    /// Returns the most recent location the parser is "at".
    ///
    /// If at least one token has been consumed, this is the location of the
    /// last consumed token. Otherwise it falls back to the location of the
    /// first significant token in the stream (or [`TokenLocation::EndOfFile`]
    /// if the stream is empty).
    #[must_use]
    pub(crate) fn last_visited_location(&mut self) -> TokenLocation {
        // The fallback is only needed before *any* tokens have been consumed
        self.last_consumed_location()
            .unwrap_or_else(|| self.peek_location())
    }

    /// Peeks the token at `lookahead` (`0` is the next token)
    /// without consuming.
    ///
    /// Returns `None` if the stream ends before that position.
    #[must_use]
    pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
        self.cursor
            .ensure_min_lookahead(lookahead, &mut self.trivia);
        self.cursor
            .lookahead_buffer
            .get(lookahead)
            .map(|(_, token_piece)| token_piece.token)
    }

    /// Advances by one significant token.
    ///
    /// Trivia is internally handled and recorded.
    /// Does nothing at the end-of-file.
    pub(crate) fn advance(&mut self) {
        self.cursor.ensure_min_lookahead(0, &mut self.trivia);
        if let Some((location, _)) = self.cursor.lookahead_buffer.pop_front() {
            self.cursor.previous_location = Some(location);
        }
    }

    /// If the next token equals `token`, consumes it and returns `true`.
    ///
    /// Otherwise leaves the cursor unchanged and returns `false`.
|
||||
/// Trivia is recorded automatically.
|
||||
pub(crate) fn eat(&mut self, token: Token) -> bool {
|
||||
let correct_token = self.peek_token() == Some(token);
|
||||
if correct_token {
|
||||
self.advance();
|
||||
}
|
||||
correct_token
|
||||
}
|
||||
|
||||
/// Centralized peek used by public peekers.
|
||||
fn peek_entry(&mut self) -> Option<&(TokenLocation, crate::lexer::TokenPiece<'src>)> {
|
||||
self.cursor.ensure_min_lookahead(0, &mut self.trivia);
|
||||
self.cursor.lookahead_buffer.front()
|
||||
}
|
||||
|
||||
/// Expects `expected` at the current position.
|
||||
///
|
||||
/// On match consumes the token and returns its [`TokenLocation`].
|
||||
/// Otherwise returns a [`crate::parser::ParseError`] of
|
||||
/// the given [`crate::parser::ParseErrorKind`] that carries the current
|
||||
/// span for diagnostics.
|
||||
pub(crate) fn expect(
|
||||
&mut self,
|
||||
expected: Token,
|
||||
error_kind: crate::parser::ParseErrorKind,
|
||||
) -> crate::parser::ParseResult<'src, 'arena, TokenLocation> {
|
||||
let token_position = self.peek_location();
|
||||
// `Token` only includes type information, so comparison is valid
|
||||
if self.peek_token() == Some(expected) {
|
||||
self.advance();
|
||||
Ok(token_position)
|
||||
} else {
|
||||
Err(crate::parser::ParseError {
|
||||
kind: error_kind,
|
||||
source_span: crate::ast::AstSpan::new(token_position),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
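
Editor's note: taken together, `peek_token`, `peek_token_at`, `eat`, and `expect` are the whole surface a grammar rule needs from this cursor. A minimal sketch of the usual call pattern follows; the rule itself (`key = value;`) is invented for illustration, only the cursor methods come from this file.

impl<'src, 'arena> Parser<'src, 'arena> {
    // Hypothetical rule, illustration only: parse `key = value;` if present.
    fn parse_assignment_line(&mut self) -> ParseResult<'src, 'arena, ()> {
        // Two-token lookahead decides the rule without consuming anything.
        if self.peek_token() == Some(Token::Identifier)
            && self.peek_token_at(1) == Some(Token::Assign)
        {
            self.advance(); // identifier
            self.advance(); // `=`
            let _value = self.parse_expression();
            // `expect` consumes on match, or builds an error carrying the
            // current location for diagnostics.
            self.expect(
                Token::Semicolon,
                ParseErrorKind::BlockMissingSemicolonAfterStatement,
            )?;
        }
        Ok(())
    }
}
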
95
rottlib/src/parser/errors.rs
Normal file
@ -0,0 +1,95 @@
//! Submodule with parsing-related errors.

use crate::ast::AstSpan;

/// Internal parse error kinds.
///
/// Used by the parser as a compact signal for later construction of user-facing
/// diagnostics.
///
/// Naming convention:
/// - Prefix identifies the syntactic construct
///   (`Expression`, `For`, `Switch`, etc.).
/// - Suffix describes the exact problem (`MissingClosingParenthesis`,
///   `UnexpectedToken`, `MultipleDefaults`, etc.).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Expression inside `(...)` could not be parsed and no closing `)`
    /// was found.
    ExpressionMissingClosingParenthesis,
    /// A `do` block was not followed by a matching `until`.
    DoMissingUntil,
    /// Found an unexpected token while parsing an expression.
    ExpressionUnexpectedToken,
    /// A `for` loop is missing its opening `(`.
    ForMissingOpeningParenthesis,
    /// The first `;` in `for (init; cond; step)` is missing.
    ForMissingInitializationSemicolon,
    /// The second `;` in `for (init; cond; step)` is missing.
    ForMissingConditionSemicolon,
    /// The closing `)` of a `for` loop is missing.
    ForMissingClosingParenthesis,
    /// An expression inside a block is not terminated with `;`.
    BlockMissingSemicolonAfterExpression,
    /// A statement inside a block is not terminated with `;`.
    BlockMissingSemicolonAfterStatement,
    /// `switch` has no body (missing matching braces).
    SwitchMissingBody,
    /// The first top-level item in a `switch` body is not a `case`.
    SwitchTopLevelItemNotCase,
    /// A `case` arm is missing the trailing `:`.
    SwitchCaseMissingColon,
    /// Found more than one `default` branch.
    SwitchDuplicateDefault,
    /// Found `case` arms after a `default` branch.
    SwitchCasesAfterDefault,
    /// A `goto` was not followed by a label.
    GotoMissingLabel,
    /// Unexpected end of input while parsing.
    UnexpectedEndOfFile,
    /// Token looked like a numeric literal but could not be parsed as one.
    InvalidNumericLiteral,
    /// A bare expression appeared in a `switch` arm but was not the final arm.
    ///
    /// Such an expression must be terminated with `;` or be the final arm.
    SwitchBareExpressionBeforeNextArm,
    /// A `local` declaration is missing its first identifier.
    ///
    /// At least one variable name must follow the type.
    LocalMissingIdentifier,
    /// A `local` declaration was followed by a token that cannot serve
    /// as a type name.
    LocalInvalidTypeName,
    /// Invalid variable name identifier in a `local` variable definition.
    LocalBadVariableIdentifier,
    /// An initializer appears in a `local` variable declaration.
    LocalInitializerNotAllowed,
    /// A non-`local` variable declaration is missing its first identifier.
    ///
    /// At least one variable name must follow the type.
    DeclMissingIdentifier,
    /// Invalid variable name identifier in a non-`local` variable definition.
    DeclBadVariableIdentifier,
}

/// A parse error: the specific kind of failure together with the source span
/// in which it was detected.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[must_use]
pub struct ParseError {
    /// The specific kind of parse error that occurred.
    pub kind: ParseErrorKind,
    /// The source span in which the error was detected.
    pub source_span: AstSpan,
}

pub type ParseResult<'src, 'arena, T> = Result<T, ParseError>;

impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    #[must_use]
    pub(crate) fn make_error_here(&mut self, error_kind: ParseErrorKind) -> ParseError {
        ParseError {
            kind: error_kind,
            source_span: AstSpan::new(self.peek_location()),
        }
    }
}
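
Editor's note: a hedged sketch of how this compact signal might later become a user-facing line, since the diff itself stops short of diagnostics rendering. The message strings are invented, and `TokenLocation` is assumed to implement `Debug`.

// Editor's sketch (not part of the commit): rendering a ParseError.
fn describe(error: &ParseError) -> String {
    let what = match error.kind {
        ParseErrorKind::DoMissingUntil => "`do` block is missing its `until`",
        ParseErrorKind::GotoMissingLabel => "`goto` must be followed by a label",
        ParseErrorKind::UnexpectedEndOfFile => "unexpected end of file",
        _ => "syntax error",
    };
    // `from`/`to` are the AstSpan fields used throughout this change set.
    format!(
        "{what} at {:?}..{:?}",
        error.source_span.from, error.source_span.to
    )
}
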
60
rottlib/src/parser/grammar/block.rs
Normal file
@ -0,0 +1,60 @@
use crate::ast::Expression;
use crate::lexer::Token;
use crate::parser::ParseErrorKind;

impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses a block `{ ... }` after `{`.
    ///
    /// Consumes tokens until the matching `}` and returns
    /// an [`Expression::Block`] spanning from the opening `{` to
    /// the closing `}`.
    /// Returns a best-effort block on premature end-of-file.
    #[must_use]
    pub(crate) fn parse_block_cont(
        &mut self,
        block_start_location: crate::lexer::TokenLocation,
    ) -> crate::ast::ExpressionRef<'src, 'arena> {
        let mut statements = self.arena.vec();
        let mut tail = None;
        loop {
            let Some((token, token_location)) = self.peek_token_and_location() else {
                self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
                return self.arena.alloc(
                    Expression::Block { statements, tail },
                    crate::ast::AstSpan {
                        from: block_start_location,
                        to: self.peek_location(),
                    },
                );
            };
            if let Token::RightBrace = token {
                self.advance(); // '}'
                let block_span = crate::ast::AstSpan {
                    from: block_start_location,
                    to: token_location,
                };
                return self
                    .arena
                    .alloc(Expression::Block { statements, tail }, block_span);
            }
            // We know that at this point:
            // 1. There is still a token and it is not end-of-file;
            // 2. It isn't the end of the block.
            // So having a tail expression here is a problem!
            if let Some(tail_expression) = tail {
                self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
                let tail_span = *tail_expression.span();
                let node = self.arena.alloc(
                    crate::ast::Statement::Expression(tail_expression),
                    tail_span,
                );
                statements.push(node);
            }
            tail = self.parse_block_item(&mut statements);
            // Ensure forward progress under errors to avoid infinite loops.
            if self.peek_location() <= token_location {
                self.advance();
            }
        }
    }
}
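
Editor's note: the `statements`/`tail` split means `{ a(); b }` keeps `b` as the block's value, while `{ a(); b; }` produces two statements and no tail. A hedged test-style sketch; `parse_expression_from` is a hypothetical helper, not an API in this diff.

// Editor's sketch (not part of the commit): the tail rule in miniature.
fn block_tail_demo() {
    let with_tail = parse_expression_from("{ a(); b }");
    // One statement (`a();`) and `b` kept as the block's result value.
    assert!(matches!(*with_tail,
        Expression::Block { ref statements, tail: Some(_) }
            if statements.len() == 1));

    let without_tail = parse_expression_from("{ a(); b; }");
    // Both items terminated with `;`: two statements, no tail.
    assert!(matches!(*without_tail,
        Expression::Block { ref statements, tail: None }
            if statements.len() == 2));
}
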
180
rottlib/src/parser/grammar/control.rs
Normal file
@ -0,0 +1,180 @@
use crate::ast::{AstSpan, Expression, ExpressionRef};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseErrorKind, ResultRecoveryExt};

impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses an `if` block, assuming the `if` token was already consumed.
    ///
    /// Produces an [`Expression::If`] spanning from the `if` keyword to
    /// the end of the last arm (`else` body if present,
    /// otherwise the `if` body).
    #[must_use]
    pub(crate) fn parse_if_cont(
        &mut self,
        if_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_expression();
        let body = self.parse_expression();

        let (else_body, if_end_location) = if let Some(Token::Else) = self.peek_token() {
            self.advance(); // else
            let else_body = self.parse_expression();
            // Capture end before moving `else_body` to build the full `if` span
            let body_end = else_body.span().to;
            (Some(else_body), body_end)
        } else {
            (None, body.span().to)
        };

        let span = AstSpan {
            from: if_start_location,
            to: if_end_location,
        };
        self.arena.alloc(
            Expression::If {
                condition,
                body,
                else_body,
            },
            span,
        )
    }

    /// Parses a `while` loop, assuming the `while` token was already consumed.
    ///
    /// Produces an [`Expression::While`] spanning from the `while` keyword
    /// to the end of the body.
    #[must_use]
    pub(crate) fn parse_while_cont(
        &mut self,
        while_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_expression();
        let body = self.parse_expression();
        let span = AstSpan {
            from: while_start_location,
            to: body.span().to,
        };
        self.arena
            .alloc(Expression::While { condition, body }, span)
    }

    /// Parses a `do ... until ...` loop, assuming the `do` token was
    /// already consumed.
    ///
    /// On a missing `until`, returns an error
    /// [`ParseErrorKind::DoMissingUntil`].
    /// On success, produces an [`Expression::DoUntil`] spanning from `do`
    /// to the end of the condition.
    #[must_use]
    pub(crate) fn parse_do_until_cont(
        &mut self,
        do_start_location: TokenLocation,
    ) -> crate::parser::ParseExpressionResult<'src, 'arena> {
        let body = self.parse_expression();

        self.expect(Token::Until, ParseErrorKind::DoMissingUntil)
            .widen_error_span_from(do_start_location)?;
        let condition = self.parse_expression();
        let span = AstSpan {
            from: do_start_location,
            to: condition.span().to,
        };
        Ok(self
            .arena
            .alloc(Expression::DoUntil { condition, body }, span))
    }

    /// Parses a `foreach` loop, assuming the `foreach` token was
    /// already consumed.
    ///
    /// Produces an [`Expression::ForEach`] spanning from `foreach`
    /// to the end of the body.
    #[must_use]
    pub(crate) fn parse_foreach_cont(
        &mut self,
        foreach_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let iterator = self.parse_expression();

        let body = self.parse_expression();
        let span = AstSpan {
            from: foreach_start_location,
            to: body.span().to,
        };
        self.arena
            .alloc(Expression::ForEach { iterator, body }, span)
    }

    /// Parses a `for` loop, assuming the `for` token was already consumed.
    ///
    /// Grammar: `for (init?; condition?; step?) body`.
    /// Any of `init`, `condition`, or `step` may be omitted.
    /// Emits specific `ParseErrorKind` values for missing
    /// delimiters/separators.
    /// On success returns an [`Expression::For`] spanning from `for` to
    /// the end of the body.
    #[must_use]
    pub(crate) fn parse_for_cont(
        &mut self,
        for_start_location: TokenLocation,
    ) -> crate::parser::ParseResult<'src, 'arena, ExpressionRef<'src, 'arena>> {
        self.expect(
            Token::LeftParenthesis,
            ParseErrorKind::ForMissingOpeningParenthesis,
        )
        .widen_error_span_from(for_start_location)?;

        let init = if let Some(Token::Semicolon) = self.peek_token() {
            self.advance();
            None
        } else {
            let init = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingInitializationSemicolon,
            )?;
            Some(init)
        };

        let condition = if let Some(Token::Semicolon) = self.peek_token() {
            self.advance();
            None
        } else {
            let condition = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingConditionSemicolon,
            )?;
            Some(condition)
        };

        let step = if let Some(Token::RightParenthesis) = self.peek_token() {
            self.advance();
            None
        } else {
            let step = self.parse_expression();
            self.expect(
                Token::RightParenthesis,
                ParseErrorKind::ForMissingClosingParenthesis,
            )
            .widen_error_span_from(for_start_location)
            .sync_error_until(self, crate::parser::SyncLevel::CloseParenthesis)?;
            Some(step)
        };

        let body = self.parse_expression();
        let span = AstSpan {
            from: for_start_location,
            to: body.span().to,
        };
        Ok(self.arena.alloc(
            Expression::For {
                init,
                condition,
                step,
                body,
            },
            span,
        ))
    }
}
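
Editor's note: since every `for` slot is optional, a bare `for (;;) { }` should produce three `None`s. A hedged sketch, reusing the same hypothetical `parse_expression_from` helper as above.

// Editor's sketch (not part of the commit): all-optional `for` header.
fn for_loop_demo() {
    let loop_expr = parse_expression_from("for (;;) { }");
    assert!(matches!(
        *loop_expr,
        Expression::For { init: None, condition: None, step: None, .. }
    ));
}
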
99
rottlib/src/parser/grammar/flow.rs
Normal file
@ -0,0 +1,99 @@
use crate::ast::{AstSpan, Expression};
use crate::lexer::{Token, TokenLocation};
use crate::parser::ParseErrorKind;

impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses the continuation of a `return` after its keyword was consumed.
    ///
    /// Doesn't consume the terminating `;`.
    /// If the next token is not `;`, parses an expression as the optional
    /// value. Produces an [`Expression::Return`] whose span runs from
    /// the `return` keyword to the end of the value if present, otherwise to
    /// the `return` keyword.
    #[must_use]
    pub(crate) fn parse_return_cont(
        &mut self,
        return_start_location: TokenLocation,
    ) -> crate::ast::ExpressionRef<'src, 'arena> {
        let (value, span) = if self.peek_token() != Some(Token::Semicolon) {
            let value = self.parse_expression();

            let span = AstSpan {
                from: return_start_location,
                to: value.span().to,
            };
            (Some(value), span)
        } else {
            (
                None,
                AstSpan {
                    from: return_start_location,
                    to: return_start_location,
                },
            )
        };
        self.arena.alloc(Expression::Return(value), span)
    }

    /// Parses the continuation of a `break` after its keyword was consumed.
    ///
    /// Doesn't consume the terminating `;`.
    /// If the next token is not `;`, parses an optional value expression.
    /// Produces an [`Expression::Break`] spanning from `break` to the end
    /// of the value if present, otherwise to the `break` keyword.
    #[must_use]
    pub(crate) fn parse_break_cont(
        &mut self,
        break_start_location: TokenLocation,
    ) -> crate::ast::ExpressionRef<'src, 'arena> {
        let (value, span) = if self.peek_token() != Some(Token::Semicolon) {
            let value = self.parse_expression();

            let span = AstSpan {
                from: break_start_location,
                to: value.span().to,
            };
            (Some(value), span)
        } else {
            (
                None,
                AstSpan {
                    from: break_start_location,
                    to: break_start_location,
                },
            )
        };
        self.arena.alloc(Expression::Break(value), span)
    }

    /// Parses a `goto` expression, assuming the `goto` token was
    /// already consumed.
    ///
    /// Requires the next token to be an identifier label.
    /// On a missing token, returns [`ParseErrorKind::UnexpectedEndOfFile`].
    /// On a non-identifier next token,
    /// returns [`ParseErrorKind::GotoMissingLabel`].
    /// On success, produces an [`Expression::Goto`] spanning from `goto`
    /// to the label token.
    #[must_use]
    pub(crate) fn parse_goto_cont(
        &mut self,
        goto_start_location: TokenLocation,
    ) -> crate::parser::ParseExpressionResult<'src, 'arena> {
        let Some((token, text, token_location)) = self.peek_token_lexeme_and_location() else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };
        if token == Token::Identifier {
            let span = AstSpan {
                from: goto_start_location,
                to: token_location,
            };
            self.advance();
            Ok(self
                .arena
                .alloc(Expression::Goto(self.arena.string(text)), span))
        } else {
            Err(self.make_error_here(ParseErrorKind::GotoMissingLabel))
        }
    }
}
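
Editor's note: `return` and `break` share the same shape, an optional value that also decides the span's end. A hedged sketch with the same hypothetical `parse_expression_from` helper.

// Editor's sketch (not part of the commit): optional flow values.
fn flow_demo() {
    let bare = parse_expression_from("return");
    assert!(matches!(*bare, Expression::Return(None)));

    let with_value = parse_expression_from("return x");
    assert!(matches!(*with_value, Expression::Return(Some(_))));
}
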
7
rottlib/src/parser/grammar/mod.rs
Normal file
@ -0,0 +1,7 @@
mod block;
mod control;
mod flow;
mod pratt;
mod precedence;
mod statements;
mod switch;
406
rottlib/src/parser/grammar/pratt.rs
Normal file
@ -0,0 +1,406 @@
//! Expression parsing for the language front-end.
//!
//! This module implements a Pratt-style parser for the language's expression
//! grammar, supporting:
//!
//! * Primary expressions (literals, identifiers, parenthesized expressions)
//! * Prefix operators
//! * Postfix operators
//! * Infix operators with precedence and associativity
//!
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
//! operators bind. Infix parsing uses the pair of binding powers returned by
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
//! The parser infrastructure supports both left- and right-associative
//! operators, but Fermented UnrealScript currently defines only
//! left-associative ones.
//!
//! ## See also
//!
//! - [`crate::parser::Parser::parse_expression`] - main entry point
//! - [`PrecedenceRank`] - operator binding strengths
//! - [`super::precedence`] - operator precedence definitions

use crate::ast::{Expression, ExpressionRef, NeedsSemi};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{
    ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel,
};

pub(crate) use super::precedence::PrecedenceRank;

impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses an expression.
    pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
        self.parse_expression_with_precedence(PrecedenceRank::LOOSEST)
    }

    /// Parses an expression with operators of at least `min_precedence_rank`
    /// (as tight or tighter).
    fn parse_expression_with_precedence(
        &mut self,
        min_precedence_rank: PrecedenceRank,
    ) -> ExpressionRef<'src, 'arena> {
        // Intentional order: (1) prefix/primary, (2) postfix (tighter than
        // any infix), (3) infix. We don't run a second postfix pass;
        // `(a+b)!` works because the parenthesized sub-expression had its own
        // postfix pass before returning.
        let mut left_hand_side = self
            .parse_prefix_or_primary()
            .sync_error_until(self, SyncLevel::Expression)
            .unwrap_or_fallback(self);
        // Postfix operators are tighter than any infix ones
        left_hand_side = self.parse_postfix_into(left_hand_side);
        left_hand_side = self.parse_infix_into(left_hand_side, min_precedence_rank);
        left_hand_side
    }

    /// Parses a prefix or primary expression (Pratt parser's "nud" or
    /// null denotation).
    ///
    /// Errors with [`ParseErrorKind::UnexpectedEndOfFile`] if the stream ends
    /// before a valid prefix/primary.
    fn parse_prefix_or_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
        let Some((token, token_location)) = self.peek_token_and_location() else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };

        if let Ok(operator) = crate::ast::PrefixOperator::try_from(token) {
            self.advance();
            let right_hand_side = self.parse_expression_with_precedence(PrecedenceRank::TIGHTEST);
            Ok(Expression::new_prefix(
                self.arena,
                token_location,
                operator,
                right_hand_side,
            ))
        } else {
            self.parse_primary()
        }
    }

    /// Parses a primary expression: literals, identifiers, or a parenthesized
    /// sub-expression.
    ///
    /// # Errors
    ///
    /// [`ParseErrorKind::ExpressionUnexpectedToken`] if the next token
    /// cannot start a primary; [`ParseErrorKind::UnexpectedEndOfFile`]
    /// at end of input.
    fn parse_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
        // For diagnostics, we only advance *after* fully parsing the current
        // literal/token.
        let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
        else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };
        match token {
            Token::IntegerLiteral => {
                let value = self.parse_integer_literal(token_text)?;
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Integer(value), token_location))
            }
            Token::FloatLiteral => {
                let value = self.parse_float_literal(token_text)?;
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Float(value), token_location))
            }
            Token::StringLiteral => {
                let value = unescape_string_literal(self.arena, token_text);
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::String(value), token_location))
            }
            Token::True => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Bool(true), token_location))
            }
            Token::False => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Bool(false), token_location))
            }
            Token::None => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::None, token_location))
            }
            Token::Identifier => {
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Identifier(token_text), token_location))
            }
            Token::LeftParenthesis => {
                self.advance();
                self.parse_parenthesized_expression_cont(token_location)
            }
            Token::If => {
                self.advance();
                Ok(self.parse_if_cont(token_location))
            }
            Token::While => {
                self.advance();
                Ok(self.parse_while_cont(token_location))
            }
            Token::Do => {
                self.advance();
                self.parse_do_until_cont(token_location)
            }
            Token::ForEach => {
                self.advance();
                Ok(self.parse_foreach_cont(token_location))
            }
            Token::For => {
                self.advance();
                self.parse_for_cont(token_location)
            }
            Token::Brace(crate::lexer::BraceKind::Normal) => {
                self.advance();
                Ok(self.parse_block_cont(token_location))
            }
            Token::Return => {
                self.advance();
                Ok(self.parse_return_cont(token_location))
            }
            Token::Break => {
                self.advance();
                Ok(self.parse_break_cont(token_location))
            }
            Token::Continue => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Continue, token_location))
            }
            Token::Goto => {
                self.advance();
                self.parse_goto_cont(token_location)
            }
            Token::Switch => {
                self.advance();
                self.parse_switch_cont(token_location)
            }
            _ => {
                // Unexpected token in expression.
                Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken))
            }
        }
    }

    /// Parses an expression in parentheses.
    ///
    /// Assumes the `(` was already consumed; its location is
    /// `left_parenthesis_location`.
    /// On success, allocates an [`Expression::Parentheses`] node with a span
    /// covering from `(` to `)`.
    ///
    /// Errors with [`ParseErrorKind::ExpressionMissingClosingParenthesis`] if
    /// a closing `)` is missing; the diagnostic is associated with
    /// the opening `(` via `left_parenthesis_location`.
    fn parse_parenthesized_expression_cont(
        &mut self,
        left_parenthesis_location: TokenLocation,
    ) -> ParseExpressionResult<'src, 'arena> {
        let inner_expression = self.parse_expression();
        let right_parenthesis_location = self
            .expect(
                Token::RightParenthesis,
                ParseErrorKind::ExpressionMissingClosingParenthesis,
            )
            .widen_error_span_from(left_parenthesis_location)
            .sync_error_at(self, SyncLevel::CloseParenthesis)?;
        Ok(self.arena.alloc_between(
            Expression::Parentheses(inner_expression),
            left_parenthesis_location,
            right_parenthesis_location,
        ))
    }

    /// Parses all postfix operators it can, creating a tree with
    /// `left_hand_side` as a child.
    fn parse_postfix_into(
        &mut self,
        mut left_hand_side: ExpressionRef<'src, 'arena>,
    ) -> ExpressionRef<'src, 'arena> {
        // Single peek that yields `(postfix_op, location)` so the postfix loop
        // can advance once per operator without extra matching/unwraps.
        while let Some((operator, operator_location)) = self.peek_postfix_with_location() {
            self.advance();
            left_hand_side =
                Expression::new_postfix(self.arena, left_hand_side, operator, operator_location);
        }
        left_hand_side
    }

    /// Parses infix operators binding at least as tight as
    /// `min_precedence_rank`.
    ///
    /// Associativity is encoded by
    /// [`super::precedence::infix_precedence_ranks`]: the right-hand
    /// side is parsed with `right_precedence_rank`, so `a - b - c` vs
    /// `a ^ b ^ c` associate correctly based on the pair
    /// `(left_rank, right_rank)`.
    ///
    /// Stops when the next operator is looser than `min_precedence_rank`.
    fn parse_infix_into(
        &mut self,
        mut left_hand_side: ExpressionRef<'src, 'arena>,
        min_precedence_rank: PrecedenceRank,
    ) -> ExpressionRef<'src, 'arena> {
        while let Some((operator, right_precedence_rank)) =
            self.peek_infix_at_least(min_precedence_rank)
        {
            self.advance();
            let right_hand_side = self.parse_expression_with_precedence(right_precedence_rank);
            left_hand_side =
                Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
        }
        left_hand_side
    }

    /// Parses an integer literal as [`i128`].
    ///
    /// Chosen to cover FerUS's integer range so constant folding
    /// remains precise.
    ///
    /// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
    /// not a valid integer.
    fn parse_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
        text.parse::<i128>()
            .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
    }

    /// Parses a float literal as [`f64`].
    ///
    /// Chosen to cover FerUS's float range so constant folding remains
    /// precise.
    ///
    /// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
    /// not a valid float.
    fn parse_float_literal(&mut self, text: &str) -> ParseResult<f64> {
        if let Ok(parsed_value) = text.parse::<f64>() {
            Ok(parsed_value)
        } else {
            Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
        }
    }

    /// Returns the next postfix operator and its location if present.
    ///
    /// Helper to avoid peeking and mapping twice; used to drive the postfix
    /// loop without unwraps.
    fn peek_postfix_with_location(
        &mut self,
    ) -> Option<(crate::ast::PostfixOperator, TokenLocation)> {
        let Some((token, token_location)) = self.peek_token_and_location() else {
            return None;
        };
        let Ok(operator) = crate::ast::PostfixOperator::try_from(token) else {
            return None;
        };
        Some((operator, token_location))
    }

    /// If the next token is an infix operator with left binding power at least
    /// `min_precedence_rank`, returns its operator and precedence rank.
    ///
    /// Otherwise returns [`None`].
    fn peek_infix_at_least(
        &mut self,
        min_precedence_rank: PrecedenceRank,
    ) -> Option<(crate::ast::InfixOperator, PrecedenceRank)> {
        let (left_precedence_rank, operator, right_precedence_rank) = self
            .peek_token()
            .and_then(super::precedence::infix_precedence_ranks)?;
        if left_precedence_rank.is_looser_than(min_precedence_rank) {
            return None;
        }
        Some((operator, right_precedence_rank))
    }

    /// Parses one item inside a `{ ... }` block.
    ///
    /// The item can be a statement (e.g. a variable declaration) or an
    /// expression. If the item is an expression without a following
    /// semicolon, it is returned as the block's current tail expression,
    /// i.e. the value considered to be the block's result. In well-formed
    /// code such a tail expression appears only at the very end of the block.
    ///
    /// This method never consumes the closing `}` and is only meant to be
    /// called while parsing inside a block.
    pub(crate) fn parse_block_item(
        &mut self,
        statements: &mut crate::arena::ArenaVec<'arena, crate::ast::StatementRef<'src, 'arena>>,
    ) -> Option<crate::ast::ExpressionRef<'src, 'arena>> {
        if let Some(mut next_statement) = self.parse_statement() {
            if next_statement.needs_semicolon() {
                // For statements we immediately know whether a missing
                // semicolon is an issue
                if let Some(Token::Semicolon) = self.peek_token() {
                    next_statement.span_mut().to = self.peek_location();
                    self.advance(); // ';'
                } else {
                    self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterStatement);
                }
            }
            statements.push(next_statement);
        } else {
            let mut next_expression = self.parse_expression();
            if let Expression::Error = *next_expression {
                self.recover_until(SyncLevel::Statement);
                next_expression.span_mut().to = self.peek_location();
            }
            if let Some((Token::Semicolon, semicolon_location)) = self.peek_token_and_location() {
                self.advance(); // ;
                let span = crate::ast::AstSpan {
                    from: next_expression.span().from,
                    to: semicolon_location,
                };
                let expression_statement_node = self
                    .arena
                    .alloc(crate::ast::Statement::Expression(next_expression), span);
                statements.push(expression_statement_node);
            } else {
                return Some(next_expression);
            }
        }
        None
    }
}

/// Unescapes a tokenized string literal into an arena string.
///
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
/// Unknown escape sequences are preserved as-is (UnrealScript behavior).
///
/// Note: this function assumes `raw` is the token text without surrounding
/// quotes.
fn unescape_string_literal<'arena>(
    arena: &'arena crate::arena::Arena,
    raw: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut buffer = String::with_capacity(raw.len());
    let mut characters = raw.chars();
    while let Some(next_character) = characters.next() {
        if next_character == '\\' {
            // The lexer never produces a trailing backslash in a string token,
            // so there's always a following character to inspect.
            if let Some(escaped_character) = characters.next() {
                match escaped_character {
                    'n' => buffer.push('\n'),
                    't' => buffer.push('\t'),
                    '"' => buffer.push('"'),
                    '\\' => buffer.push('\\'),
                    // Simply leaving the escaped character as-is is the
                    // behavior expected by UnrealScript
                    other => buffer.push(other),
                }
            }
        } else {
            buffer.push(next_character);
        }
    }
    arena.string(&buffer)
}
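
Editor's note: the unescaping rule above is small enough to check standalone. The following self-contained mirror uses a plain `String` instead of the arena so it runs outside the crate; it is a sketch of the same rules, not the committed function.

// Editor's sketch (not part of the commit): the same escape rules, runnable.
fn unescape(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                Some('n') => out.push('\n'),
                Some('t') => out.push('\t'),
                // `\"`, `\\`, and unknown escapes all keep the second
                // character, matching the UnrealScript behavior above.
                Some(other) => out.push(other),
                None => {}
            }
        } else {
            out.push(c);
        }
    }
    out
}

fn main() {
    assert_eq!(unescape(r"a\tb\q"), "a\tbq"); // unknown `\q` keeps `q`
    assert_eq!(unescape(r#"say \"hi\""#), "say \"hi\"");
}
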
185
rottlib/src/parser/grammar/precedence.rs
Normal file
@ -0,0 +1,185 @@
//! Precedence tables for Fermented UnrealScript operators.
//!
//! These values don't follow the usual *binding power* convention for
//! a Pratt parser, where tighter binding corresponds to a larger number.
//! Here, the smaller the number, the tighter the binding power.
//! For this reason, we use the term *precedence rank* instead.
//!
//! ## Operators sorted by precedence (lowest number = tighter binding)
//!
//! ### Infix operators
//!
//! All infix operators in UnrealScript are
//! [left-associative](https://wiki.beyondunreal.com/Operators).
//!
//! 12: `**`
//! 16: `*`, `/`, `Cross`, `Dot`
//! 18: `%`
//! 20: `+`, `-`
//! 22: `<<`, `>>`, `>>>`
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
//! 26: `!=`
//! 28: `&`, `^`, `|`
//! 30: `&&`, `^^`
//! 32: `||`
//! 34: `*=`, `/=`, `+=`, `-=`
//! 40: `$`, `*`, `@`
//! 44: `$=`, `*=`, `@=`
//! 45: `-=`
//!
//! Some operators, such as `*`, appear twice with different precedence
//! ranks because they were defined with different values for different types
//! in separate script source files (as in the Killing Floor sources).
//! However, UnrealScript uses only the first definition it encounters in
//! `Object.uc`, which corresponds to the lower value.
//!
//! ### Prefix operators
//!
//! `!`, `~`, `-`, `++`, `--`.
//!
//! ### Postfix operators
//!
//! `++`, `--`.

use crate::ast::{InfixOperator, PostfixOperator, PrefixOperator};
use crate::lexer::Token;

/// Compact precedence rank used by the Pratt parser.
///
/// A smaller number means tighter binding, and a larger number means looser
/// binding. This inverted scale matches how UnrealScript tables were recorded.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct PrecedenceRank(u8);

impl PrecedenceRank {
    /// The loosest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by the maximum [`u8`] value.
    pub const LOOSEST: Self = PrecedenceRank(u8::MAX);

    /// The tightest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by zero.
    pub const TIGHTEST: PrecedenceRank = PrecedenceRank(0);

    /// Returns `true` if `self` binds looser than `other`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// let a = PrecedenceRank(40);
    /// let b = PrecedenceRank(34);
    /// assert!(a.is_looser_than(b)); // 40 is looser than 34
    ///
    /// let c = PrecedenceRank(20);
    /// let d = PrecedenceRank(24);
    /// assert!(!c.is_looser_than(d)); // 20 is tighter than 24
    /// ```
    pub fn is_looser_than(self, other: Self) -> bool {
        self.0 > other.0
    }
}

impl TryFrom<Token> for PrefixOperator {
    type Error = ();
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        use PrefixOperator::*;
        Ok(match token {
            Token::Not => Not,
            Token::Minus => Minus,
            Token::BitwiseNot => BitwiseNot,
            Token::Increment => Increment,
            Token::Decrement => Decrement,
            _ => return Err(()),
        })
    }
}

impl TryFrom<Token> for PostfixOperator {
    type Error = ();
    fn try_from(token: Token) -> Result<Self, Self::Error> {
        use PostfixOperator::*;
        Ok(match token {
            Token::Increment => Increment,
            Token::Decrement => Decrement,
            _ => return Err(()),
        })
    }
}

/// Maps a token to its infix operator along with its left and right binding
/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`.
///
/// Returns [`None`] if and only if `token` is not an infix operator.
pub(crate) fn infix_precedence_ranks(
    token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
    use crate::ast::InfixOperator::*;
    let (left_precedence_rank, operator) = match token {
        // 12: `**`
        Token::Exponentiation => (12, Exponentiation),
        // 16: `*`, `/`, `Cross`, `Dot` (left-assoc)
        Token::Multiply => (16, Multiply),
        Token::Divide => (16, Divide),
        Token::Cross => (16, Cross),
        Token::Dot => (16, Dot),
        // 18: `%`
        Token::Modulo => (18, Modulo),
        // 20: `+`, `-`
        Token::Plus => (20, Plus),
        Token::Minus => (20, Minus),
        // 22: `<<`, `>>`, `>>>`
        Token::LeftShift => (22, LeftShift),
        Token::RightShift => (22, RightShift),
        Token::LogicalRightShift => (22, LogicalRightShift),
        // 24: comparison operators
        Token::Less => (24, Less),
        Token::LessEqual => (24, LessEqual),
        Token::Greater => (24, Greater),
        Token::GreaterEqual => (24, GreaterEqual),
        Token::Equal => (24, Equal),
        Token::ApproximatelyEqual => (24, ApproximatelyEqual),
        Token::ClockwiseFrom => (24, ClockwiseFrom),
        // 26: `!=`
        Token::NotEqual => (26, NotEqual),
        // 28: bit-wise `&`, `^`, `|`
        Token::BitwiseAnd => (28, BitwiseAnd),
        Token::BitwiseXor => (28, BitwiseXor),
        Token::BitwiseOr => (28, BitwiseOr),
        // 30: logical `&&`, `^^`
        Token::And => (30, And),
        Token::Xor => (30, Xor),
        // 32: logical `||`
        Token::Or => (32, Or),
        // 34: `*=`, `/=`, `+=`, `-=`
        Token::MultiplyAssign => (34, MultiplyAssign),
        Token::DivideAssign => (34, DivideAssign),
        Token::PlusAssign => (34, PlusAssign),
        Token::MinusAssign => (34, MinusAssign),
        // Simple `=` is treated with the same precedence
        Token::Assign => (34, Assign),
        Token::ModuloAssign => (34, ModuloAssign),
        // 40: `$`, `@`
        Token::Concat => (40, Concat),
        Token::ConcatSpace => (40, ConcatSpace),
        // 44: `$=`, `@=`
        Token::ConcatAssign => (44, ConcatAssign),
        Token::ConcatSpaceAssign => (44, ConcatSpaceAssign),
        _ => return None,
    };
    // All operators are left-associative, so `right_precedence_rank` is set to
    // `left_precedence_rank - 1` (with our "smaller is tighter" scale, this
    // enforces left associativity in Pratt parsing).
    //
    // Since all precedences are even, subtracting one won't actually cross
    // any boundary between operator groups.
    Some((
        PrecedenceRank(left_precedence_rank),
        operator,
        PrecedenceRank(left_precedence_rank - 1),
    ))
}
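
Editor's note: the `right = left - 1` trick is easiest to see in isolation. The following self-contained toy folds a token list with the same inverted ranks (smaller binds tighter); it assumes nothing from the crate and only illustrates the associativity mechanism.

// Editor's sketch (not part of the commit): left associativity from
// `right_rank = left_rank - 1` on the inverted "smaller is tighter" scale.
fn parse(tokens: &[&str], pos: &mut usize, min_rank: u8) -> String {
    let mut lhs = tokens[*pos].to_string();
    *pos += 1;
    while *pos < tokens.len() {
        let op = tokens[*pos];
        let left_rank = match op { "+" | "-" => 20, "*" | "/" => 16, _ => break };
        if left_rank > min_rank {
            break; // looser than what the caller accepts
        }
        *pos += 1;
        // `left_rank - 1` makes the same operator too loose for its own
        // right-hand side, so the fold stays left-associative.
        let rhs = parse(tokens, pos, left_rank - 1);
        lhs = format!("({lhs} {op} {rhs})");
    }
    lhs
}

fn main() {
    let mut pos = 0;
    let tree = parse(&["a", "-", "b", "-", "c", "*", "d"], &mut pos, u8::MAX);
    assert_eq!(tree, "((a - b) - (c * d))");
    println!("{tree}");
}
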
185
rottlib/src/parser/grammar/statements.rs
Normal file
@ -0,0 +1,185 @@
//! Statement parsing for the language front-end.
//!
//! Implements a simple recursive-descent parser for
//! *Fermented UnrealScript* statements.

use crate::ast::{AstSpan, Statement, StatementRef};
use crate::lexer::Token;
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};

impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses a single statement.
    ///
    /// Does not consume a trailing `;` except for [`Statement::Empty`].
    /// The caller handles semicolons. Returns [`Some`] if a statement is
    /// recognized; otherwise [`None`].
    #[must_use]
    pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
        let Some((token, lexeme, location)) = self.peek_token_lexeme_and_location() else {
            self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
            return None;
        };
        match token {
            // Empty statement
            Token::Semicolon => {
                self.advance(); // `;`
                Some(self.arena.alloc(Statement::Empty, AstSpan::new(location)))
            }
            // UnrealScript's standard `local` variable declaration
            Token::Local => {
                self.advance(); // `local`
                Some(
                    self.parse_local_variable_declaration_cont()
                        .widen_error_span_from(location)
                        .sync_error_until(self, SyncLevel::Statement)
                        .unwrap_or_fallback(self),
                )
            }
            // Label definition
            Token::Identifier if matches!(self.peek_token_at(1), Some(Token::Colon)) => {
                self.advance(); // `Token::Identifier`
                self.advance(); // `:`
                Some(self.arena.alloc(
                    Statement::Label(self.arena.string(lexeme)),
                    AstSpan::range(location, self.last_visited_location()),
                ))
            }
            // C-like variable declaration
            token
                if token.is_valid_type_name_token()
                    && Some(Token::Identifier) == self.peek_token_at(1) =>
            {
                self.advance(); // `TYPE_NAME`
                // Next token is guaranteed to exist by the arm condition
                Some(self.parse_variable_declaration_cont(lexeme))
            }
            // Not a statement
            _ => None,
        }
    }

    /// Parses a local variable declaration after `local` has been consumed.
    ///
    /// Requires the next token to be a type name. Initializers are not allowed.
    /// Reports and recovers from errors; the identifier list may be empty if
    /// recovery fails.
    fn parse_local_variable_declaration_cont(
        &mut self,
    ) -> crate::parser::ParseResult<'src, 'arena, StatementRef<'src, 'arena>> {
        let Some((type_token, type_name)) = self.peek_token_and_lexeme() else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };
        if !type_token.is_valid_type_name_token() {
            return Err(self.make_error_here(ParseErrorKind::LocalInvalidTypeName));
        }
        let declaration_start_location = self.last_visited_location();
        self.advance(); // `TYPE_NAME`

        let type_name = self.arena.string(type_name);
        let identifiers = self.parse_local_identifier_list();
        if identifiers.is_empty() {
            self.make_error_here(ParseErrorKind::LocalMissingIdentifier)
                .widen_error_span_from(declaration_start_location)
                .report_error(self);
        }
        Ok(self.arena.alloc(
            Statement::LocalVariableDeclaration {
                type_name,
                identifiers,
            },
            AstSpan::range(declaration_start_location, self.last_visited_location()),
        ))
    }

    /// Parses a comma-separated list of identifiers for a local declaration.
    ///
    /// Best-effort recovery from errors. Returns an empty list if no valid
    /// identifiers are found.
    fn parse_local_identifier_list(
        &mut self,
    ) -> crate::arena::ArenaVec<'arena, crate::arena::ArenaString<'arena>> {
        let mut identifiers = self.arena.vec();
        while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() {
            if token == Token::Identifier {
                identifiers.push(self.arena.string(next_variable_name));
                self.advance(); // `Token::Identifier`
            } else {
                self.report_error_here(ParseErrorKind::LocalBadVariableIdentifier);
                // Try to recover to the next variable name
                self.recover_until(SyncLevel::ListSeparator);
            }

            // Disallow initializers in `local`.
            if let Some(Token::Assign) = self.peek_token() {
                self.report_error_here(ParseErrorKind::LocalInitializerNotAllowed);
                self.recover_until(SyncLevel::ListSeparator);
            }

            // Can the list continue?
            // Loop cannot stall: each iteration consumes a token or breaks
            if !self.eat(Token::Comma) {
                break;
            }
        }
        // End-of-file branch
        identifiers
    }

    /// Parses a non-local variable declaration after the type name token
    /// has been consumed.
    ///
    /// The caller must guarantee that at least one declarator follows.
    /// Optional initializers are allowed.
    fn parse_variable_declaration_cont(
        &mut self,
        type_name: &'src str,
    ) -> StatementRef<'src, 'arena> {
        let declaration_start_location = self.last_visited_location();
        let type_name = self.arena.string(type_name);
        let declarations = self.parse_variable_declaration_list();
        // At least one declarator is guaranteed by this method's precondition
        debug_assert!(!declarations.is_empty());
        self.arena.alloc(
            Statement::VariableDeclaration {
                type_name,
                declarations,
            },
            AstSpan::range(declaration_start_location, self.last_visited_location()),
        )
    }

    /// Parses a comma-separated list of declarators with optional `=`
    /// initializers.
    ///
    /// Best-effort recovery on errors.
    /// The caller should invoke this when the next token starts a declarator.
    fn parse_variable_declaration_list(
        &mut self,
    ) -> crate::arena::ArenaVec<'arena, crate::ast::VariableDeclarator<'src, 'arena>> {
        let mut variables = self.arena.vec();
        while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() {
            if token == Token::Identifier {
                self.advance(); // `Token::Identifier`
                let name = self.arena.string(next_variable_name);
                let initializer = if self.eat(Token::Assign) {
                    Some(self.parse_expression())
                } else {
                    None
                };
                variables.push(crate::ast::VariableDeclarator { name, initializer });
            } else {
                self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
                // Try to recover to the next variable name
                self.recover_until(SyncLevel::ListSeparator);
            }

            // Can the list continue?
            // Loop cannot stall: each iteration consumes a token or breaks
            if !self.eat(Token::Comma) {
                break;
            }
        }
        // End-of-file branch
        variables
    }
}
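
Editor's note: a hedged sketch of the two declaration shapes this file accepts; `parse_statement_from` is a hypothetical test helper, not an API in this diff.

// Editor's sketch (not part of the commit): declaration shapes.
fn declaration_demo() {
    // `local` declarations take a bare identifier list, no initializers:
    let local_decl = parse_statement_from("local int a, b");
    assert!(matches!(*local_decl, Statement::LocalVariableDeclaration { .. }));

    // C-like declarations allow per-declarator `=` initializers:
    let c_like = parse_statement_from("int a = 1, b");
    assert!(matches!(*c_like, Statement::VariableDeclaration { .. }));
}
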
227
rottlib/src/parser/grammar/switch.rs
Normal file
@ -0,0 +1,227 @@
|
||||
use crate::arena::ArenaVec;
|
||||
use crate::ast::{AstSpan, ExpressionRef, StatementRef};
|
||||
use crate::lexer::{Token, TokenLocation};
|
||||
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
|
||||
|
||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||
/// Parses a `switch` expression after the `switch` keyword was consumed.
|
||||
///
|
||||
/// Arm bodies accept statements and expressions. A last, expression without
|
||||
/// `;` in the last arm becomes the switch's tail value if none was
|
||||
/// captured yet.
|
||||
/// Only one `default` case arm is allowed.
|
||||
/// Returns a best-effort switch node on premature EOF.
|
||||
#[must_use]
|
||||
pub(crate) fn parse_switch_cont(
|
||||
&mut self,
|
||||
switch_start_location: TokenLocation,
|
||||
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
|
||||
let selector = self.parse_expression();
|
||||
self.expect(
|
||||
Token::Brace(crate::lexer::BraceKind::Normal),
|
||||
ParseErrorKind::SwitchMissingBody,
|
||||
)
|
||||
.report_error(self);
|
||||
let (mut cases, mut default_arm, mut tail) = (self.arena.vec(), None, None);
|
||||
let mut span = AstSpan::new(switch_start_location);
|
||||
loop {
|
||||
let Some((token, token_location)) = self.peek_token_and_location() else {
|
||||
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
|
||||
span.extend_to(self.peek_location());
|
||||
return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
|
||||
};
|
||||
match token {
|
||||
Token::RightBrace => {
|
||||
self.advance(); // '}'
|
||||
span.extend_to(token_location);
|
||||
return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
|
||||
}
|
||||
Token::Case => {
|
||||
if default_arm.is_some() {
|
||||
self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault);
|
||||
}
|
||||
let case_node = self.parse_switch_case_group(token_location, &mut tail);
|
||||
cases.push(case_node);
|
||||
}
|
||||
Token::Default => {
|
||||
if default_arm.is_some() {
|
||||
self.report_error_here(ParseErrorKind::SwitchDuplicateDefault);
|
||||
}
|
||||
// We still parse a duplicate default to surface all errors.
|
||||
// Bodies are effectively fused for error reporting;
|
||||
// compilation stops anyway, so this trades AST correctness
|
||||
// for diagnostics.
|
||||
self.parse_switch_default_arm(
|
||||
token_location,
|
||||
default_arm.get_or_insert_with(|| self.arena.vec()),
|
||||
&mut tail,
|
||||
);
|
||||
}
|
||||
// This can only be triggered before parsing any `case` or
|
||||
// `default` arms, since they stop either at the start of
|
||||
// another arm declaration (e.g. at `case`/`default`) or
|
||||
// at the `}` that ends switch body.
|
||||
_ => self.parse_switch_preamble_items(&mut tail),
|
||||
}
|
||||
// Ensure forward progress under errors to avoid infinite loops.
|
||||
if self.peek_location() <= token_location {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a stacked `case` group and its body:
|
||||
/// `case <expr>: (case <expr>:)* <arm-body-until-boundary>`.
|
||||
///
|
||||
/// Returns the allocated [`crate::ast::CaseRef`] node.
|
||||
#[must_use]
|
||||
fn parse_switch_case_group(
|
||||
&mut self,
|
||||
first_case_location: TokenLocation,
|
||||
tail: &mut Option<ExpressionRef<'src, 'arena>>,
|
||||
) -> crate::ast::CaseRef<'src, 'arena> {
|
||||
let mut labels = self.arena.vec();
|
||||
while let Some((Token::Case, case_location)) = self.peek_token_and_location() {
|
||||
// Guaranteed progress: we entered on `Token::Case`.
|
||||
self.advance(); // 'case'
|
||||
labels.push(self.parse_expression());
|
||||
|
||||
// Enforce `:` after each case with statement-level recovery.
|
||||
self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
|
||||
.widen_error_span_from(case_location)
|
||||
.sync_error_until(self, crate::parser::SyncLevel::Statement)
|
||||
.report_error(self);
|
||||
}
|
||||
let mut body = self.arena.vec();
|
||||
self.parse_switch_arm_body(&mut body, tail);
|
||||
let case_span = compute_case_span(first_case_location, &labels, &body);
|
||||
self.arena
|
||||
.alloc(crate::ast::SwitchCase { labels, body }, case_span)
|
||||
}
|
||||
|
||||
/// Parses the `default :` arm and its body.
|
||||
///
|
||||
/// Does not consume a boundary token after the body.
|
||||
fn parse_switch_default_arm(
|
||||
&mut self,
|
||||
default_location: TokenLocation,
|
||||
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
||||
tail: &mut Option<ExpressionRef<'src, 'arena>>,
|
||||
) {
|
||||
self.advance(); // 'default'
|
||||
self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
|
||||
.widen_error_span_from(default_location)
|
||||
.sync_error_until(self, crate::parser::SyncLevel::Statement)
|
||||
.report_error(self);
|
||||
self.parse_switch_arm_body(statements, tail);
|
||||
}
|

    /// Parses items of a single switch arm body until a boundary token or EOF.
    ///
    /// Boundary tokens: `case`, `default`, `}`.
    fn parse_switch_arm_body(
        &mut self,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
        tail: &mut Option<ExpressionRef<'src, 'arena>>,
    ) {
        // No need to report end-of-file as it'll be done by
        // `parse_switch_cont`.
        while let Some((token, token_location)) = self.peek_token_and_location() {
            match token {
                // Complain about a tail expression if the `switch` body
                // doesn't end here.
                Token::Case | Token::Default => {
                    if let Some(tail_expression) = tail.take() {
                        self.report_error_here(ParseErrorKind::SwitchBareExpressionBeforeNextArm);
                        let span = *tail_expression.span();
                        let stmt = self
                            .arena
                            .alloc(crate::ast::Statement::Expression(tail_expression), span);
                        statements.push(stmt);
                    }
                    break;
                }
                Token::RightBrace => break,
                _ => (),
            }
            // We know that at this point:
            // 1. There is still a token and it is not EOF;
            // 2. It isn't the end of the block.
            // So having a tail statement there is a problem!
            if let Some(tail_expression) = tail.take() {
                self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
                let tail_span = *tail_expression.span();
                let node = self.arena.alloc(
                    crate::ast::Statement::Expression(tail_expression),
                    tail_span,
                );
                statements.push(node);
            }
            *tail = self.parse_block_item(statements);
            // Ensure forward progress under errors to avoid infinite loops.
            if self.peek_location() <= token_location {
                self.advance();
            }
        }
    }

    /// Parses items that were found in code *before* any arm (`case`/`default`)
    /// declaration.
    ///
    /// These aren't allowed, but we still want to perform a proper parsing step
    /// to report whatever errors we can, in case the programmer simply forgot
    /// to write an arm declaration.
    ///
    /// Boundary tokens: `case`, `default`, `}`.
    fn parse_switch_preamble_items(&mut self, tail: &mut Option<ExpressionRef<'src, 'arena>>) {
        // Report the spurious token.
        self.report_error_here(ParseErrorKind::SwitchTopLevelItemNotCase);

        // Discard parsed statements into a sink vector.
        // This is a bit "hacky", but I don't want to adapt code to skip
        // production of AST nodes just to report errors in
        // one problematic case.
        let mut sink = self.arena.vec();
        self.parse_switch_arm_body(&mut sink, tail);
    }

    /// Helper to allocate a `Switch` expression with the given span.
    #[must_use]
    fn alloc_switch_node(
        &mut self,
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, crate::ast::CaseRef<'src, 'arena>>,
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
        tail: Option<ExpressionRef<'src, 'arena>>,
        span: AstSpan,
    ) -> ExpressionRef<'src, 'arena> {
        self.arena.alloc(
            crate::ast::Expression::Switch {
                selector,
                cases,
                default_arm,
                tail,
            },
            span,
        )
    }
}

/// Computes [`AstSpan`] covering all labels and the body.
#[must_use]
fn compute_case_span(
    labels_start_location: TokenLocation,
    labels: &[ExpressionRef],
    body: &[StatementRef],
) -> AstSpan {
    let mut span = AstSpan {
        from: labels_start_location,
        to: labels_start_location,
    };
    if let Some(last_statement) = body.last() {
        span.extend_to(last_statement.span().to);
    } else if let Some(last_label) = labels.last() {
        span.extend_to(last_label.span().to);
    }
    span
}
66
rottlib/src/parser/mod.rs
Normal file
@ -0,0 +1,66 @@
//! Parser for Fermented UnrealScript (FerUS).
//!
//! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST
//! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser;
//! the rest rely on recursive descent in [`crate::parser::grammar`].
//! Non-fatal errors accumulate in `Parser::diagnostics` as
//! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by
//! [`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while
//! keeping the parse going.
//!
//! Components:
//! - `cursor`: token I/O, `peek`/`advance`, and lazy trivia capture;
//! - `trivia`: trailing comments and newline counts keyed to
//!   the previous significant token and BOF;
//! - `recovery`: panic-mode skipping and recovery adapters for results;
//! - `pretty`: printable trees (`ExprTree`, `StmtTree`) for messages and dumps;
//! - `errors`: [`ParseError`] and [`ParseErrorKind`].
//!
//! Lifetimes: `'src` ties to lexer slices; `'arena` ties to AST allocation.
//!
//! Guarantees:
//!
//! - Parser does not abort on user input. It emits diagnostics and error nodes.
//! - Trivia is recorded as you scan and can be queried by formatters/linters.
//! - Public surface keeps [`Parser`] small;
//!   low-level plumbing lives in submodules.

use super::lexer;

pub use lexer::{TokenPiece, Tokens};

mod cursor;
mod errors;
mod grammar;
pub mod pretty;
mod recovery;
mod trivia;

pub use pretty::{ExprTree, StmtTree};

pub use errors::ParseError;
pub(crate) use errors::{ParseErrorKind, ParseResult};
pub(crate) use recovery::{ResultRecoveryExt, SyncLevel};
pub(crate) use trivia::{TriviaKind, TriviaToken};

pub type ParseExpressionResult<'src, 'arena> =
    ParseResult<'src, 'arena, crate::ast::ExpressionRef<'src, 'arena>>;

/// A recursive-descent parser over tokens from [`crate::lexer::TokenizedFile`].
pub struct Parser<'src, 'arena> {
    arena: &'arena crate::arena::Arena,
    pub diagnostics: Vec<crate::diagnostics::Diagnostic>,
    cursor: cursor::CursorComponent<'src>,
    trivia: trivia::TriviaComponent<'src>,
}

impl<'src, 'arena> Parser<'src, 'arena> {
    pub fn new(file: &'src lexer::TokenizedFile<'src>, arena: &'arena crate::arena::Arena) -> Self {
        Self {
            arena,
            diagnostics: Vec::new(),
            cursor: cursor::CursorComponent::new(file),
            trivia: trivia::TriviaComponent::default(),
        }
    }
}
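
// A minimal end-to-end sketch of driving this API, assuming `parse_expression`
// is the public entry point (it is used elsewhere in this change set) and that
// `Diagnostic` implements `Debug` (an assumption):
//
//     let tokenized = rottlib::lexer::TokenizedFile::from_str("1 + 2 * 3");
//     let arena = rottlib::arena::Arena::new();
//     let mut parser = rottlib::parser::Parser::new(&tokenized, &arena);
//     let expression = parser.parse_expression();
//     // Non-fatal problems accumulate instead of aborting the parse.
//     for diagnostic in &parser.diagnostics {
//         eprintln!("{diagnostic:?}");
//     }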
353
rottlib/src/parser/pretty.rs
Normal file
@ -0,0 +1,353 @@
use crate::ast::{Expression, Statement, SwitchCase, VariableDeclarator};
use core::fmt;

/// A borrow of either a statement or an expression node,
/// plus helpers to enrich the printed tree.
enum AnyNode<'src, 'a, 'b> {
    Stmt(&'b Statement<'src, 'a>),
    Expr(&'b Expression<'src, 'a>),
    Case(&'b SwitchCase<'src, 'a>),
    /// A leaf line with a preformatted label (e.g., variable names).
    Text(String),
    /// Wraps a child with a tag like "cond", "body", "else", "init".
    Tagged(&'static str, Box<AnyNode<'src, 'a, 'b>>),
}

/// Public wrappers to print trees starting from either kind of node.
pub struct StmtTree<'src, 'a, 'b>(pub &'b Statement<'src, 'a>);
pub struct ExprTree<'src, 'a, 'b>(pub &'b Expression<'src, 'a>);

impl<'src, 'a, 'b> fmt::Display for StmtTree<'src, 'a, 'b> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_node(AnyNode::Stmt(self.0), f, "", true)
    }
}
impl<'src, 'a, 'b> fmt::Display for ExprTree<'src, 'a, 'b> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_node(AnyNode::Expr(self.0), f, "", true)
    }
}

fn fmt_node<'src, 'a, 'b>(
    node: AnyNode<'src, 'a, 'b>,
    f: &mut fmt::Formatter<'_>,
    prefix: &str,
    is_last: bool,
) -> fmt::Result {
    write!(f, "{}{}─ ", prefix, if is_last { "└" } else { "├" })?;
    writeln!(f, "{}", label(&node))?;

    let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " });
    let kids = children(node);
    let len = kids.len();
    for (i, child) in kids.into_iter().enumerate() {
        let last = i + 1 == len;
        fmt_node(child, f, &new_prefix, last)?;
    }
    Ok(())
}

/// ----- Labeling -----

fn label<'src, 'a, 'b>(node: &AnyNode<'src, 'a, 'b>) -> String {
    match node {
        AnyNode::Expr(e) => expr_label(e),
        AnyNode::Stmt(s) => stmt_label(s),
        AnyNode::Case(c) => case_label(c),
        AnyNode::Text(s) => s.clone(),
        AnyNode::Tagged(tag, inner) => format!("{tag}: {}", label(inner)),
    }
}

fn quote_str(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c => out.push(c),
        }
    }
    out.push('"');
    out
}

fn expr_label<'src, 'a>(e: &Expression<'src, 'a>) -> String {
    match e {
        Expression::Binary(_, op, _) => format!("Binary {op}"),
        Expression::LeftUnary(op, _) => format!("UnaryL {op}"),
        Expression::RightUnary(_, op) => format!("UnaryR {op}"),

        Expression::Identifier(s) => format!("Ident {s}"),
        Expression::String(s) => {
            // Avoid assuming ArenaString exposes &str; go via Display -> String.
            format!("String {}", quote_str(&s.to_string()))
        }
        Expression::Integer(i) => format!("Int {i}"),
        Expression::Float(x) => format!("Float {x}"),
        Expression::Bool(true) => "Bool true".into(),
        Expression::Bool(false) => "Bool false".into(),
        Expression::None => "None".into(),
        Expression::Parentheses(_) => "Parentheses".into(),

        Expression::Block { statements, tail } => {
            let n = statements.len() + usize::from(tail.is_some());
            let tail_s = if tail.is_some() { " tail" } else { "" };
            format!("BlockExpr ({n} items{tail_s})")
        }
        Expression::If { .. } => "IfExpr".into(),
        Expression::While { .. } => "WhileExpr".into(),
        Expression::DoUntil { .. } => "DoUntilExpr".into(),
        Expression::ForEach { .. } => "ForEachExpr".into(),
        Expression::For { .. } => "ForExpr".into(),
        Expression::Switch {
            cases,
            default_arm: default,
            ..
        } => {
            let d = if default.is_some() { " yes" } else { " no" };
            format!("SwitchExpr cases={} default:{}", cases.len(), d)
        }
        Expression::Goto(label) => format!("Goto {label}"),
        Expression::Continue => "Continue".into(),
        Expression::Break(Some(_)) => "Break value".into(),
        Expression::Break(None) => "Break".into(),
        Expression::Return(Some(_)) => "Return value".into(),
        Expression::Return(None) => "Return".into(),

        Expression::Error => "Error".into(),
    }
}

/// ----- Children collection -----

fn children<'src, 'a, 'b>(node: AnyNode<'src, 'a, 'b>) -> Vec<AnyNode<'src, 'a, 'b>> {
    match node {
        AnyNode::Expr(e) => expr_children(e),
        AnyNode::Stmt(s) => stmt_children(s),
        AnyNode::Case(c) => case_children(c),
        AnyNode::Text(_) => vec![],
        AnyNode::Tagged(_, inner) => children(*inner),
    }
}

/// Expression children can include statements inside Block/Switch.
fn expr_children<'src, 'a, 'b>(e: &'b Expression<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    match e {
        // Purely expression subtrees
        Expression::Binary(lhs, _, rhs) => vec![AnyNode::Expr(&*lhs), AnyNode::Expr(&*rhs)],
        Expression::LeftUnary(_, expr) => vec![AnyNode::Expr(&*expr)],
        Expression::RightUnary(expr, _) => vec![AnyNode::Expr(&*expr)],
        Expression::Parentheses(expr) => vec![AnyNode::Expr(&*expr)],

        // Structured expression forms
        Expression::Block { statements, tail } => {
            let mut out: Vec<AnyNode<'src, 'a, 'b>> = statements
                .iter()
                .map(|s| AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*s))))
                .collect();
            if let Some(t) = tail.as_ref() {
                out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t))));
            }
            out
        }

        Expression::If {
            condition,
            body,
            else_body,
        } => {
            let mut out = vec![
                AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))),
                AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
            ];
            if let Some(e) = else_body {
                out.push(AnyNode::Tagged("else", Box::new(AnyNode::Expr(&*e))));
            }
            out
        }

        Expression::While { condition, body } => vec![
            AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))),
            AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
        ],

        Expression::DoUntil { condition, body } => vec![
            AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
            AnyNode::Tagged("until", Box::new(AnyNode::Expr(&*condition))),
        ],

        Expression::ForEach { iterator, body } => vec![
            AnyNode::Tagged("iter", Box::new(AnyNode::Expr(&*iterator))),
            AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
        ],

        Expression::For {
            init,
            condition,
            step,
            body,
        } => {
            let mut out = Vec::with_capacity(4);
            if let Some(i) = init {
                out.push(AnyNode::Tagged("init", Box::new(AnyNode::Expr(&*i))));
            }
            if let Some(c) = condition {
                out.push(AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*c))));
            }
            if let Some(s) = step {
                out.push(AnyNode::Tagged("step", Box::new(AnyNode::Expr(&*s))));
            }
            out.push(AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))));
            out
        }

        Expression::Switch {
            selector,
            cases,
            default_arm: default,
            tail,
        } => {
            let mut out: Vec<AnyNode<'src, 'a, 'b>> = vec![AnyNode::Tagged(
                "selector",
                Box::new(AnyNode::Expr(&*selector)),
            )];

            for case in cases.iter() {
                out.push(AnyNode::Tagged("case", Box::new(AnyNode::Case(&*case))));
            }

            if let Some(d) = default.as_ref() {
                for stmt in d.iter() {
                    out.push(AnyNode::Tagged("default", Box::new(AnyNode::Stmt(&*stmt))));
                }
            }

            if let Some(t) = tail.as_ref() {
                out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t))));
            }

            out
        }

        // Leaves
        Expression::Identifier(_)
        | Expression::String(_)
        | Expression::Integer(_)
        | Expression::Float(_)
        | Expression::Bool(_)
        | Expression::None
        | Expression::Goto(_)
        | Expression::Continue
        | Expression::Break(None)
        | Expression::Return(None)
        | Expression::Error => vec![],

        // Single optional-child leaves
        Expression::Break(Some(v)) => vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))],
        Expression::Return(Some(v)) => {
            vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))]
        }
    }
}

fn stmt_label<'src, 'a>(s: &Statement<'src, 'a>) -> String {
    use Statement::*;
    match s {
        Empty => "Empty ;".into(),
        Expression(_) => "Expression".into(),

        LocalVariableDeclaration {
            type_name,
            identifiers: variable_names,
        } => {
            let count = variable_names.len();
            let names = variable_names
                .iter()
                .map(|n| n.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            format!("LocalVarDecl type={type_name} count={count} names=[{names}]")
        }

        VariableDeclaration {
            type_name,
            declarations: variable_names,
        } => {
            let total = variable_names.len();
            let inits = variable_names
                .iter()
                .filter(|v| v.initializer.is_some())
                .count();
            let names = variable_names
                .iter()
                .map(|VariableDeclarator { name, .. }| name.to_string())
                .collect::<Vec<_>>()
                .join(", ");
            format!("VarDecl type={type_name} vars={total} inits={inits} names=[{names}]")
        }

        Label(name) => format!("Label {name}"),

        Error => "Error".into(),
    }
}

fn stmt_children<'src, 'a, 'b>(s: &'b Statement<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    use Statement::*;
    match s {
        Empty | Label(_) | Error => vec![],

        Expression(expr) => vec![AnyNode::Expr(&*expr)],

        LocalVariableDeclaration {
            identifiers: variable_names,
            ..
        } => variable_names
            .iter()
            .map(|n| AnyNode::Text(format!("name: {n}")))
            .collect(),

        VariableDeclaration {
            declarations: variable_names,
            ..
        } => {
            let mut out = Vec::new();
            for VariableDeclarator {
                name,
                initializer: initial_value,
            } in variable_names.iter()
            {
                out.push(AnyNode::Text(format!("var: {name}")));
                if let Some(init_expr) = initial_value {
                    out.push(AnyNode::Tagged(
                        "init",
                        Box::new(AnyNode::Expr(&*init_expr)),
                    ));
                }
            }
            out
        }
    }
}

fn case_children<'src, 'a, 'b>(c: &'b SwitchCase<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
    let mut out = Vec::new();
    for lbl in c.labels.iter() {
        out.push(AnyNode::Tagged("label", Box::new(AnyNode::Expr(&*lbl))));
    }
    for stmt in c.body.iter() {
        out.push(AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*stmt))));
    }
    out
}

fn case_label<'src, 'a>(c: &SwitchCase<'src, 'a>) -> String {
    let l = c.labels.len();
    let b = c.body.len();
    format!("Case labels={l} body_items={b}")
}
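
// To illustrate the output, a hedged sketch of what `ExprTree` prints for
// `1 + 2 * 3`, assuming binary operators display as their source symbols:
//
//     └─ Binary +
//        ├─ Int 1
//        └─ Binary *
//           ├─ Int 2
//           └─ Int 3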
253
rottlib/src/parser/recovery.rs
Normal file
@ -0,0 +1,253 @@
//! Best-effort error recovery utilities.
//!
//! The parser recovers from errors by skipping tokens until a synchronization
//! token is found. The sync target is chosen from [`SyncLevel`] based on
//! the error kind. Methods on [`ParseResult`] let callers widen the error span,
//! synchronize, report, and produce fallback values.

use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseError, ParseResult, Parser};

/// Synchronization groups the parser can stop at during recovery.
///
/// Stronger levels subsume weaker ones. The enum's variant order defines this
/// ordering of strength via [`Ord`]; changing it changes recovery behavior.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
pub(crate) enum SyncLevel {
    /// Tokens that appear inside expressions.
    ///
    /// Includes operators, member access `.`, ternary `? :`, an opening `(`,
    /// and identifiers.
    Expression,
    /// List separator `,`.
    ListSeparator,
    /// Close of a parenthesized subexpression `)`.
    CloseParenthesis,
    /// Close of an index or list `]`.
    CloseBracket,
    /// Statement boundary or starter.
    Statement,
    /// Block boundary braces (both `{` and `}`).
    BlockBoundary,
    /// Start of a top-level or class-level declaration.
    TopDeclaration,
}
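
// Since the ordering is derived, the subsumption rule is mechanical; for
// example, both of these hold purely by variant order:
//
//     assert!(SyncLevel::Statement > SyncLevel::Expression);
//     assert!(SyncLevel::TopDeclaration > SyncLevel::BlockBoundary);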

impl SyncLevel {
    /// Converts [`Token`] to its [`SyncLevel`], if it has one.
    fn for_token(token: Token) -> Option<SyncLevel> {
        use SyncLevel::*;
        use Token::*;

        match token {
            Exponentiation | Increment | Decrement | Not | BitwiseNot | Dot | Cross | Multiply
            | Divide | Modulo | Plus | Minus | ConcatSpace | Concat | LeftShift
            | LogicalRightShift | RightShift | Less | LessEqual | Greater | GreaterEqual
            | Equal | NotEqual | ApproximatelyEqual | ClockwiseFrom | BitwiseAnd | BitwiseOr
            | BitwiseXor | And | Xor | Or | Assign | MultiplyAssign | DivideAssign
            | ModuloAssign | PlusAssign | MinusAssign | ConcatAssign | ConcatSpaceAssign
            | Period | Question | Colon | LeftParenthesis | Identifier => Some(Expression),

            Comma => Some(ListSeparator),

            RightParenthesis => Some(CloseParenthesis),
            RightBracket => Some(CloseBracket),

            Case | Default | If | Else | Switch | For | ForEach | While | Do | Return | Break
            | Continue | Local | Semicolon => Some(Statement),

            Brace(_) | RightBrace => Some(BlockBoundary),

            Class | Struct | Enum | State | Function | Event | Delegate | Operator | Var
            | Replication | NativeReplication | DefaultProperties | CppText | ExecDirective => {
                Some(TopDeclaration)
            }

            _ => Option::None,
        }
    }
}

impl<'src, 'arena> Parser<'src, 'arena> {
    /// Converts a parse error into a diagnostic and queues it.
    ///
    /// Placeholder implementation.
    fn handle_error(&mut self, error: ParseError) {
        let diagnostic = crate::diagnostics::DiagnosticBuilder::error(format!(
            "error {:?} while parsing",
            error.kind
        ))
        .primary_label(error.source_span, "happened here")
        .build();
        self.diagnostics.push(diagnostic);
    }

    /// Reports a parser error with [`crate::parser::ParseErrorKind`] at
    /// the current location and queues an appropriate diagnostic.
    pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) {
        let new_error = self.make_error_here(error_kind);
        self.handle_error(new_error);
    }

    /// Skips tokens until a token with `min_sync` level or stronger is found.
    ///
    /// Reaches end-of-file if no qualifying token is found.
    pub(crate) fn recover_until(&mut self, min_sync: SyncLevel) {
        while let Some(next_token) = self.peek_token() {
            if let Some(next_token_sync_level) = SyncLevel::for_token(next_token)
                && next_token_sync_level >= min_sync
            {
                break;
            }
            // Always advances when `peek_token()` is `Some(...)`,
            // so the loop cannot be infinite.
            self.advance();
        }
    }
}

/// Supplies a fallback value after a parse error so parsing can continue and
/// reveal further errors.
pub(crate) trait RecoveryFallback<'src, 'arena>: Sized {
    fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self;
}

/// Extends [`ParseResult`] with recovery-related methods for
/// fluent error handling.
pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized {
    /// Extends the left end of the error span to `from`.
    ///
    /// Does nothing if `Self` is `Ok(...)`.
    #[must_use]
    fn widen_error_span_from(self, from: TokenLocation) -> Self;

    /// Extends the right end of the error span up to but not including
    /// the next token of the given sync `level`.
    ///
    /// Does nothing if `Self` is `Ok(...)`.
    #[must_use]
    fn sync_error_until(self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self;
    /// Extends the right end of the error span to include the next token of
    /// the given sync `level`.
    ///
    /// Does nothing if `Self` is `Ok(...)`.
    #[must_use]
    fn sync_error_at(self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self;

    /// Either returns the expected value or its best-effort fallback.
    #[must_use]
    fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T;

    /// Reports the contained error, if any, discarding the value.
    fn report_error(self, parser: &mut Parser<'src, 'arena>);
}

impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T>
where
    T: RecoveryFallback<'src, 'arena>,
{
    fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
        if let Err(ref mut error) = self {
            error.source_span.from = std::cmp::min(error.source_span.from, from);
        }
        self
    }

    fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
        if let Err(ref mut error) = self {
            parser.recover_until(level);
            error.source_span.to = parser.last_visited_location();
        }
        self
    }

    fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
        if let Err(ref mut error) = self {
            parser.recover_until(level);
            error.source_span.to = parser.peek_location();
            // If we're at end-of-file, this'll simply do nothing.
            parser.advance();
        }
        self
    }

    fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T {
        self.unwrap_or_else(|error| {
            let value = T::fallback_value(parser, &error);
            parser.handle_error(error);
            value
        })
    }

    fn report_error(self, parser: &mut Parser<'src, 'arena>) {
        if let Err(error) = self {
            parser.handle_error(error);
        }
    }
}

impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
    fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
        self.source_span.from = std::cmp::min(self.source_span.from, from);
        self
    }

    fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
        parser.recover_until(level);
        self.source_span.to = parser.last_visited_location();
        self
    }

    fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
        parser.recover_until(level);
        self.source_span.to = parser.peek_location();
        // If we're at end-of-file, this'll simply do nothing.
        parser.advance();
        self
    }

    fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) {
        parser.handle_error(self);
    }

    fn report_error(self, parser: &mut Parser<'src, 'arena>) {
        parser.handle_error(self);
    }
}

impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
    fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
        error.source_span.to
    }
}

impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExpressionRef<'src, 'arena> {
    fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
        crate::arena::ArenaNode::new_in(
            crate::ast::Expression::Error,
            error.source_span,
            parser.arena,
        )
    }
}

impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StatementRef<'src, 'arena> {
    fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
        crate::arena::ArenaNode::new_in(
            crate::ast::Statement::Error,
            error.source_span,
            parser.arena,
        )
    }
}

impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T>
where
    T: RecoveryFallback<'src, 'arena>,
{
    fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
        Some(T::fallback_value(parser, error))
    }
}
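
// Taken together, these pieces give grammar code a fluent shape. A hedged
// sketch of a fallback-producing call site; `parse_expression_result` is a
// hypothetical fallible entry point, not an API confirmed by this diff:
//
//     let argument = self
//         .parse_expression_result()
//         .sync_error_at(self, SyncLevel::CloseParenthesis)
//         .unwrap_or_fallback(self);
//     // On failure this recovers past the next `)` and yields an
//     // `Expression::Error` node, so the caller always gets a usable ref.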
297
rottlib/src/parser/trivia.rs
Normal file
@ -0,0 +1,297 @@
//! This module provides a trivia-token collection mechanism that lets parser
//! code iterate over significant tokens while ignoring trivia and preserving
//! full information for linting, formatting, and documentation.
//!
//! Tokens considered *trivia* are:
//!
//! 1. [`crate::lexer::Token::LineComment`];
//! 2. [`crate::lexer::Token::BlockComment`];
//! 3. [`crate::lexer::Token::Newline`];
//! 4. [`crate::lexer::Token::Whitespace`].
//!
//! Every other token is considered *significant*.

use crate::lexer::TokenLocation;

/// Types of trivia tokens, corresponding directly to the matching variants of
/// [`crate::lexer::Token`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum TriviaKind {
    Whitespace,
    Newline,
    LineComment,
    BlockComment,
}

impl std::convert::TryFrom<crate::lexer::Token> for TriviaKind {
    type Error = ();

    fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
        use crate::lexer::Token;
        match token {
            Token::Whitespace => Ok(TriviaKind::Whitespace),
            Token::Newline => Ok(TriviaKind::Newline),
            Token::LineComment => Ok(TriviaKind::LineComment),
            Token::BlockComment => Ok(TriviaKind::BlockComment),
            _ => Err(()),
        }
    }
}

/// Complete description of a trivia token.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) struct TriviaToken<'src> {
    /// Specific type of the trivia.
    pub kind: TriviaKind,
    /// Actual content of the token.
    pub text: &'src str,
    /// Location of this trivia token in the token stream.
    pub location: TokenLocation,
}

type TriviaRange = std::ops::Range<usize>;
type TriviaMap = std::collections::HashMap<TriviaLocation, TriviaRange>;

/// Immutable index over all recorded trivia.
///
/// Enables O(1) access to trivia immediately before/after any significant
/// token, plus file-leading and file-trailing trivia. Returned slices alias
/// internal storage and live for `'src`.
#[derive(Clone, Debug, Default)]
#[allow(dead_code)]
pub(crate) struct TriviaIndex<'src> {
    /// All trivia tokens, stored contiguously in file order.
    tokens: Vec<TriviaToken<'src>>,
    /// Maps token location to the trivia tokens stored right after it.
    after_map: TriviaMap,
    /// Maps token location to the trivia tokens stored right before it.
    before_map: TriviaMap,
}

/// Extends [`TokenLocation`] with a *start of file* value.
///
/// Regular [`TokenLocation`] does not need this value, but trivia requires
/// a way to express "trivia before any significant token".
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum TriviaLocation {
    /// Position before any tokens, trivia or otherwise.
    StartOfFile,
    /// This variant can also express "end of file" through
    /// [`TokenLocation::EndOfFile`].
    At(TokenLocation),
}

/// Mutable builder for `TriviaIndex`.
///
/// Used inside the parser to record trivia between successive significant
/// tokens in file order, then frozen via `into_index`.
#[derive(Debug, Default)]
#[allow(dead_code)]
pub(crate) struct TriviaComponent<'src> {
    /// All trivia tokens, stored contiguously in file order.
    tokens: Vec<TriviaToken<'src>>,
    /// Maps token location to the trivia tokens stored right after it.
    after_map: TriviaMap,
    /// Maps token location to the trivia tokens stored right before it.
    before_map: TriviaMap,
    /// Location of the last gap's right boundary,
    /// for debug-time invariant checks.
    #[cfg(debug_assertions)]
    last_right_boundary: Option<TriviaLocation>,
}

impl<'src> TriviaComponent<'src> {
    /// Records trivia tokens that lie strictly between
    /// `previous_token_location` and `next_token_location`.
    ///
    /// [`None`] for `previous_token_location` means beginning of file;
    /// `next_token_location` may be [`TokenLocation::EndOfFile`].
    ///
    /// Empties `gap_trivia` without changing its capacity.
    ///
    /// Requirements (checked in debug builds):
    /// - `previous_token_location < next_token_location`;
    /// - calls are monotonic: each gap starts at or after the last end;
    /// - `gap_trivia` is nonempty and strictly ordered by `location`;
    /// - all of `gap_trivia` lies strictly inside (prev, next).
    pub(crate) fn record_between_locations(
        &mut self,
        previous_token_location: Option<TokenLocation>,
        next_token_location: TokenLocation,
        gap_trivia: &mut Vec<TriviaToken<'src>>,
    ) {
        #[cfg(debug_assertions)]
        self.debug_assert_valid_recording_batch(
            previous_token_location,
            next_token_location,
            &gap_trivia,
        );

        if gap_trivia.is_empty() {
            return;
        }

        let previous_token_location = previous_token_location
            .map(TriviaLocation::At)
            .unwrap_or(TriviaLocation::StartOfFile);
        let next_token_location = TriviaLocation::At(next_token_location);

        let trivia_start = self.tokens.len();
        self.tokens.append(gap_trivia);
        let trivia_end = self.tokens.len();

        self.after_map
            .insert(previous_token_location, trivia_start..trivia_end);
        self.before_map
            .insert(next_token_location, trivia_start..trivia_end);
    }

    /// Freezes into an immutable, shareable index.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn into_index(self) -> TriviaIndex<'src> {
        TriviaIndex {
            tokens: self.tokens,
            after_map: self.after_map,
            before_map: self.before_map,
        }
    }

    /// Trivia immediately after the significant token at `location`.
    ///
    /// Returns an empty slice if `location` is not pointing at
    /// a significant token or if no trivia was recorded after it.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
        self.slice_for(TriviaLocation::At(location), &self.after_map)
    }

    /// Trivia immediately before the significant token at `location`.
    ///
    /// Returns an empty slice if `location` is not pointing at
    /// a significant token or if no trivia was recorded before it.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
        self.slice_for(TriviaLocation::At(location), &self.before_map)
    }

    /// Trivia before any significant token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
        self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
    }

    /// Trivia after the last significant token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
        self.slice_for(
            TriviaLocation::At(TokenLocation::EndOfFile),
            &self.before_map,
        )
    }

    // Helper: return the recorded slice or an empty slice if none.
    #[track_caller]
    #[allow(dead_code)]
    fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
        if let Some(range) = map.get(&key) {
            // Ranges are guaranteed to be valid by construction
            &self.tokens[range.start..range.end]
        } else {
            &[]
        }
    }

    /// Debug-only validation for `record_between_locations`'s contract.
    #[cfg(debug_assertions)]
    fn debug_assert_valid_recording_batch(
        &mut self,
        previous_token_location: Option<TokenLocation>,
        next_token_location: TokenLocation,
        collected: &[TriviaToken<'src>],
    ) {
        // Prevent zero-width or reversed gaps
        debug_assert!(previous_token_location < Some(next_token_location));
        let previous_token_location = previous_token_location
            .map(TriviaLocation::At)
            .unwrap_or(TriviaLocation::StartOfFile);
        let next_token_location = TriviaLocation::At(next_token_location);
        // Enforce monotonic gaps: we record in file order
        if let Some(last_right) = self.last_right_boundary {
            debug_assert!(previous_token_location >= last_right);
        }
        self.last_right_boundary = Some(next_token_location);
        let first_trivia_location = collected
            .first()
            .map(|token| TriviaLocation::At(token.location))
            .expect("Provided trivia tokens array should not be empty.");
        let last_trivia_location = collected
            .last()
            .map(|token| TriviaLocation::At(token.location))
            .expect("Provided trivia tokens array should not be empty.");
        // Ensure trivia lies strictly inside the gap
        debug_assert!(previous_token_location < first_trivia_location);
        debug_assert!(next_token_location > last_trivia_location);
        // Ensure trivia locations are strictly increasing
        debug_assert!(
            collected
                .windows(2)
                .all(|window| window[0].location < window[1].location)
        );
    }
}

impl<'src> TriviaIndex<'src> {
    /// Trivia immediately after the significant token at `location`.
    ///
    /// Returns an empty slice if `location` is not pointing at
    /// a significant token or if no trivia was recorded after it.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
        self.slice_for(TriviaLocation::At(location), &self.after_map)
    }

    /// Trivia immediately before the significant token at `location`.
    ///
    /// Returns an empty slice if `location` is not pointing at
    /// a significant token or if no trivia was recorded before it.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
        self.slice_for(TriviaLocation::At(location), &self.before_map)
    }

    /// Trivia before any significant token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
        self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
    }

    /// Trivia after the last significant token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
        self.slice_for(
            TriviaLocation::At(TokenLocation::EndOfFile),
            &self.before_map,
        )
    }

    // Helper: return the recorded slice or an empty slice if none.
    #[track_caller]
    #[allow(dead_code)]
    fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
        if let Some(range) = map.get(&key) {
            // Ranges are guaranteed to be valid by construction
            &self.tokens[range.start..range.end]
        } else {
            &[]
        }
    }
}
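
// A hedged sketch of the intended record-then-query flow; the locations and
// the trivia batch are left abstract since their constructors are not part
// of this diff:
//
//     let mut trivia = TriviaComponent::default();
//     // ... while parsing, flush trivia found between significant tokens:
//     trivia.record_between_locations(None, first_token_location, &mut gap);
//     // ... after parsing, freeze and query in O(1):
//     let index = trivia.into_index();
//     let leading = index.after_file_start(); // trivia before the first token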
@ -37,7 +37,7 @@ impl tower_lsp::LanguageServer for RottLanguageServer {
        // Measure lexing performance to track parser responsiveness.
        let start_time = std::time::Instant::now();
        let has_errors =
            rottlib::lexer::TokenizedFile::from_source(&params.text_document.text).had_errors();
            rottlib::lexer::TokenizedFile::from_str(&params.text_document.text).has_errors();
        let elapsed_time = start_time.elapsed();

        self.client