From 5bd9aadc5529fe71e4b0e39a66e7dbe192d0a340 Mon Sep 17 00:00:00 2001 From: dkanus Date: Tue, 23 Sep 2025 20:27:12 +0700 Subject: [PATCH] Add `DeclarationLiteral` parsing Added a method for parsing simple literals for use in top-level class declarations. Along with this change we've also moved the methods specific to parsing low-level literals into the same file as the new method, since that structure made more sense. --- rottlib/src/ast.rs | 11 +++ rottlib/src/parser/errors.rs | 5 ++ rottlib/src/parser/grammar/literals.rs | 119 +++++++++++++++++++++++++ rottlib/src/parser/grammar/mod.rs | 1 + rottlib/src/parser/grammar/pratt.rs | 72 +-------------- rottlib/src/parser/recovery.rs | 15 ++++ 6 files changed, 155 insertions(+), 68 deletions(-) create mode 100644 rottlib/src/parser/grammar/literals.rs diff --git a/rottlib/src/ast.rs b/rottlib/src/ast.rs index 31d670a..4a06f9d 100644 --- a/rottlib/src/ast.rs +++ b/rottlib/src/ast.rs @@ -246,6 +246,17 @@ impl<'src, 'arena> Expression<'src, 'arena> { } } +pub enum DeclarationLiteral<'src, 'arena> { + None, + Bool(bool), + Integer(i128), + Float(f64), + String(ArenaString<'arena>), + Identifier(&'src str), +} + +pub type DeclarationLiteralRef<'src, 'arena> = (DeclarationLiteral<'src, 'arena>, TokenLocation); + /// Returns `true` for expressions that require `;` when used as a statement /// (i.e., everything except blocky control-flow forms). pub trait NeedsSemi { diff --git a/rottlib/src/parser/errors.rs b/rottlib/src/parser/errors.rs index f5fd61d..c58c0e4 100644 --- a/rottlib/src/parser/errors.rs +++ b/rottlib/src/parser/errors.rs @@ -70,6 +70,11 @@ pub enum ParseErrorKind { DeclMissingIdentifier, /// Invalid variable name identifier in non-`local` variable definition. DeclBadVariableIdentifier, + /// Found an unexpected token while parsing a declaration literal. + /// + /// Expected one of: integer, float, string, `true`, `false`, `none` + /// or an identifier. 
+ DeclarationLiteralUnexpectedToken, } /// Enumerates all specific kinds of parsing errors that the parser can emit. diff --git a/rottlib/src/parser/grammar/literals.rs b/rottlib/src/parser/grammar/literals.rs new file mode 100644 index 0000000..f9c6832 --- /dev/null +++ b/rottlib/src/parser/grammar/literals.rs @@ -0,0 +1,119 @@ +use crate::ast::DeclarationLiteral; +use crate::lexer::Token; +use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt}; + +impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { + /// Parses a simple literal value that can be used inside FerUS's top-level + /// class members' definitions. + /// + /// On success consumes exactly one token. If the next token is not + /// a supported literal, returns an error and leaves the stream untouched. + pub(crate) fn parse_declaration_literal( + &mut self, + ) -> ParseResult { + let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location() + else { + return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile)); + }; + let declaration_literal = match token { + Token::IntegerLiteral => { + let value = self + .decode_integer_literal(token_text) + .unwrap_or_fallback(self); + self.advance(); + DeclarationLiteral::Integer(value) + } + Token::FloatLiteral => { + let value = self + .decode_float_literal(token_text) + .unwrap_or_fallback(self); + self.advance(); + DeclarationLiteral::Float(value) + } + Token::StringLiteral => { + let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text); + self.advance(); + DeclarationLiteral::String(value) + } + Token::True => { + self.advance(); + DeclarationLiteral::Bool(true) + } + Token::False => { + self.advance(); + DeclarationLiteral::Bool(false) + } + Token::None => { + self.advance(); + DeclarationLiteral::None + } + Token::Identifier => { + self.advance(); + DeclarationLiteral::Identifier(token_text) + } + _ => { + return 
Err(self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken)); + } + }; + Ok((declaration_literal, token_location)) + } + + /// Parses an integer literal as [`i128`]. + /// + /// Expects a normalized decimal string with optional leading sign. + /// Does not accept base prefixes or digit separators. + /// + /// [`i128`] type was chosen to cover FerUS's integer range so constant + /// folding remains precise. + pub(crate) fn decode_integer_literal(&mut self, text: &str) -> ParseResult { + text.parse::() + .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) + } + + /// Parses a float literal as [`f64`]. + /// + /// Expects a normalized decimal float (optional sign, decimal point, + /// optional exponent). `inf`/`NaN` are not filtered out by this method. + pub(crate) fn decode_float_literal(&mut self, text: &str) -> ParseResult { + if let Ok(parsed_value) = text.parse::() { + Ok(parsed_value) + } else { + Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) + } + } + + /// Unescapes a tokenized string literal into an arena string. + /// + /// Supported escapes: `\n`, `\t`, `\"`, `\\`. + /// Unknown escapes keep only the escaped character (UnrealScript behavior). + /// + /// Note: this function assumes `raw` is the token text without surrounding + /// quotes. + pub(crate) fn unescape_string_literal( + arena: &'arena crate::arena::Arena, + raw: &str, + ) -> crate::arena::ArenaString<'arena> { + let mut buffer = String::with_capacity(raw.len()); + let mut characters = raw.chars(); + while let Some(next_character) = characters.next() { + if next_character == '\\' { + // The lexer never produces a trailing backslash in a string + // token, so there's always a following character to inspect. 
+ if let Some(escaped_character) = characters.next() { + match escaped_character { + 'n' => buffer.push('\n'), + 't' => buffer.push('\t'), + '"' => buffer.push('"'), + '\\' => buffer.push('\\'), + // Simply leaving the escaped character as-is matches + // UnrealScript behavior. + other => buffer.push(other), + } + } + } else { + buffer.push(next_character); + } + } + arena.string(&buffer) + } +} diff --git a/rottlib/src/parser/grammar/mod.rs b/rottlib/src/parser/grammar/mod.rs index 4089279..58cb415 100644 --- a/rottlib/src/parser/grammar/mod.rs +++ b/rottlib/src/parser/grammar/mod.rs @@ -1,6 +1,7 @@ mod block; mod control; mod flow; +mod literals; mod pratt; mod precedence; mod statements; diff --git a/rottlib/src/parser/grammar/pratt.rs b/rottlib/src/parser/grammar/pratt.rs index 982281b..f45f708 100644 --- a/rottlib/src/parser/grammar/pratt.rs +++ b/rottlib/src/parser/grammar/pratt.rs @@ -23,9 +23,7 @@ use crate::ast::{Expression, ExpressionRef, NeedsSemi}; use crate::lexer::{Token, TokenLocation}; -use crate::parser::{ - ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel, -}; +use crate::parser::{ParseErrorKind, ParseExpressionResult, ResultRecoveryExt, SyncLevel}; pub(crate) use super::precedence::PrecedenceRank; @@ -96,21 +94,21 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { }; match token { Token::IntegerLiteral => { - let value = self.parse_integer_literal(token_text)?; + let value = self.decode_integer_literal(token_text)?; self.advance(); Ok(self .arena .alloc_at(Expression::Integer(value), token_location)) } Token::FloatLiteral => { - let value = self.parse_float_literal(token_text)?; + let value = self.decode_float_literal(token_text)?; self.advance(); Ok(self .arena .alloc_at(Expression::Float(value), token_location)) } Token::StringLiteral => { - let value = unescape_string_literal(self.arena, token_text); + let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text); self.advance(); 
Ok(self .arena @@ -260,33 +258,6 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { left_hand_side } - /// Parses an integer literal as [`i128`]. - /// - /// Chosen to cover FerUS's integer range so constant folding - /// remains precise. - /// - /// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is - /// not a valid integer. - fn parse_integer_literal(&mut self, text: &str) -> ParseResult { - text.parse::() - .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) - } - - /// Parses a float literal as [`f64`]. - /// - /// Chosen to cover FerUS's float range so constant folding remains - /// precise. - /// - /// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is - /// not a valid float. - fn parse_float_literal(&mut self, text: &str) -> ParseResult { - if let Ok(parsed_value) = text.parse::() { - Ok(parsed_value) - } else { - Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)) - } - } - /// Returns the next postfix operator and its location if present. /// /// Helper to avoid peeking and mapping twice; used to drive the postfix @@ -369,38 +340,3 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { None } } - -/// Unescapes a tokenized string literal into an arena string. -/// -/// Supported escapes: `\n`, `\t`, `\"`, `\\`. -/// Unknown escape sequences are preserved as-is (UnrealScript behavior). -/// -/// Note: this function assumes `raw` is the token text without surrounding -/// quotes. -fn unescape_string_literal<'arena>( - arena: &'arena crate::arena::Arena, - raw: &str, -) -> crate::arena::ArenaString<'arena> { - let mut buffer = String::with_capacity(raw.len()); - let mut characters = raw.chars(); - while let Some(next_character) = characters.next() { - if next_character == '\\' { - // The lexer never produces a trailing backslash in a string token, - // so there's always a following character to inspect. 
- if let Some(escaped_character) = characters.next() { - match escaped_character { - 'n' => buffer.push('\n'), - 't' => buffer.push('\t'), - '"' => buffer.push('"'), - '\\' => buffer.push('\\'), - // Simply leaving escaped character as-is is an expected - // behavior by UnrealScript - other => buffer.push(other), - } - } - } else { - buffer.push(next_character); - } - } - arena.string(&buffer) -} diff --git a/rottlib/src/parser/recovery.rs b/rottlib/src/parser/recovery.rs index 84f95e5..e5d524d 100644 --- a/rottlib/src/parser/recovery.rs +++ b/rottlib/src/parser/recovery.rs @@ -4,6 +4,9 @@ //! token is found. The sync target is chosen from [`SyncLevel`] based on //! the error kind. Methods on [`ParseResult`] let callers widen the error span, //! synchronize, report, and produce fallback values. +//! +//! The general idea is that any method that returns something other than an +//! error can be assumed to have already reported any error it encountered. use crate::lexer::{Token, TokenLocation}; use crate::parser::{ParseError, ParseResult, Parser}; @@ -217,6 +220,18 @@ impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError { } } +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 { + fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self { + i128::default() + } +} + +impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 { + fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self { + f64::default() + } +} + impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation { fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { error.source_span.to