Add DeclarationLiteral parsing

Added method for parsing simple literals for use in top-level class
declarations.

Along with this change we've also moved the methods specific to parsing
low-level literals into the same file as the new method, since that
structure made more sense.
This commit is contained in:
dkanus 2025-09-23 20:27:12 +07:00
parent 47693fc5a5
commit 5bd9aadc55
6 changed files with 155 additions and 68 deletions

View File

@ -246,6 +246,17 @@ impl<'src, 'arena> Expression<'src, 'arena> {
}
}
/// A simple literal value produced by the parser's
/// `parse_declaration_literal` method.
///
/// `'src` is the lifetime of the source text that identifiers borrow from;
/// `'arena` is the lifetime of the arena that owns string contents.
pub enum DeclarationLiteral<'src, 'arena> {
/// The `none` keyword.
None,
/// The `true` or `false` keyword.
Bool(bool),
/// An integer literal decoded as [`i128`].
Integer(i128),
/// A float literal decoded as [`f64`].
Float(f64),
/// A string literal, stored in the arena after escape sequences have been
/// processed.
String(ArenaString<'arena>),
/// An identifier, kept as the token's text borrowed from the source.
Identifier(&'src str),
}
/// A [`DeclarationLiteral`] paired with the [`TokenLocation`] of the token it
/// was parsed from.
///
/// Despite the `Ref` suffix this is a plain tuple held by value.
pub type DeclarationLiteralRef<'src, 'arena> = (DeclarationLiteral<'src, 'arena>, TokenLocation);
/// Returns `true` for expressions that require `;` when used as a statement
/// (i.e., everything except blocky control-flow forms).
pub trait NeedsSemi {

View File

@ -70,6 +70,11 @@ pub enum ParseErrorKind {
DeclMissingIdentifier,
/// Invalid variable name identifier in non-`local` variable definition.
DeclBadVariableIdentifier,
/// Found an unexpected token while parsing a declaration literal.
///
/// Expected one of: integer, float, string, `true`, `false`, `none`
/// or an identifier.
DeclarationLiteralUnexpectedToken,
}
/// Enumerates all specific kinds of parsing errors that the parser can emit.

View File

@ -0,0 +1,119 @@
use crate::ast::DeclarationLiteral;
use crate::lexer::Token;
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt};
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
/// Parses a simple literal value that can be used inside FerUS's top-level
/// class members' definitions.
///
/// On success consumes exactly one token. If the next token is not
/// a supported literal, returns [`None`] and leaves the stream untouched.
pub(crate) fn parse_declaration_literal(
&mut self,
) -> ParseResult<crate::ast::DeclarationLiteralRef> {
let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
else {
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
};
let declaration_literal = match token {
Token::IntegerLiteral => {
let value = self
.decode_integer_literal(token_text)
.unwrap_or_fallback(self);
self.advance();
DeclarationLiteral::Integer(value)
}
Token::FloatLiteral => {
let value = self
.decode_float_literal(token_text)
.unwrap_or_fallback(self);
self.advance();
DeclarationLiteral::Float(value)
}
Token::StringLiteral => {
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
self.advance();
DeclarationLiteral::String(value)
}
Token::True => {
self.advance();
DeclarationLiteral::Bool(true)
}
Token::False => {
self.advance();
DeclarationLiteral::Bool(false)
}
Token::None => {
self.advance();
DeclarationLiteral::None
}
Token::Identifier => {
self.advance();
DeclarationLiteral::Identifier(token_text)
}
_ => {
return Err(self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken));
}
};
Ok((declaration_literal, token_location))
}
/// Decodes the text of an integer literal into an [`i128`].
///
/// The text must be a plain decimal number, optionally preceded by a sign.
/// Base prefixes and digit separators are not understood.
///
/// [`i128`] is used so that FerUS's integer range is covered and constant
/// folding remains precise.
///
/// # Errors
///
/// Returns [`ParseErrorKind::InvalidNumericLiteral`], created at the parser's
/// current position, when `text` cannot be parsed as an [`i128`].
pub(crate) fn decode_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
    match text.parse::<i128>() {
        Ok(value) => Ok(value),
        Err(_) => Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)),
    }
}
/// Parses a float literal as [`f64`].
///
/// Expects a normalized decimal float (optional sign, decimal point,
/// optional exponent).
///
/// Only finite values are accepted. [`str::parse`] on its own would accept
/// the spellings `inf`, `infinity` and `nan`, and would turn an overflowing
/// literal such as `1e999` into infinity; all of these are rejected here.
///
/// # Errors
///
/// Returns [`ParseErrorKind::InvalidNumericLiteral`], created at the parser's
/// current position, when `text` is not a valid float or does not denote
/// a finite value.
pub(crate) fn decode_float_literal(&mut self, text: &str) -> ParseResult<f64> {
    match text.parse::<f64>() {
        Ok(parsed_value) if parsed_value.is_finite() => Ok(parsed_value),
        // Covers both syntax errors and non-finite results (infinities, NaN).
        _ => Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)),
    }
}
/// Converts the text of a string literal token into an arena string,
/// resolving backslash escapes along the way.
///
/// `\n` becomes a line feed and `\t` a tab. For any other escaped character
/// (including `\"` and `\\`) the backslash is dropped and the character
/// itself is kept, which the original authors note matches UnrealScript.
/// A lone backslash at the very end of `raw` is discarded.
///
/// Quotes are not stripped here: `raw` is expected to be the token text
/// without its surrounding quotes.
pub(crate) fn unescape_string_literal(
    arena: &'arena crate::arena::Arena,
    raw: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut unescaped = String::with_capacity(raw.len());
    // Set right after a backslash has been seen; the next character is then
    // interpreted as the escape code.
    let mut after_backslash = false;
    for character in raw.chars() {
        if after_backslash {
            after_backslash = false;
            unescaped.push(match character {
                'n' => '\n',
                't' => '\t',
                // `"`, `\` and every unknown escape stand for themselves.
                literal => literal,
            });
        } else if character == '\\' {
            after_backslash = true;
        } else {
            unescaped.push(character);
        }
    }
    arena.string(&unescaped)
}
}

View File

@ -1,6 +1,7 @@
mod block;
mod control;
mod flow;
mod literals;
mod pratt;
mod precedence;
mod statements;

View File

@ -23,9 +23,7 @@
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
use crate::lexer::{Token, TokenLocation};
use crate::parser::{
ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel,
};
use crate::parser::{ParseErrorKind, ParseExpressionResult, ResultRecoveryExt, SyncLevel};
pub(crate) use super::precedence::PrecedenceRank;
@ -96,21 +94,21 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
};
match token {
Token::IntegerLiteral => {
let value = self.parse_integer_literal(token_text)?;
let value = self.decode_integer_literal(token_text)?;
self.advance();
Ok(self
.arena
.alloc_at(Expression::Integer(value), token_location))
}
Token::FloatLiteral => {
let value = self.parse_float_literal(token_text)?;
let value = self.decode_float_literal(token_text)?;
self.advance();
Ok(self
.arena
.alloc_at(Expression::Float(value), token_location))
}
Token::StringLiteral => {
let value = unescape_string_literal(self.arena, token_text);
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
self.advance();
Ok(self
.arena
@ -260,33 +258,6 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
left_hand_side
}
/// Converts the text of an integer literal into an [`i128`].
///
/// [`i128`] is wide enough to cover FerUS's integer range, which keeps
/// constant folding precise.
///
/// # Errors
///
/// Returns [`ParseErrorKind::InvalidNumericLiteral`] when `text` is not
/// a valid integer.
fn parse_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
    match text.parse::<i128>() {
        Ok(parsed_value) => Ok(parsed_value),
        Err(_) => Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral)),
    }
}
/// Converts the text of a float literal into an [`f64`].
///
/// [`f64`] is used to cover FerUS's float range so that constant folding
/// remains precise.
///
/// # Errors
///
/// Returns [`ParseErrorKind::InvalidNumericLiteral`] when `text` is not
/// a valid float.
fn parse_float_literal(&mut self, text: &str) -> ParseResult<f64> {
    text.parse::<f64>()
        .map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
}
/// Returns the next postfix operator and its location if present.
///
/// Helper to avoid peeking and mapping twice; used to drive the postfix
@ -369,38 +340,3 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
None
}
}
/// Builds an arena string from the text of a string literal token, resolving
/// backslash escapes.
///
/// Recognized escapes are `\n` (line feed), `\t` (tab), `\"` and `\\`.
/// For any other escaped character the backslash is dropped and
/// the character itself is emitted, which is the behavior UnrealScript
/// expects. A backslash with nothing after it is discarded.
///
/// `raw` is expected to be the token text without its surrounding quotes;
/// no quotes are stripped here.
fn unescape_string_literal<'arena>(
    arena: &'arena crate::arena::Arena,
    raw: &str,
) -> crate::arena::ArenaString<'arena> {
    let mut output = String::with_capacity(raw.len());
    let mut remaining = raw.chars();
    loop {
        let Some(current) = remaining.next() else {
            break;
        };
        if current != '\\' {
            output.push(current);
            continue;
        }
        // The character following the backslash selects the escape. The lexer
        // is not expected to emit a trailing backslash, but if one shows up it
        // is simply dropped.
        match remaining.next() {
            Some('n') => output.push('\n'),
            Some('t') => output.push('\t'),
            Some('"') => output.push('"'),
            Some('\\') => output.push('\\'),
            Some(unknown) => output.push(unknown),
            None => {}
        }
    }
    arena.string(&output)
}

View File

@ -4,6 +4,9 @@
//! token is found. The sync target is chosen from [`SyncLevel`] based on
//! the error kind. Methods on [`ParseResult`] let callers widen the error span,
//! synchronize, report, and produce fallback values.
//!
//! The general idea is that any method that returns something other than
//! an error can be assumed to have already reported that error.
use crate::lexer::{Token, TokenLocation};
use crate::parser::{ParseError, ParseResult, Parser};
@ -217,6 +220,18 @@ impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
}
}
/// Lets integer results be recovered with a fallback: a failed parse yields
/// zero, the default value of [`i128`].
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
    fn fallback_value(_parser: &Parser<'src, 'arena>, _error: &ParseError) -> Self {
        0
    }
}
/// Lets float results be recovered with a fallback: a failed parse yields
/// positive zero, the default value of [`f64`].
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
    fn fallback_value(_parser: &Parser<'src, 'arena>, _error: &ParseError) -> Self {
        0.0
    }
}
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
error.source_span.to