Add DeclarationLiteral parsing
Added a method for parsing simple literals for use in top-level class declarations. Along with this change, we've also moved the methods specific to parsing low-level literals into the same file as the new method, since that structure makes more sense.
This commit is contained in:
parent
47693fc5a5
commit
5bd9aadc55
@ -246,6 +246,17 @@ impl<'src, 'arena> Expression<'src, 'arena> {
|
||||
}
|
||||
}
|
||||
|
||||
pub enum DeclarationLiteral<'src, 'arena> {
|
||||
None,
|
||||
Bool(bool),
|
||||
Integer(i128),
|
||||
Float(f64),
|
||||
String(ArenaString<'arena>),
|
||||
Identifier(&'src str),
|
||||
}
|
||||
|
||||
/// A [`DeclarationLiteral`] paired with a [`TokenLocation`].
pub type DeclarationLiteralRef<'src, 'arena> = (DeclarationLiteral<'src, 'arena>, TokenLocation);
|
||||
|
||||
/// Returns `true` for expressions that require `;` when used as a statement
|
||||
/// (i.e., everything except blocky control-flow forms).
|
||||
pub trait NeedsSemi {
|
||||
|
||||
@ -70,6 +70,11 @@ pub enum ParseErrorKind {
|
||||
DeclMissingIdentifier,
|
||||
/// Invalid variable name identifier in non-`local` variable definition.
|
||||
DeclBadVariableIdentifier,
|
||||
/// Found an unexpected token while parsing a declaration literal.
|
||||
///
|
||||
/// Expected one of: integer, float, string, `true`, `false`, `none`
|
||||
/// or an identifier.
|
||||
DeclarationLiteralUnexpectedToken,
|
||||
}
|
||||
|
||||
/// Enumerates all specific kinds of parsing errors that the parser can emit.
|
||||
|
||||
119
rottlib/src/parser/grammar/literals.rs
Normal file
119
rottlib/src/parser/grammar/literals.rs
Normal file
@ -0,0 +1,119 @@
|
||||
use crate::ast::DeclarationLiteral;
|
||||
use crate::lexer::Token;
|
||||
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt};
|
||||
|
||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||
/// Parses a simple literal value that can be used inside FerUS's top-level
|
||||
/// class members' definitions.
|
||||
///
|
||||
/// On success consumes exactly one token. If the next token is not
|
||||
/// a supported literal, returns [`None`] and leaves the stream untouched.
|
||||
pub(crate) fn parse_declaration_literal(
|
||||
&mut self,
|
||||
) -> ParseResult<crate::ast::DeclarationLiteralRef> {
|
||||
let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
|
||||
else {
|
||||
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
||||
};
|
||||
let declaration_literal = match token {
|
||||
Token::IntegerLiteral => {
|
||||
let value = self
|
||||
.decode_integer_literal(token_text)
|
||||
.unwrap_or_fallback(self);
|
||||
self.advance();
|
||||
DeclarationLiteral::Integer(value)
|
||||
}
|
||||
Token::FloatLiteral => {
|
||||
let value = self
|
||||
.decode_float_literal(token_text)
|
||||
.unwrap_or_fallback(self);
|
||||
self.advance();
|
||||
DeclarationLiteral::Float(value)
|
||||
}
|
||||
Token::StringLiteral => {
|
||||
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
|
||||
self.advance();
|
||||
DeclarationLiteral::String(value)
|
||||
}
|
||||
Token::True => {
|
||||
self.advance();
|
||||
DeclarationLiteral::Bool(true)
|
||||
}
|
||||
Token::False => {
|
||||
self.advance();
|
||||
DeclarationLiteral::Bool(false)
|
||||
}
|
||||
Token::None => {
|
||||
self.advance();
|
||||
DeclarationLiteral::None
|
||||
}
|
||||
Token::Identifier => {
|
||||
self.advance();
|
||||
DeclarationLiteral::Identifier(token_text)
|
||||
}
|
||||
_ => {
|
||||
return Err(self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken));
|
||||
}
|
||||
};
|
||||
Ok((declaration_literal, token_location))
|
||||
}
|
||||
|
||||
/// Parses an integer literal as [`i128`].
|
||||
///
|
||||
/// Expects a normalized decimal string with optional leading sign.
|
||||
/// Does not accept base prefixes or digit separators.
|
||||
///
|
||||
/// [`i128`] type was chosen to cover FerUS's integer range so constant
|
||||
/// folding remains precise.
|
||||
pub(crate) fn decode_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
|
||||
text.parse::<i128>()
|
||||
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||
}
|
||||
|
||||
/// Parses a float literal as [`f64`].
|
||||
///
|
||||
/// Expects a normalized decimal float (optional sign, decimal point,
|
||||
/// optional exponent). Special values like `inf`/`NaN` are rejected.
|
||||
pub(crate) fn decode_float_literal(&mut self, text: &str) -> ParseResult<f64> {
|
||||
if let Ok(parsed_value) = text.parse::<f64>() {
|
||||
Ok(parsed_value)
|
||||
} else {
|
||||
Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||
}
|
||||
}
|
||||
|
||||
/// Unescapes a tokenized string literal into an arena string.
|
||||
///
|
||||
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
|
||||
/// Unknown escape sequences are preserved as-is (UnrealScript behavior).
|
||||
///
|
||||
/// Note: this function assumes `raw` is the token text without surrounding
|
||||
/// quotes.
|
||||
pub(crate) fn unescape_string_literal(
|
||||
arena: &'arena crate::arena::Arena,
|
||||
raw: &str,
|
||||
) -> crate::arena::ArenaString<'arena> {
|
||||
let mut buffer = String::with_capacity(raw.len());
|
||||
let mut characters = raw.chars();
|
||||
while let Some(next_character) = characters.next() {
|
||||
if next_character == '\\' {
|
||||
// The lexer never produces a trailing backslash in a string
|
||||
// token, so there's always a following character to inspect.
|
||||
if let Some(escaped_character) = characters.next() {
|
||||
match escaped_character {
|
||||
'n' => buffer.push('\n'),
|
||||
't' => buffer.push('\t'),
|
||||
'"' => buffer.push('"'),
|
||||
'\\' => buffer.push('\\'),
|
||||
// Simply leaving the escaped character as-is matches
|
||||
// UnrealScript behavior.
|
||||
other => buffer.push(other),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buffer.push(next_character);
|
||||
}
|
||||
}
|
||||
arena.string(&buffer)
|
||||
}
|
||||
}
|
||||
@ -1,6 +1,7 @@
|
||||
mod block;
|
||||
mod control;
|
||||
mod flow;
|
||||
mod literals;
|
||||
mod pratt;
|
||||
mod precedence;
|
||||
mod statements;
|
||||
|
||||
@ -23,9 +23,7 @@
|
||||
|
||||
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
|
||||
use crate::lexer::{Token, TokenLocation};
|
||||
use crate::parser::{
|
||||
ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel,
|
||||
};
|
||||
use crate::parser::{ParseErrorKind, ParseExpressionResult, ResultRecoveryExt, SyncLevel};
|
||||
|
||||
pub(crate) use super::precedence::PrecedenceRank;
|
||||
|
||||
@ -96,21 +94,21 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||
};
|
||||
match token {
|
||||
Token::IntegerLiteral => {
|
||||
let value = self.parse_integer_literal(token_text)?;
|
||||
let value = self.decode_integer_literal(token_text)?;
|
||||
self.advance();
|
||||
Ok(self
|
||||
.arena
|
||||
.alloc_at(Expression::Integer(value), token_location))
|
||||
}
|
||||
Token::FloatLiteral => {
|
||||
let value = self.parse_float_literal(token_text)?;
|
||||
let value = self.decode_float_literal(token_text)?;
|
||||
self.advance();
|
||||
Ok(self
|
||||
.arena
|
||||
.alloc_at(Expression::Float(value), token_location))
|
||||
}
|
||||
Token::StringLiteral => {
|
||||
let value = unescape_string_literal(self.arena, token_text);
|
||||
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
|
||||
self.advance();
|
||||
Ok(self
|
||||
.arena
|
||||
@ -260,33 +258,6 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||
left_hand_side
|
||||
}
|
||||
|
||||
/// Parses an integer literal as [`i128`].
|
||||
///
|
||||
/// Chosen to cover FerUS's integer range so constant folding
|
||||
/// remains precise.
|
||||
///
|
||||
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
|
||||
/// not a valid integer.
|
||||
fn parse_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
|
||||
text.parse::<i128>()
|
||||
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||
}
|
||||
|
||||
/// Parses a float literal as [`f64`].
|
||||
///
|
||||
/// Chosen to cover FerUS's float range so constant folding remains
|
||||
/// precise.
|
||||
///
|
||||
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
|
||||
/// not a valid float.
|
||||
fn parse_float_literal(&mut self, text: &str) -> ParseResult<f64> {
|
||||
if let Ok(parsed_value) = text.parse::<f64>() {
|
||||
Ok(parsed_value)
|
||||
} else {
|
||||
Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next postfix operator and its location if present.
|
||||
///
|
||||
/// Helper to avoid peeking and mapping twice; used to drive the postfix
|
||||
@ -369,38 +340,3 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Unescapes a tokenized string literal into an arena string.
|
||||
///
|
||||
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
|
||||
/// Unknown escape sequences are preserved as-is (UnrealScript behavior).
|
||||
///
|
||||
/// Note: this function assumes `raw` is the token text without surrounding
|
||||
/// quotes.
|
||||
fn unescape_string_literal<'arena>(
|
||||
arena: &'arena crate::arena::Arena,
|
||||
raw: &str,
|
||||
) -> crate::arena::ArenaString<'arena> {
|
||||
let mut buffer = String::with_capacity(raw.len());
|
||||
let mut characters = raw.chars();
|
||||
while let Some(next_character) = characters.next() {
|
||||
if next_character == '\\' {
|
||||
// The lexer never produces a trailing backslash in a string token,
|
||||
// so there's always a following character to inspect.
|
||||
if let Some(escaped_character) = characters.next() {
|
||||
match escaped_character {
|
||||
'n' => buffer.push('\n'),
|
||||
't' => buffer.push('\t'),
|
||||
'"' => buffer.push('"'),
|
||||
'\\' => buffer.push('\\'),
|
||||
// Simply leaving escaped character as-is is an expected
|
||||
// behavior by UnrealScript
|
||||
other => buffer.push(other),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
buffer.push(next_character);
|
||||
}
|
||||
}
|
||||
arena.string(&buffer)
|
||||
}
|
||||
|
||||
@ -4,6 +4,9 @@
|
||||
//! token is found. The sync target is chosen from [`SyncLevel`] based on
|
||||
//! the error kind. Methods on [`ParseResult`] let callers widen the error span,
|
||||
//! synchronize, report, and produce fallback values.
|
||||
//!
|
||||
//! The general idea is that any method that returns something other than
//! an error can be assumed to have already reported that error.
|
||||
|
||||
use crate::lexer::{Token, TokenLocation};
|
||||
use crate::parser::{ParseError, ParseResult, Parser};
|
||||
@ -217,6 +220,18 @@ impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
|
||||
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||
i128::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
|
||||
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||
f64::default()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
|
||||
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||
error.source_span.to
|
||||
|
||||
Loading…
Reference in New Issue
Block a user