Add DeclarationLiteral parsing
Added a method for parsing simple literals for use in top-level class declarations. Along with this change, the methods specific to parsing low-level literals were moved into the same file as the new method, since that structure makes more sense.
This commit is contained in:
parent
47693fc5a5
commit
5bd9aadc55
@ -246,6 +246,17 @@ impl<'src, 'arena> Expression<'src, 'arena> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A simple literal value that can appear in a top-level class declaration.
///
/// Produced by the parser's `parse_declaration_literal` method from
/// a single token.
pub enum DeclarationLiteral<'src, 'arena> {
|
||||||
|
/// The `none` keyword.
None,
|
||||||
|
/// The `true` or `false` keyword.
Bool(bool),
|
||||||
|
/// The decoded value of an integer literal token.
Integer(i128),
|
||||||
|
/// The decoded value of a float literal token.
Float(f64),
|
||||||
|
/// The contents of a string literal token after unescaping, stored in
/// the arena.
String(ArenaString<'arena>),
|
||||||
|
/// The text of an identifier token.
Identifier(&'src str),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A [`DeclarationLiteral`] paired with the [`TokenLocation`] of the token it
/// was parsed from.
pub type DeclarationLiteralRef<'src, 'arena> = (DeclarationLiteral<'src, 'arena>, TokenLocation);
|
||||||
|
|
||||||
/// Returns `true` for expressions that require `;` when used as a statement
|
/// Returns `true` for expressions that require `;` when used as a statement
|
||||||
/// (i.e., everything except blocky control-flow forms).
|
/// (i.e., everything except blocky control-flow forms).
|
||||||
pub trait NeedsSemi {
|
pub trait NeedsSemi {
|
||||||
|
|||||||
@ -70,6 +70,11 @@ pub enum ParseErrorKind {
|
|||||||
DeclMissingIdentifier,
|
DeclMissingIdentifier,
|
||||||
/// Invalid variable name identifier in non-`local` variable definition.
|
/// Invalid variable name identifier in non-`local` variable definition.
|
||||||
DeclBadVariableIdentifier,
|
DeclBadVariableIdentifier,
|
||||||
|
/// Found an unexpected token while parsing a declaration literal.
|
||||||
|
///
|
||||||
|
/// Expected one of: integer, float, string, `true`, `false`, `none`
|
||||||
|
/// or an identifier.
|
||||||
|
DeclarationLiteralUnexpectedToken,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Enumerates all specific kinds of parsing errors that the parser can emit.
|
/// Enumerates all specific kinds of parsing errors that the parser can emit.
|
||||||
|
|||||||
119
rottlib/src/parser/grammar/literals.rs
Normal file
119
rottlib/src/parser/grammar/literals.rs
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
use crate::ast::DeclarationLiteral;
|
||||||
|
use crate::lexer::Token;
|
||||||
|
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt};
|
||||||
|
|
||||||
|
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||||
|
/// Parses a simple literal value that can be used inside FerUS's top-level
|
||||||
|
/// class members' definitions.
|
||||||
|
///
|
||||||
|
/// On success consumes exactly one token. If the next token is not
|
||||||
|
/// a supported literal, returns [`None`] and leaves the stream untouched.
|
||||||
|
pub(crate) fn parse_declaration_literal(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<crate::ast::DeclarationLiteralRef> {
|
||||||
|
let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
|
||||||
|
else {
|
||||||
|
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
||||||
|
};
|
||||||
|
let declaration_literal = match token {
|
||||||
|
Token::IntegerLiteral => {
|
||||||
|
let value = self
|
||||||
|
.decode_integer_literal(token_text)
|
||||||
|
.unwrap_or_fallback(self);
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::Integer(value)
|
||||||
|
}
|
||||||
|
Token::FloatLiteral => {
|
||||||
|
let value = self
|
||||||
|
.decode_float_literal(token_text)
|
||||||
|
.unwrap_or_fallback(self);
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::Float(value)
|
||||||
|
}
|
||||||
|
Token::StringLiteral => {
|
||||||
|
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::String(value)
|
||||||
|
}
|
||||||
|
Token::True => {
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::Bool(true)
|
||||||
|
}
|
||||||
|
Token::False => {
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::Bool(false)
|
||||||
|
}
|
||||||
|
Token::None => {
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::None
|
||||||
|
}
|
||||||
|
Token::Identifier => {
|
||||||
|
self.advance();
|
||||||
|
DeclarationLiteral::Identifier(token_text)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
return Err(self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok((declaration_literal, token_location))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an integer literal as [`i128`].
|
||||||
|
///
|
||||||
|
/// Expects a normalized decimal string with optional leading sign.
|
||||||
|
/// Does not accept base prefixes or digit separators.
|
||||||
|
///
|
||||||
|
/// [`i128`] type was chosen to cover FerUS's integer range so constant
|
||||||
|
/// folding remains precise.
|
||||||
|
pub(crate) fn decode_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
|
||||||
|
text.parse::<i128>()
|
||||||
|
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a float literal as [`f64`].
|
||||||
|
///
|
||||||
|
/// Expects a normalized decimal float (optional sign, decimal point,
|
||||||
|
/// optional exponent). Special values like `inf`/`NaN` are rejected.
|
||||||
|
pub(crate) fn decode_float_literal(&mut self, text: &str) -> ParseResult<f64> {
|
||||||
|
if let Ok(parsed_value) = text.parse::<f64>() {
|
||||||
|
Ok(parsed_value)
|
||||||
|
} else {
|
||||||
|
Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unescapes a tokenized string literal into an arena string.
|
||||||
|
///
|
||||||
|
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
|
||||||
|
/// Unknown escape sequences are preserved as-is (UnrealScript behavior).
|
||||||
|
///
|
||||||
|
/// Note: this function assumes `raw` is the token text without surrounding
|
||||||
|
/// quotes.
|
||||||
|
pub(crate) fn unescape_string_literal(
|
||||||
|
arena: &'arena crate::arena::Arena,
|
||||||
|
raw: &str,
|
||||||
|
) -> crate::arena::ArenaString<'arena> {
|
||||||
|
let mut buffer = String::with_capacity(raw.len());
|
||||||
|
let mut characters = raw.chars();
|
||||||
|
while let Some(next_character) = characters.next() {
|
||||||
|
if next_character == '\\' {
|
||||||
|
// The lexer never produces a trailing backslash in a string
|
||||||
|
// token, so there's always a following character to inspect.
|
||||||
|
if let Some(escaped_character) = characters.next() {
|
||||||
|
match escaped_character {
|
||||||
|
'n' => buffer.push('\n'),
|
||||||
|
't' => buffer.push('\t'),
|
||||||
|
'"' => buffer.push('"'),
|
||||||
|
'\\' => buffer.push('\\'),
|
||||||
|
// Simply leaving the escaped character as-is matches
|
||||||
|
// UnrealScript behavior.
|
||||||
|
other => buffer.push(other),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buffer.push(next_character);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
arena.string(&buffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,6 +1,7 @@
|
|||||||
mod block;
|
mod block;
|
||||||
mod control;
|
mod control;
|
||||||
mod flow;
|
mod flow;
|
||||||
|
mod literals;
|
||||||
mod pratt;
|
mod pratt;
|
||||||
mod precedence;
|
mod precedence;
|
||||||
mod statements;
|
mod statements;
|
||||||
|
|||||||
@ -23,9 +23,7 @@
|
|||||||
|
|
||||||
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
|
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
|
||||||
use crate::lexer::{Token, TokenLocation};
|
use crate::lexer::{Token, TokenLocation};
|
||||||
use crate::parser::{
|
use crate::parser::{ParseErrorKind, ParseExpressionResult, ResultRecoveryExt, SyncLevel};
|
||||||
ParseErrorKind, ParseExpressionResult, ParseResult, ResultRecoveryExt, SyncLevel,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub(crate) use super::precedence::PrecedenceRank;
|
pub(crate) use super::precedence::PrecedenceRank;
|
||||||
|
|
||||||
@ -96,21 +94,21 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|||||||
};
|
};
|
||||||
match token {
|
match token {
|
||||||
Token::IntegerLiteral => {
|
Token::IntegerLiteral => {
|
||||||
let value = self.parse_integer_literal(token_text)?;
|
let value = self.decode_integer_literal(token_text)?;
|
||||||
self.advance();
|
self.advance();
|
||||||
Ok(self
|
Ok(self
|
||||||
.arena
|
.arena
|
||||||
.alloc_at(Expression::Integer(value), token_location))
|
.alloc_at(Expression::Integer(value), token_location))
|
||||||
}
|
}
|
||||||
Token::FloatLiteral => {
|
Token::FloatLiteral => {
|
||||||
let value = self.parse_float_literal(token_text)?;
|
let value = self.decode_float_literal(token_text)?;
|
||||||
self.advance();
|
self.advance();
|
||||||
Ok(self
|
Ok(self
|
||||||
.arena
|
.arena
|
||||||
.alloc_at(Expression::Float(value), token_location))
|
.alloc_at(Expression::Float(value), token_location))
|
||||||
}
|
}
|
||||||
Token::StringLiteral => {
|
Token::StringLiteral => {
|
||||||
let value = unescape_string_literal(self.arena, token_text);
|
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
|
||||||
self.advance();
|
self.advance();
|
||||||
Ok(self
|
Ok(self
|
||||||
.arena
|
.arena
|
||||||
@ -260,33 +258,6 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|||||||
left_hand_side
|
left_hand_side
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses an integer literal as [`i128`].
|
|
||||||
///
|
|
||||||
/// Chosen to cover FerUS's integer range so constant folding
|
|
||||||
/// remains precise.
|
|
||||||
///
|
|
||||||
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
|
|
||||||
/// not a valid integer.
|
|
||||||
fn parse_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
|
|
||||||
text.parse::<i128>()
|
|
||||||
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a float literal as [`f64`].
|
|
||||||
///
|
|
||||||
/// Chosen to cover FerUS's float range so constant folding remains
|
|
||||||
/// precise.
|
|
||||||
///
|
|
||||||
/// Errors with [`ParseErrorKind::InvalidNumericLiteral`] if `text` is
|
|
||||||
/// not a valid float.
|
|
||||||
fn parse_float_literal(&mut self, text: &str) -> ParseResult<f64> {
|
|
||||||
if let Ok(parsed_value) = text.parse::<f64>() {
|
|
||||||
Ok(parsed_value)
|
|
||||||
} else {
|
|
||||||
Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the next postfix operator and its location if present.
|
/// Returns the next postfix operator and its location if present.
|
||||||
///
|
///
|
||||||
/// Helper to avoid peeking and mapping twice; used to drive the postfix
|
/// Helper to avoid peeking and mapping twice; used to drive the postfix
|
||||||
@ -369,38 +340,3 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unescapes a tokenized string literal into an arena string.
|
|
||||||
///
|
|
||||||
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
|
|
||||||
/// Unknown escape sequences are preserved as-is (UnrealScript behavior).
|
|
||||||
///
|
|
||||||
/// Note: this function assumes `raw` is the token text without surrounding
|
|
||||||
/// quotes.
|
|
||||||
fn unescape_string_literal<'arena>(
|
|
||||||
arena: &'arena crate::arena::Arena,
|
|
||||||
raw: &str,
|
|
||||||
) -> crate::arena::ArenaString<'arena> {
|
|
||||||
let mut buffer = String::with_capacity(raw.len());
|
|
||||||
let mut characters = raw.chars();
|
|
||||||
while let Some(next_character) = characters.next() {
|
|
||||||
if next_character == '\\' {
|
|
||||||
// The lexer never produces a trailing backslash in a string token,
|
|
||||||
// so there's always a following character to inspect.
|
|
||||||
if let Some(escaped_character) = characters.next() {
|
|
||||||
match escaped_character {
|
|
||||||
'n' => buffer.push('\n'),
|
|
||||||
't' => buffer.push('\t'),
|
|
||||||
'"' => buffer.push('"'),
|
|
||||||
'\\' => buffer.push('\\'),
|
|
||||||
// Simply leaving escaped character as-is is an expected
|
|
||||||
// behavior by UnrealScript
|
|
||||||
other => buffer.push(other),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
buffer.push(next_character);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
arena.string(&buffer)
|
|
||||||
}
|
|
||||||
|
|||||||
@ -4,6 +4,9 @@
|
|||||||
//! token is found. The sync target is chosen from [`SyncLevel`] based on
|
//! token is found. The sync target is chosen from [`SyncLevel`] based on
|
||||||
//! the error kind. Methods on [`ParseResult`] let callers widen the error span,
|
//! the error kind. Methods on [`ParseResult`] let callers widen the error span,
|
||||||
//! synchronize, report, and produce fallback values.
|
//! synchronize, report, and produce fallback values.
|
||||||
|
//!
|
||||||
|
//! The general idea is that any method that returns something other than
|
||||||
|
//! an error can be assumed to have already reported the error it recovered from.
|
||||||
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
use crate::lexer::{Token, TokenLocation};
|
||||||
use crate::parser::{ParseError, ParseResult, Parser};
|
use crate::parser::{ParseError, ParseResult, Parser};
|
||||||
@ -217,6 +220,18 @@ impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
|
||||||
|
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||||
|
i128::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
|
||||||
|
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||||
|
f64::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
|
||||||
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
error.source_span.to
|
error.source_span.to
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user