258 lines
10 KiB
Rust
258 lines
10 KiB
Rust
//! Core of the expression parser for Fermented `UnrealScript`.
|
|
//!
|
|
//! This module implements a Pratt-style parser for the language's expression
|
|
//! grammar, supporting:
|
|
//!
|
|
//! * Primary expressions (see [`crate::parser::primary`] for details on what
|
|
//! we consider to be a primary expression);
|
|
//! * Prefix operators;
|
|
//! * Postfix operators;
|
|
//! * Infix operators with hard-coded precedence and associativity.
|
|
//!
|
|
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
|
|
//! operators bind. Infix parsing uses the pair of binding powers returned by
|
|
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
|
|
//! The parser infrastructure supports both left- and right-associative
|
|
//! operators, but Fermented `UnrealScript` currently defines only
|
|
//! left-associative ones.
|
|
//!
|
|
//! ## Postfix operator vs "selectors"
|
|
//!
|
|
//! Everywhere here we distinguish *selectors* like field accessor `.`,
|
|
//! function call `()` or array indices `[]` from other *postfix operators*
|
|
//! as they:
|
|
//!
|
|
//! 1. Have significantly different semantic meaning;
|
|
//! 2. Are not considered operators from `UnrealScript`'s viewpoint
|
|
//! (e.g. cannot be overloaded).
|
|
//!
|
|
//! ## See also
|
|
//!
|
|
//! - [`parser::Parser::parse_expression`] - main entry point
|
|
//! - [`PrecedenceRank`] - operator binding strengths
|
|
//! - [`super::precedence`] - operator precedence definitions
|
|
|
|
use crate::ast::{self, Expression, ExpressionRef};
|
|
use crate::lexer::TokenPosition;
|
|
use crate::parser::{
|
|
self, ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, diagnostic_labels,
|
|
};
|
|
|
|
pub use super::precedence::PrecedenceRank;
|
|
|
|
/// Returns whether postfix operators like `++` and `--` are disallowed
|
|
/// after this expression.
|
|
///
|
|
/// This restriction applies only to postfix operators. Selectors such as
|
|
/// field access `.x`, indexing `[i]`, and calls `(args)` remain allowed.
|
|
fn forbids_postfix_operators(expression: &ExpressionRef<'_, '_>) -> bool {
|
|
matches!(
|
|
**expression,
|
|
Expression::If { .. }
|
|
| Expression::While { .. }
|
|
| Expression::DoUntil { .. }
|
|
| Expression::For { .. }
|
|
| Expression::ForEach { .. }
|
|
| Expression::Switch { .. }
|
|
| Expression::Block { .. }
|
|
)
|
|
}
|
|
|
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
|
// TODO: success here guaranees progress
|
|
/// Parses an expression.
|
|
///
|
|
/// Always returns some expression node; any syntax errors are reported
|
|
/// through the parser's diagnostics.
|
|
#[must_use]
|
|
pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
|
|
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
|
|
.sync_error_until(self, parser::SyncLevel::ExpressionStart)
|
|
.unwrap_or_fallback(self)
|
|
}
|
|
|
|
/// Parses an expression in a grammar position where an expression is
|
|
/// required.
|
|
///
|
|
/// This is the checked variant of [`Parser::parse_expression`]. If the next
|
|
/// token is known not to be a valid expression starter, this reports
|
|
/// `bad_start_error_kind`, consumes the bad token, and starts panic-mode
|
|
/// recovery until [`crate::parser::SyncLevel::ExpressionStart`].
|
|
///
|
|
/// `required_by_position` identifies the token or construct that created
|
|
/// the requirement for an expression. It is attached to the diagnostic with
|
|
/// the [`diagnostic_labels::EXPRESSION_REQUIRED_BY`] label.
|
|
///
|
|
/// `expression_context_position` identifies the local syntactic anchor after
|
|
/// which the expression was expected. It is attached to the diagnostic with
|
|
/// the [`diagnostic_labels::EXPRESSION_EXPECTED_AFTER`] label.
|
|
pub(super) fn parse_expression_with_start_error(
|
|
&mut self,
|
|
bad_start_error_kind: ParseErrorKind,
|
|
required_by_position: crate::lexer::TokenPosition,
|
|
expression_context_position: crate::lexer::TokenPosition,
|
|
) -> ParseExpressionResult<'src, 'arena> {
|
|
if self.next_token_definitely_cannot_start_expression() {
|
|
let error_position = self.peek_position_or_eof();
|
|
//self.advance();
|
|
|
|
return Err(self
|
|
.make_error_at(bad_start_error_kind, error_position)
|
|
.sync_error_until(self, crate::parser::SyncLevel::ExpressionStart)
|
|
.blame_token(error_position)
|
|
.related_token(
|
|
diagnostic_labels::EXPRESSION_REQUIRED_BY,
|
|
required_by_position,
|
|
)
|
|
.related_token(
|
|
diagnostic_labels::EXPRESSION_EXPECTED_AFTER,
|
|
expression_context_position,
|
|
));
|
|
}
|
|
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
|
|
}
|
|
|
|
pub(super) fn make_error_expression_at(
|
|
&self,
|
|
position: TokenPosition,
|
|
) -> ExpressionRef<'src, 'arena> {
|
|
crate::arena::ArenaNode::new_in(
|
|
Expression::Error,
|
|
crate::lexer::TokenSpan::new(position),
|
|
self.arena,
|
|
)
|
|
}
|
|
|
|
/// Parses an expression, including only operators with binding power
|
|
/// at least `min_precedence_rank` (as tight or tighter).
|
|
fn parse_expression_with_min_precedence_rank(
|
|
&mut self,
|
|
min_precedence_rank: PrecedenceRank,
|
|
) -> parser::ParseExpressionResult<'src, 'arena> {
|
|
let mut left_hand_side = self.parse_prefix_or_primary()?;
|
|
left_hand_side = self.parse_selectors_after(left_hand_side)?;
|
|
// We disallow only postfix operators after expression forms that
|
|
// represent control-flow or block constructs. Selectors are still
|
|
// parsed normally.
|
|
// This avoids ambiguities in cases like:
|
|
//
|
|
// ```unrealscript
|
|
// if test() { do_it(); }
|
|
// ++ counter;
|
|
// ```
|
|
//
|
|
// This wasn't a problem in UnrealScript, because such constructs were
|
|
// never treated as expressions. And it shouldn't be an issue for us
|
|
// because neither `--` or `++` (the only existing default postfix
|
|
// operators) make any sense after such expressions anyway.
|
|
if !forbids_postfix_operators(&left_hand_side) {
|
|
left_hand_side = self.parse_postfix_after(left_hand_side);
|
|
}
|
|
self.parse_infix_after(left_hand_side, min_precedence_rank)
|
|
}
|
|
|
|
/// Parses a prefix or primary expression (Pratt parser's "nud" or
|
|
/// null denotation).
|
|
fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> {
|
|
let (token, token_lexeme, token_position) =
|
|
self.require_token_lexeme_and_position(ParseErrorKind::ExpressionExpected)?;
|
|
// Avoid advancing over an obviously wrong token;
|
|
// this prevents error cases like `new(Outer, Name, 7 +) SomeClass`.
|
|
if token.is_definitely_not_expression_start() {
|
|
return Err(
|
|
self.make_error_at(ParseErrorKind::ExpressionExpected, token_position)
|
|
);
|
|
}
|
|
self.advance();
|
|
if let Ok(operator) = ast::PrefixOperator::try_from(token) {
|
|
// In UnrealScript, prefix and postfix operators bind tighter than
|
|
// any infix operators, so we can safely parse the right hand side
|
|
// at the tightest precedence.
|
|
let right_hand_side = self
|
|
.parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST)
|
|
.related_token("prefix_operator", token_position)?;
|
|
Ok(Expression::new_prefix(
|
|
self.arena,
|
|
token_position,
|
|
operator,
|
|
right_hand_side,
|
|
))
|
|
} else {
|
|
self.parse_primary_from_current_token(token, token_lexeme, token_position)
|
|
}
|
|
}
|
|
|
|
/// Parses all postfix operators it can, creating a tree with
|
|
/// `left_hand_side` as a child.
|
|
fn parse_postfix_after(
|
|
&mut self,
|
|
mut left_hand_side: ExpressionRef<'src, 'arena>,
|
|
) -> ExpressionRef<'src, 'arena> {
|
|
while let Some((operator, operator_position)) = self.peek_postfix_with_position() {
|
|
self.advance();
|
|
left_hand_side =
|
|
Expression::new_postfix(self.arena, left_hand_side, operator, operator_position);
|
|
}
|
|
left_hand_side
|
|
}
|
|
|
|
/// Parses infix operators binding at least as tight as
|
|
/// `min_precedence_rank`.
|
|
///
|
|
/// Associativity is encoded by
|
|
/// [`super::precedence::infix_precedence_ranks`].
|
|
///
|
|
/// Stops when the next operator is looser than `min_precedence_rank`.
|
|
fn parse_infix_after(
|
|
&mut self,
|
|
mut left_hand_side: ExpressionRef<'src, 'arena>,
|
|
min_precedence_rank: PrecedenceRank,
|
|
) -> parser::ParseExpressionResult<'src, 'arena> {
|
|
while let Some((operator, right_precedence_rank)) =
|
|
self.peek_infix_with_min_precedence_rank(min_precedence_rank)
|
|
{
|
|
self.advance();
|
|
let infix_operator_position = self.last_consumed_position_or_start();
|
|
let right_hand_side = self
|
|
.parse_expression_with_min_precedence_rank(right_precedence_rank)
|
|
.related_token("infix_operator", infix_operator_position)?;
|
|
left_hand_side =
|
|
Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
|
|
}
|
|
Ok(left_hand_side)
|
|
}
|
|
|
|
/// Returns the next postfix operator and its position if present.
|
|
///
|
|
/// Helper to avoid peeking and mapping twice; used to drive the postfix
|
|
/// loop without unwraps.
|
|
fn peek_postfix_with_position(
|
|
&mut self,
|
|
) -> Option<(ast::PostfixOperator, crate::lexer::TokenPosition)> {
|
|
let (token, token_position) = self.peek_token_and_position()?;
|
|
let Ok(operator) = ast::PostfixOperator::try_from(token) else {
|
|
return None;
|
|
};
|
|
Some((operator, token_position))
|
|
}
|
|
|
|
/// If the next token is an infix operator with left binding power at least
|
|
/// `min_precedence_rank`, returns its operator and the minimum precedence
|
|
/// rank to use when parsing the right-hand side (i.e. the operator's right
|
|
/// binding power).
|
|
///
|
|
/// Otherwise returns [`None`].
|
|
fn peek_infix_with_min_precedence_rank(
|
|
&mut self,
|
|
min_precedence_rank: PrecedenceRank,
|
|
) -> Option<(ast::InfixOperator, PrecedenceRank)> {
|
|
let (left_precedence_rank, operator, right_precedence_rank) = self
|
|
.peek_token()
|
|
.and_then(super::precedence::infix_precedence_ranks)?;
|
|
if left_precedence_rank.is_looser_than(min_precedence_rank) {
|
|
return None;
|
|
}
|
|
Some((operator, right_precedence_rank))
|
|
}
|
|
}
|