//! Core of the expression parser for Fermented `UnrealScript`. //! //! This module implements a Pratt-style parser for the language's expression //! grammar, supporting: //! //! * Primary expressions (see [`crate::parser::primary`] for details on what //! we consider to be a primary expression); //! * Prefix operators; //! * Postfix operators; //! * Infix operators with hard-coded precedence and associativity. //! //! Parsing is driven by [`PrecedenceRank`], which controls how tightly //! operators bind. Infix parsing uses the pair of binding powers returned by //! [`super::precedence::infix_precedence_ranks`] to encode associativity. //! The parser infrastructure supports both left- and right-associative //! operators, but Fermented `UnrealScript` currently defines only //! left-associative ones. //! //! ## Postfix operator vs "selectors" //! //! Everywhere here we distinguish *selectors* like field accessor `.`, //! function call `()` or array indices `[]` from other *postfix operators* //! as they: //! //! 1. Have significantly different semantic meaning; //! 2. Are not considered operators from `UnrealScript`'s viewpoint //! (e.g. cannot be overloaded). //! //! ## See also //! //! - [`parser::Parser::parse_expression`] - main entry point //! - [`PrecedenceRank`] - operator binding strengths //! - [`super::precedence`] - operator precedence definitions use crate::ast::{self, Expression, ExpressionRef}; use crate::lexer::TokenPosition; use crate::parser::{ self, ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, diagnostic_labels, }; pub use super::precedence::PrecedenceRank; /// Returns whether postfix operators like `++` and `--` are disallowed /// after this expression. /// /// This restriction applies only to postfix operators. Selectors such as /// field access `.x`, indexing `[i]`, and calls `(args)` remain allowed. fn forbids_postfix_operators(expression: &ExpressionRef<'_, '_>) -> bool { matches!( **expression, Expression::If { .. } | Expression::While { .. } | Expression::DoUntil { .. } | Expression::For { .. } | Expression::ForEach { .. } | Expression::Switch { .. } | Expression::Block { .. } ) } impl<'src, 'arena> Parser<'src, 'arena> { // TODO: success here guaranees progress /// Parses an expression. /// /// Always returns some expression node; any syntax errors are reported /// through the parser's diagnostics. #[must_use] pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> { self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST) .sync_error_until(self, parser::SyncLevel::ExpressionStart) .unwrap_or_fallback(self) } /// Parses an expression in a grammar position where an expression is /// required. /// /// This is the checked variant of [`Parser::parse_expression`]. If the next /// token is known not to be a valid expression starter, this reports /// `bad_start_error_kind`, consumes the bad token, and starts panic-mode /// recovery until [`crate::parser::SyncLevel::ExpressionStart`]. /// /// `required_by_position` identifies the token or construct that created /// the requirement for an expression. It is attached to the diagnostic with /// the [`diagnostic_labels::EXPRESSION_REQUIRED_BY`] label. /// /// `expression_context_position` identifies the local syntactic anchor after /// which the expression was expected. It is attached to the diagnostic with /// the [`diagnostic_labels::EXPRESSION_EXPECTED_AFTER`] label. pub(super) fn parse_expression_with_start_error( &mut self, bad_start_error_kind: ParseErrorKind, required_by_position: crate::lexer::TokenPosition, expression_context_position: crate::lexer::TokenPosition, ) -> ParseExpressionResult<'src, 'arena> { if self.next_token_definitely_cannot_start_expression() { let error_position = self.peek_position_or_eof(); //self.advance(); return Err(self .make_error_at(bad_start_error_kind, error_position) .sync_error_until(self, crate::parser::SyncLevel::ExpressionStart) .blame_token(error_position) .related_token( diagnostic_labels::EXPRESSION_REQUIRED_BY, required_by_position, ) .related_token( diagnostic_labels::EXPRESSION_EXPECTED_AFTER, expression_context_position, )); } self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST) } pub(super) fn make_error_expression_at( &self, position: TokenPosition, ) -> ExpressionRef<'src, 'arena> { crate::arena::ArenaNode::new_in( Expression::Error, crate::lexer::TokenSpan::new(position), self.arena, ) } /// Parses an expression, including only operators with binding power /// at least `min_precedence_rank` (as tight or tighter). fn parse_expression_with_min_precedence_rank( &mut self, min_precedence_rank: PrecedenceRank, ) -> parser::ParseExpressionResult<'src, 'arena> { let mut left_hand_side = self.parse_prefix_or_primary()?; left_hand_side = self.parse_selectors_after(left_hand_side)?; // We disallow only postfix operators after expression forms that // represent control-flow or block constructs. Selectors are still // parsed normally. // This avoids ambiguities in cases like: // // ```unrealscript // if test() { do_it(); } // ++ counter; // ``` // // This wasn't a problem in UnrealScript, because such constructs were // never treated as expressions. And it shouldn't be an issue for us // because neither `--` or `++` (the only existing default postfix // operators) make any sense after such expressions anyway. if !forbids_postfix_operators(&left_hand_side) { left_hand_side = self.parse_postfix_after(left_hand_side); } self.parse_infix_after(left_hand_side, min_precedence_rank) } /// Parses a prefix or primary expression (Pratt parser's "nud" or /// null denotation). fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> { let (token, token_lexeme, token_position) = self.require_token_lexeme_and_position(ParseErrorKind::ExpressionExpected)?; // Avoid advancing over an obviously wrong token; // this prevents error cases like `new(Outer, Name, 7 +) SomeClass`. if token.is_definitely_not_expression_start() { return Err( self.make_error_at(ParseErrorKind::ExpressionExpected, token_position) ); } self.advance(); if let Ok(operator) = ast::PrefixOperator::try_from(token) { // In UnrealScript, prefix and postfix operators bind tighter than // any infix operators, so we can safely parse the right hand side // at the tightest precedence. let right_hand_side = self .parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST) .related_token("prefix_operator", token_position)?; Ok(Expression::new_prefix( self.arena, token_position, operator, right_hand_side, )) } else { self.parse_primary_from_current_token(token, token_lexeme, token_position) } } /// Parses all postfix operators it can, creating a tree with /// `left_hand_side` as a child. fn parse_postfix_after( &mut self, mut left_hand_side: ExpressionRef<'src, 'arena>, ) -> ExpressionRef<'src, 'arena> { while let Some((operator, operator_position)) = self.peek_postfix_with_position() { self.advance(); left_hand_side = Expression::new_postfix(self.arena, left_hand_side, operator, operator_position); } left_hand_side } /// Parses infix operators binding at least as tight as /// `min_precedence_rank`. /// /// Associativity is encoded by /// [`super::precedence::infix_precedence_ranks`]. /// /// Stops when the next operator is looser than `min_precedence_rank`. fn parse_infix_after( &mut self, mut left_hand_side: ExpressionRef<'src, 'arena>, min_precedence_rank: PrecedenceRank, ) -> parser::ParseExpressionResult<'src, 'arena> { while let Some((operator, right_precedence_rank)) = self.peek_infix_with_min_precedence_rank(min_precedence_rank) { self.advance(); let infix_operator_position = self.last_consumed_position_or_start(); let right_hand_side = self .parse_expression_with_min_precedence_rank(right_precedence_rank) .related_token("infix_operator", infix_operator_position)?; left_hand_side = Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side); } Ok(left_hand_side) } /// Returns the next postfix operator and its position if present. /// /// Helper to avoid peeking and mapping twice; used to drive the postfix /// loop without unwraps. fn peek_postfix_with_position( &mut self, ) -> Option<(ast::PostfixOperator, crate::lexer::TokenPosition)> { let (token, token_position) = self.peek_token_and_position()?; let Ok(operator) = ast::PostfixOperator::try_from(token) else { return None; }; Some((operator, token_position)) } /// If the next token is an infix operator with left binding power at least /// `min_precedence_rank`, returns its operator and the minimum precedence /// rank to use when parsing the right-hand side (i.e. the operator's right /// binding power). /// /// Otherwise returns [`None`]. fn peek_infix_with_min_precedence_rank( &mut self, min_precedence_rank: PrecedenceRank, ) -> Option<(ast::InfixOperator, PrecedenceRank)> { let (left_precedence_rank, operator, right_precedence_rank) = self .peek_token() .and_then(super::precedence::infix_precedence_ranks)?; if left_precedence_rank.is_looser_than(min_precedence_rank) { return None; } Some((operator, right_precedence_rank)) } }