rott/rottlib/src/parser/grammar/expression/selectors.rs

321 lines
13 KiB
Rust

//! Parser support for expression selectors.
//!
//! Selectors are suffix forms that require an already parsed left-hand side,
//! such as member access, indexing, and calls.
use crate::ast::{self, ExpressionRef};
use crate::lexer::{self, Token, TokenPosition};
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
// `Copy` is intentionally not derived: duplicating this state has to be an
// explicit `clone()`, so parser state is never reused by accident.
#[derive(Debug, Clone, Eq, PartialEq)]
pub(super) struct CallArgumentListParseState {
    /// How many argument slots have been yielded so far. Omitted slots are
    /// counted as well.
    pub(super) parsed_argument_slot_count: usize,
    /// Set when the most recently yielded argument expression was not
    /// followed by a call-argument boundary (`,`, `)`, or end-of-file).
    pub(super) last_slot_missing_separator: bool,
}
impl CallArgumentListParseState {
    /// Creates the state for an argument list in which no slot has been
    /// yielded yet and no separator is missing.
    #[must_use]
    pub(super) fn new() -> Self {
        let parsed_argument_slot_count = 0;
        let last_slot_missing_separator = false;
        Self {
            parsed_argument_slot_count,
            last_slot_missing_separator,
        }
    }

    /// Returns `true` once at least one slot (omitted or not) has been yielded.
    #[must_use]
    fn has_parsed_any_argument_slots(&self) -> bool {
        self.parsed_argument_slot_count != 0
    }
}
/// Outcome of one attempt to parse a call-argument slot.
#[derive(Debug, PartialEq)]
#[must_use]
pub(super) enum ParsedArgumentSlot<'src, 'arena> {
    /// The argument list is finished; the caller should stop asking for slots.
    NoMoreArguments,
    /// One slot was parsed. A `None` payload marks an omitted argument.
    Argument(ast::OptionalExpression<'src, 'arena>),
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Applies every postfix selector that directly follows `left_hand_side`.
///
/// Recognised selectors are member access (`.`), indexing (`[`) and calls
/// (`(`). Each parsed selector wraps the expression built so far and becomes
/// the left-hand side for the next one. Parsing stops at the first token that
/// does not open a selector, or when no token is left.
///
/// Errors from member-access and index selectors are propagated to the
/// caller; call selectors always produce an expression.
pub(super) fn parse_selectors_after(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
    while let Some((selector_token, selector_position)) = self.peek_token_and_position() {
        // Every arm consumes the opening token itself before delegating,
        // because the selector parsers expect it to be gone already.
        let extended = match selector_token {
            Token::LeftParenthesis => {
                self.advance(); // '('
                Ok(self.parse_call_selector_after(left_hand_side, selector_position))
            }
            Token::LeftBracket => {
                self.advance(); // '['
                self.parse_index_selector_after(left_hand_side, selector_position)
            }
            Token::Period => {
                self.advance(); // '.'
                self.parse_member_access_selector_after(left_hand_side, selector_position)
            }
            _ => break,
        };
        left_hand_side = extended?;
        // Hand the selector's starting position to the parser's
        // forward-progress guard (defined elsewhere).
        self.ensure_forward_progress(selector_position);
    }
    Ok(left_hand_side)
}
/// Builds a member-access expression (`target.name`) on top of `left_hand_side`.
///
/// The caller must already have consumed the `.`; `period_position` is only
/// attached to the diagnostic as a related token.
///
/// Returns the error from identifier parsing
/// (`ParseErrorKind::MemberAccessMissingMemberName`), blamed on the token where
/// the member name was expected.
fn parse_member_access_selector_after(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    period_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    // Remember where the name should be before trying to parse it, so the
    // diagnostic can point at that token.
    let expected_name_position = self.peek_position_or_eof();
    let name = self
        .parse_identifier(ParseErrorKind::MemberAccessMissingMemberName)
        .blame_token(expected_name_position)
        .related_token("period", period_position)?;
    // The node spans from the start of the target to the member name
    // (`name.0` supplies the end of the range).
    let span = lexer::TokenSpan::range(left_hand_side.span().start, name.0);
    let member_access = ast::Expression::Member {
        target: left_hand_side,
        name,
    };
    Ok(self.arena.alloc_node(member_access, span))
}
/// Builds an index expression (`target[index]`) on top of `left_hand_side`.
///
/// The caller must already have consumed the `[`. Both the index expression
/// and the closing `]` are required; if either fails, recovery is delegated
/// to `sync_error_at_matching_delimiter` for the opening bracket and the
/// error is returned.
fn parse_index_selector_after(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    left_bracket_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    let target_start = left_hand_side.span().start;
    let target_end = left_hand_side.span().end;

    let parsed_index = self.parse_expression_with_start_error(
        ParseErrorKind::IndexMissingExpression,
        target_end,
        left_bracket_position,
    );
    let index = parsed_index.sync_error_at_matching_delimiter(self, left_bracket_position)?;

    let closing_bracket = self.expect(
        Token::RightBracket,
        ParseErrorKind::IndexMissingClosingBracket,
    );
    let right_bracket_position = closing_bracket
        .widen_error_span_from(left_bracket_position)
        .sync_error_at_matching_delimiter(self, left_bracket_position)
        .related_token("left_bracket", left_bracket_position)?;

    // The node covers everything from the start of the target to the `]`.
    let index_access = ast::Expression::Index {
        target: left_hand_side,
        index,
    };
    Ok(self
        .arena
        .alloc_node_between(index_access, target_start, right_bracket_position))
}
/// Builds a call expression (`callee(arguments)`) on top of `left_hand_side`.
///
/// The caller must already have consumed the `(`. This method does not return
/// a `Result`: problems in the argument list are handled by
/// `parse_call_argument_list`, and a missing `)` is resolved through
/// `unwrap_or_fallback`, so a call node is always produced.
fn parse_call_selector_after(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let callee_start_position = left_hand_side.span().start;
    let callee_end_position = left_hand_side.span().end;

    let arguments =
        self.parse_call_argument_list(callee_end_position, left_parenthesis_position);

    // The argument list stops in front of the `)`, so it is consumed here.
    let closing_parenthesis = self.expect(
        Token::RightParenthesis,
        ParseErrorKind::FunctionCallMissingClosingParenthesis,
    );
    let right_parenthesis_position = closing_parenthesis
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_at_matching_delimiter(self, left_parenthesis_position)
        .related_token("callee", callee_end_position)
        .related_token("left_parenthesis", left_parenthesis_position)
        .unwrap_or_fallback(self);

    let call = ast::Expression::Call {
        callee: left_hand_side,
        arguments,
    };
    self.arena
        .alloc_node_between(call, callee_start_position, right_parenthesis_position)
}
/// Collects the argument slots of a call whose `(` has already been consumed.
///
/// Slots are pulled one at a time from `parse_next_call_argument_slot` and
/// pushed in order; an omitted argument is stored as `None`. The closing `)`
/// is left for the caller.
///
/// When a slot is not followed by a separator, the problem is reported via
/// `recover_after_missing_function_call_argument_separator`; collection stops
/// early if that recovery says parsing cannot continue.
fn parse_call_argument_list(
    &mut self,
    callee_end_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
) -> ast::ArgumentList<'src, 'arena> {
    let mut collected_slots = crate::arena::ArenaVec::new_in(self.arena);
    let mut slot_state = CallArgumentListParseState::new();
    // Position peeked at the end of the previous iteration; `None` until the
    // first slot has been handled.
    let mut previous_iteration_position = None;

    loop {
        let slot = match self.parse_next_call_argument_slot(&mut slot_state) {
            ParsedArgumentSlot::Argument(slot) => slot,
            ParsedArgumentSlot::NoMoreArguments => break,
        };
        if let Some(checkpoint) = previous_iteration_position {
            self.ensure_forward_progress(checkpoint);
        }

        // Copy the span out before the slot is moved into the list.
        let slot_span = slot.as_ref().map(|expression| *expression.span());
        collected_slots.push(slot);

        // Recovery only runs (and only reports) when a separator is missing.
        let may_continue = !slot_state.last_slot_missing_separator
            || self.recover_after_missing_function_call_argument_separator(
                callee_end_position,
                left_parenthesis_position,
                slot_span,
            );
        if !may_continue {
            break;
        }
        previous_iteration_position = self.peek_position();
    }

    collected_slots
}
/// Parses one logical slot of a call-argument list.
///
/// UnrealScript treats every comma as the start of another slot, so `f(x,)`
/// is `f(x, <omitted>)` rather than a call with a tolerated trailing comma.
///
/// # Returns
///
/// - [`ParsedArgumentSlot::NoMoreArguments`] when the next token is `)`, when
///   no token is left, or when neither expression parsing nor list-level
///   recovery consumed anything.
/// - [`ParsedArgumentSlot::Argument`] otherwise, carrying `None` for an
///   omitted slot.
///
/// Calling this repeatedly with the same `state` is intended to reach
/// [`ParsedArgumentSlot::NoMoreArguments`] eventually, even on malformed
/// input.
///
/// `state` is updated on every call: the missing-separator flag is reset
/// first, the slot counter is incremented for each yielded slot, and the flag
/// is set again if a parsed expression is not followed by a boundary.
pub(super) fn parse_next_call_argument_slot(
    &mut self,
    state: &mut CallArgumentListParseState,
) -> ParsedArgumentSlot<'src, 'arena> {
    state.last_slot_missing_separator = false;

    // A comma is owned by the slot that follows it, because a final comma
    // stands for an omitted last argument rather than a trailing separator.
    match self.peek_token() {
        Some(Token::Comma) => {
            // For the very first slot (`f(,x)`) the comma is left in place:
            // it both implies the omitted slot and separates it from `x`.
            let comma_follows_earlier_slot = state.has_parsed_any_argument_slots();
            if comma_follows_earlier_slot {
                self.advance();
            }
            if self.is_at_call_argument_boundary() {
                state.parsed_argument_slot_count += 1;
                return ParsedArgumentSlot::Argument(None);
            }
        }
        Some(Token::RightParenthesis) | None => {
            return ParsedArgumentSlot::NoMoreArguments;
        }
        Some(_) => {}
    }

    let slot_start_position = self.peek_position_or_eof();
    let mut expression = self.parse_expression();

    let expression_consumed_nothing = self.peek_position_or_eof() == slot_start_position;
    if expression_consumed_nothing {
        // Expression parsing did not move; try skipping to a list separator.
        self.recover_until(SyncLevel::ListSeparator);
        let recovery_consumed_nothing = self.peek_position_or_eof() == slot_start_position;
        if recovery_consumed_nothing {
            // Nothing was consumed at all, so end the list here.
            return ParsedArgumentSlot::NoMoreArguments;
        }
        // Stretch the expression's span up to the last token consumed by
        // the recovery.
        let recovered_end_position = self.last_consumed_position_or_start();
        expression.span_mut().extend_to(recovered_end_position);
    }

    state.parsed_argument_slot_count += 1;
    let followed_by_boundary = self.is_at_call_argument_boundary();
    state.last_slot_missing_separator = !followed_by_boundary;
    ParsedArgumentSlot::Argument(Some(expression))
}
/// Reports an argument that is not followed by a boundary and tries to recover.
///
/// Two situations are told apart by the token after the argument:
///
/// - It may start an expression: `FunctionCallArgumentMissingComma` is
///   reported at that token and no synchronisation is requested, so the
///   caller can parse it as the next argument.
/// - It definitely cannot start an expression:
///   `FunctionCallUnexpectedTokenInArgumentList` is reported and synchronised
///   with `SyncLevel::ListSeparator`.
///
/// `previous_argument_span`, when present, is attached to the diagnostic as
/// related information.
///
/// Returns `true` when the caller may keep parsing argument slots from the
/// current position and `false` when it should stop.
#[must_use]
fn recover_after_missing_function_call_argument_separator(
    &mut self,
    callee_end_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
    previous_argument_span: Option<lexer::TokenSpan>,
) -> bool {
    if !self.next_token_definitely_cannot_start_expression() {
        // Most likely just a forgotten comma between two arguments.
        let following_argument_position = self.peek_position_or_eof();
        let missing_comma = self
            .make_error_at(
                ParseErrorKind::FunctionCallArgumentMissingComma,
                following_argument_position,
            )
            .blame_token(following_argument_position)
            .related_token("callee", callee_end_position)
            .related_token("left_parenthesis", left_parenthesis_position);
        debug_assert!(previous_argument_span.is_some());
        let missing_comma = match previous_argument_span {
            Some(span) => missing_comma.related("previous_argument", span),
            None => missing_comma,
        };
        missing_comma.report(self);
        return true;
    }

    let offending_token_position = self.peek_position_or_eof();
    let unexpected_token = self
        .make_error_at(
            ParseErrorKind::FunctionCallUnexpectedTokenInArgumentList,
            offending_token_position,
        )
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_until(self, SyncLevel::ListSeparator)
        .blame_token(offending_token_position)
        .related_token("callee", callee_end_position)
        .related_token("left_parenthesis", left_parenthesis_position);
    // NOTE(review): this label is "argument" while the missing-comma case
    // uses "previous_argument" for the same span; confirm that is intended.
    let unexpected_token = match previous_argument_span {
        Some(span) => unexpected_token.related("argument", span),
        None => unexpected_token,
    };
    unexpected_token.report(self);

    // Parsing can only resume if synchronisation stopped on a boundary.
    self.is_at_call_argument_boundary()
}
/// Reports whether the parser is positioned at the end of a call-argument slot.
///
/// A boundary is a `,`, a `)`, or the absence of any further token.
#[must_use]
fn is_at_call_argument_boundary(&mut self) -> bool {
    let upcoming_token = self.peek_token();
    matches!(
        upcoming_token,
        Some(Token::RightParenthesis | Token::Comma) | None
    )
}
}