rott/rottlib/src/parser/grammar/expression/selectors.rs

259 lines
9.8 KiB
Rust

//! Parser for expression selectors in Fermented `UnrealScript`.
//!
//! Selectors are suffix forms that extend an already parsed expression,
//! such as member access, indexing, and calls.
//!
//! Unlike primaries, selectors cannot be parsed on their own from the
//! current token. They always require a left-hand side expression.
use crate::arena::ArenaVec;
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{Token, TokenPosition, TokenSpan};
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
// TODO: consider moving these structs (or re-exporting them) one level up.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct CallArgumentListParseState {
/// Number of argument slots already returned as `Argument(...)`.
///
/// This counts omitted slots too, for example in `f(,x)` or `f(x,,z)`.
pub parsed_slot_count: usize,
/// Whether the most recently returned argument slot was not followed by
/// a valid argument boundary such as `,` or `)`.
///
/// This flag is reset at the start of each call, so after
/// `NoMoreArguments` it is always `false`.
pub last_slot_missing_boundary: bool,
}
impl CallArgumentListParseState {
#[must_use]
pub(crate) fn new() -> Self {
Self {
parsed_slot_count: 0,
last_slot_missing_boundary: false,
}
}
#[must_use]
pub(crate) fn is_first_slot(&self) -> bool {
self.parsed_slot_count == 0
}
}
/// Represents the result of parsing one call argument slot.
///
/// This distinguishes between the end of the argument list and a parsed
/// argument slot, including an omitted one.
#[must_use]
#[derive(Debug, PartialEq)]
pub enum ParsedCallArgumentSlot<'src, 'arena> {
/// Indicates that the argument list has ended.
NoMoreArguments,
/// The parsed argument for this slot.
///
/// `None` represents an omitted argument between commas.
Argument(OptionalExpression<'src, 'arena>),
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Parses zero or more postfix selectors after `left_hand_side`.
///
/// Returns the resulting expression after all contiguous selectors.
pub(crate) fn parse_selectors_into(
&mut self,
left_hand_side: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
let mut left_hand_side = left_hand_side;
// `next_position` is used only to widen diagnostic spans.
while let Some((next_token, next_position)) = self.peek_token_and_position() {
left_hand_side = match next_token {
Token::Period => self.parse_selector_member_access_into(left_hand_side)?,
Token::LeftBracket => {
self.parse_selector_index_into(left_hand_side, next_position)?
}
Token::LeftParenthesis => {
self.parse_selector_call_into(left_hand_side, next_position)
}
_ => break,
};
}
Ok(left_hand_side)
}
/// Parses a member access selector after `left_hand_side`.
///
/// Expects the leading `.` to be the next token and returns the resulting
/// member access expression.
fn parse_selector_member_access_into(
&mut self,
left_hand_side: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
self.advance(); // `.`
let member_access_start = left_hand_side.span().start;
let member_identifier = self.parse_identifier(ParseErrorKind::ExpressionUnexpectedToken)?;
let member_access_end = member_identifier.0;
Ok(self.arena.alloc_node(
Expression::Member {
target: left_hand_side,
name: member_identifier,
},
TokenSpan::range(member_access_start, member_access_end),
))
}
/// Parses an index selector after `left_hand_side`.
///
/// Expects the leading `[` to be the next token and returns the resulting
/// indexing expression.
fn parse_selector_index_into(
&mut self,
left_hand_side: ExpressionRef<'src, 'arena>,
left_bracket_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
self.advance(); // '['
let index_expression = self.parse_expression();
let right_bracket_position = self
.expect(
Token::RightBracket,
ParseErrorKind::ExpressionUnexpectedToken,
)
.widen_error_span_from(left_bracket_position)
.sync_error_at(self, SyncLevel::CloseBracket)?;
let expression_start = left_hand_side.span().start;
Ok(self.arena.alloc_node_between(
Expression::Index {
target: left_hand_side,
index: index_expression,
},
expression_start,
right_bracket_position,
))
}
/// Parses a call selector after `left_hand_side`.
///
/// Expects the leading `(` to be the next token and returns the resulting
/// call expression.
fn parse_selector_call_into(
&mut self,
left_hand_side: ExpressionRef<'src, 'arena>,
left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
self.advance(); // '('
let argument_list = self.parse_call_argument_list(left_parenthesis_position);
let right_parenthesis_position = self
.expect(
Token::RightParenthesis,
ParseErrorKind::FunctionCallMissingClosingParenthesis,
)
.widen_error_span_from(left_parenthesis_position)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.unwrap_or_fallback(self);
let expression_start = left_hand_side.span().start;
self.arena.alloc_node_between(
Expression::Call {
callee: left_hand_side,
arguments: argument_list,
},
expression_start,
right_parenthesis_position,
)
}
// TODO: add note that `parsed_slot_count` is guaranteed to be incremented
// by 1 at most (and when).
// TODO: say that errors must be handled by caller.
/// Parses one call argument slot after an already consumed `(`.
///
/// In `UnrealScript`, every comma introduces a follow-up argument slot, so a
/// trailing comma immediately before `)` denotes an omitted final argument.
///
/// Returns [`ParsedCallArgumentSlot::NoMoreArguments`] when the argument list
/// ends, and `Argument(None)` for an omitted argument slot.
///
/// Per-call status is recorded into `state`.
pub(crate) fn parse_call_argument_slot(
&mut self,
state: &mut CallArgumentListParseState,
) -> ParsedCallArgumentSlot<'src, 'arena> {
state.last_slot_missing_boundary = false;
// This function consumes arguments one at a time and the way we chose
// to handle this is by consuming a comma *before* each new argument,
// not *after*.
// Normal (non-empty) case of special argument will simply skip this
// `match`. But first *empty* argument must be handled as
// a special case.
match self.peek_token() {
None | Some(Token::RightParenthesis) => {
return ParsedCallArgumentSlot::NoMoreArguments;
}
Some(Token::Comma) => {
// We handle special case of first empty argument by *not*
// consuming first comma (it will be consumed together with
// the second argument).
//
// We do change parsing state by incrementing
// `state.parsed_slot_count`, which ensures that
// `is_first_slot()` will return `false` from now on.
if !state.is_first_slot() {
self.advance();
}
// This `if`'s body is guaranteed to run if we've skipped
// `advance()` above.
if self.at_call_argument_boundary() {
state.parsed_slot_count += 1;
return ParsedCallArgumentSlot::Argument(None);
}
}
_ => (),
}
let argument = self.parse_expression();
state.parsed_slot_count += 1;
state.last_slot_missing_boundary = !self.at_call_argument_boundary();
ParsedCallArgumentSlot::Argument(Some(argument))
}
/// Parses a call argument list after an already-consumed `(`.
///
/// Returns all parsed argument slots, preserving omitted arguments
/// as `None`.
fn parse_call_argument_list(
&mut self,
left_parenthesis_position: TokenPosition,
) -> ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>> {
let mut argument_list = ArenaVec::new_in(self.arena);
let mut call_state = CallArgumentListParseState::new();
//let mut old_position = self.peek_position_or_eof();
// This caused infinite loop? (on eof?) what?
while let ParsedCallArgumentSlot::Argument(argument) =
self.parse_call_argument_slot(&mut call_state)
{
argument_list.push(argument);
// TODO: ensure progress here shouldn't be necessary actually
//self.ensure_forward_progress(old_position);
//old_position = self.peek_position_or_eof();
}
argument_list
}
/// Returns whether the current lookahead token ends the current call
/// argument slot.
///
/// This is true for `,`, which starts the next slot, and for `)`, which
/// ends the argument list.
fn at_call_argument_boundary(&mut self) -> bool {
matches!(
self.peek_token(),
Some(Token::Comma | Token::RightParenthesis)
)
}
}