//! Cursor utilities for a token stream. //! //! Provides memoized lookahead over significant tokens and records trivia in //! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments; //! see [`parser::TriviaKind`]. use std::collections::VecDeque; use crate::{ ast::AstSpan, lexer::{self, Keyword, Token, TokenPosition}, parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder}, }; /// Cursor over a token stream with memoized lookahead and trivia attachment. #[derive(Clone, Debug)] pub(crate) struct Cursor<'file, 'src> { tokens: lexer::Tokens<'file, 'src>, lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>, last_consumed_position: Option, } impl<'file, 'src> Cursor<'file, 'src> { /// Creates a [`Cursor`] over `tokenized_file`. pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self { Self { tokens: tokenized_file.iter(), lookahead_buffer: VecDeque::new(), last_consumed_position: None, } } /// Ensures that the lookahead buffer contains at least `lookahead + 1` /// significant tokens, if available. /// /// May consume trivia from the underlying stream without consuming /// significant tokens. fn ensure_lookahead_available( &mut self, lookahead: usize, trivia: &mut TriviaIndexBuilder<'src>, ) { while self.lookahead_buffer.len() <= lookahead { if !self.buffer_next_significant_token(trivia) { break; } } } /// Buffers the next significant token and records any preceding trivia. /// /// Returns `true` if a significant token was buffered, or `false` if the /// stream is exhausted. fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool { for (token_position, token_data) in self.tokens.by_ref() { if let Ok(trivia_kind) = parser::TriviaKind::try_from(token_data.token) { trivia.record_trivia(parser::TriviaToken { kind: trivia_kind, text: token_data.lexeme, position: token_position, }); } else { trivia.record_significant_token(token_position); self.lookahead_buffer .push_back((token_position, token_data)); return true; } } false } } impl<'src, 'arena> Parser<'src, 'arena> { fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> { self.cursor.ensure_lookahead_available(0, &mut self.trivia); self.cursor.lookahead_buffer.front() } /// Returns the next significant token without consuming it. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_token(&mut self) -> Option { self.peek_buffered_token() .map(|(_, token_data)| token_data.token) } /// Returns the next keyword without consuming it. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain or if the next token is not /// a keyword. #[must_use] pub(crate) fn peek_keyword(&mut self) -> Option { match self.peek_token() { Some(Token::Keyword(keyword)) => Some(keyword), _ => None, } } /// Returns the position of the next significant token without consuming it. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_position(&mut self) -> Option { self.peek_buffered_token() .map(|(token_position, _)| *token_position) } /// Returns the next significant token and its lexeme without consuming it. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> { self.peek_buffered_token() .map(|(_, token_data)| (token_data.token, token_data.lexeme)) } /// Returns the next significant token and its position without consuming /// it. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> { self.peek_buffered_token() .map(|(token_position, token_data)| (token_data.token, *token_position)) } /// Returns the next keyword and its position without consuming it. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if next token isn't keyword or no tokens remain. #[must_use] pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> { let Some((Token::Keyword(keyword), keyword_position)) = self.peek_token_and_position() else { return None; }; Some((keyword, keyword_position)) } /// Returns the next significant token, its lexeme, and its position /// without consuming them. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_token_lexeme_and_position( &mut self, ) -> Option<(Token, &'src str, TokenPosition)> { self.peek_buffered_token() .map(|(token_position, token_data)| { (token_data.token, token_data.lexeme, *token_position) }) } /// Returns the next significant token at `lookahead` without consuming it. /// /// `lookahead` counts significant tokens, with `0` referring to the next /// significant token. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if no tokens remain. #[must_use] pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option { self.cursor .ensure_lookahead_available(lookahead, &mut self.trivia); self.cursor .lookahead_buffer .get(lookahead) .map(|(_, token_data)| token_data.token) } /// Returns the keyword at `lookahead` without consuming it. /// /// `lookahead` counts significant tokens, with `0` referring to the next /// significant token. /// /// May buffer additional tokens and record skipped trivia, but does not /// consume any significant token. /// /// Returns [`None`] if the token at that position is not a keyword or if /// the stream ends before that position. #[must_use] pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option { match self.peek_token_at(lookahead) { Some(Token::Keyword(keyword)) => Some(keyword), _ => None, } } /// Returns the position of the next significant token without consuming it. /// /// Generates an error with `error_kind` if no tokens remain. pub(crate) fn require_position( &mut self, error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, TokenPosition> { self.peek_position() .ok_or_else(|| self.make_error_here(error_kind)) } /// Returns the next significant token and its position without consuming /// it. /// /// Generates an error with `error_kind` if no tokens remain. pub(crate) fn require_token_and_position( &mut self, error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, (Token, TokenPosition)> { self.peek_token_and_position() .ok_or_else(|| self.make_error_here(error_kind)) } /// Returns the next significant token, its lexeme, and its position /// without consuming them. /// /// Generates an error with `error_kind` if no tokens remain. pub(crate) fn require_token_lexeme_and_position( &mut self, error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> { self.peek_token_lexeme_and_position() .ok_or_else(|| self.make_error_here(error_kind)) } /// Advances by one significant token. /// /// Records any skipped trivia and returns the consumed token position. /// Returns [`None`] if no significant tokens remain. pub(crate) fn advance(&mut self) -> Option { self.cursor.ensure_lookahead_available(0, &mut self.trivia); if let Some((token_position, _)) = self.cursor.lookahead_buffer.pop_front() { self.cursor.last_consumed_position = Some(token_position); Some(token_position) } else { None } } /// If the next significant token equals `token`, consumes it and /// returns `true`. /// /// Otherwise leaves the cursor unchanged and returns `false`. #[must_use] pub(crate) fn eat(&mut self, token: Token) -> bool { if self.peek_token() == Some(token) { self.advance(); true } else { false } } /// If the next significant token corresponds to the given keyword, /// consumes it and returns `true`. /// /// Otherwise leaves the cursor unchanged and returns `false`. #[must_use] pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool { self.eat(Token::Keyword(keyword)) } /// Expects `expected` token as the next significant one. /// /// On match consumes the token and returns its [`TokenPosition`]. /// Otherwise returns an error of `error_kind` anchored at /// the current token, or at the last consumed token if the stream is /// exhausted. That error also gets set a blame span that contains exactly /// that anchor point. pub(crate) fn expect( &mut self, expected: Token, error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, TokenPosition> { // Anchors EOF diagnostics at the last consumed token // when no current token exists. let anchor = self .peek_position() .unwrap_or_else(|| self.last_consumed_position_or_start()); // `Token` equality is enough here because lexeme and position // are stored separately. if self.peek_token() == Some(expected) { self.advance(); Ok(anchor) } else { Err(self .make_error_at(error_kind, anchor) .blame(AstSpan::new(anchor))) } } /// Expects `expected` keyword as the next significant token. /// /// On match consumes the keyword and returns its [`TokenPosition`]. /// Otherwise returns an error of `error_kind` anchored at the current /// token, or at the last consumed token if the stream is exhausted. pub(crate) fn expect_keyword( &mut self, expected: Keyword, error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, TokenPosition> { self.expect(Token::Keyword(expected), error_kind) } /// Returns position of the last significant token that was actually /// consumed by [`parser::Parser::advance`]. /// /// Returns [`None`] if no tokens have been consumed yet. #[must_use] pub(crate) const fn last_consumed_position(&self) -> Option { self.cursor.last_consumed_position } /// Returns the position of the last significant token consumed by /// [`parser::Parser::advance`], or the start of the stream if no token has /// been consumed yet. /// /// Useful when diagnostics need a stable anchor even at the beginning of /// input. #[must_use] pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition { self.cursor .last_consumed_position .unwrap_or(TokenPosition(0)) } /// Ensures that parsing has advanced past `old_position`. /// /// This is intended as a safeguard against infinite-loop bugs while /// recovering from invalid input. In debug builds it asserts that progress /// was made; in release builds it consumes one significant token when /// the parser stalls. #[track_caller] pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) { if let Some(peeked_position) = self.peek_position() { debug_assert!( peeked_position > old_position, "parser made no forward progress" ); if peeked_position <= old_position { self.advance(); } } } }