rott/rottlib/src/parser/cursor.rs
dkanus 588790b9b4 Refactor everything
Huge dump of refactored code. Still in the middle of the changes that
are to be squashed later in a one huge monster commit, because there is
no value in anything atomic here.
2026-04-05 20:32:11 +07:00

367 lines
13 KiB
Rust

//! Cursor utilities for a token stream.
//!
//! Provides memoized lookahead over significant tokens and records trivia in
//! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments;
//! see [`parser::TriviaKind`].
use std::collections::VecDeque;
use crate::{
ast::AstSpan,
lexer::{self, Keyword, Token, TokenPosition},
parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder},
};
/// Cursor over a token stream with memoized lookahead and trivia attachment.
#[derive(Clone, Debug)]
pub(crate) struct Cursor<'file, 'src> {
    // Underlying iterator over the lexed token stream; advanced only by
    // `buffer_next_significant_token`.
    tokens: lexer::Tokens<'file, 'src>,
    // Significant tokens already pulled from `tokens` but not yet consumed.
    // Trivia tokens are never stored here; they go to the trivia builder.
    lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>,
    // Position of the last significant token consumed via `advance`;
    // `None` until the first token is consumed.
    last_consumed_position: Option<TokenPosition>,
}
impl<'file, 'src> Cursor<'file, 'src> {
    /// Creates a [`Cursor`] positioned at the start of `tokenized_file`.
    pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self {
        Self {
            tokens: tokenized_file.iter(),
            lookahead_buffer: VecDeque::new(),
            last_consumed_position: None,
        }
    }

    /// Tops up the lookahead buffer until it holds at least `lookahead + 1`
    /// significant tokens, or until the stream runs out.
    ///
    /// Trivia encountered along the way is recorded into `trivia`; no
    /// significant token is consumed.
    fn ensure_lookahead_available(
        &mut self,
        lookahead: usize,
        trivia: &mut TriviaIndexBuilder<'src>,
    ) {
        let mut stream_has_more = true;
        while stream_has_more && self.lookahead_buffer.len() <= lookahead {
            stream_has_more = self.buffer_next_significant_token(trivia);
        }
    }

    /// Pulls tokens from the stream until one significant token is buffered,
    /// recording every trivia token seen on the way into `trivia`.
    ///
    /// Returns `true` once a significant token has been buffered, and `false`
    /// when the stream is exhausted first.
    fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool {
        while let Some((position, data)) = self.tokens.next() {
            match parser::TriviaKind::try_from(data.token) {
                // Trivia: record it and keep scanning.
                Ok(kind) => trivia.record_trivia(parser::TriviaToken {
                    kind,
                    text: data.lexeme,
                    position,
                }),
                // Significant token: buffer it and stop.
                Err(_) => {
                    trivia.record_significant_token(position);
                    self.lookahead_buffer.push_back((position, data));
                    return true;
                }
            }
        }
        false
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Fills the lookahead buffer as needed and borrows its first entry.
    fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> {
        self.cursor.ensure_lookahead_available(0, &mut self.trivia);
        self.cursor.lookahead_buffer.front()
    }

    /// Peeks at the next significant token, leaving it in the stream.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] once the stream is exhausted.
    #[must_use]
    pub(crate) fn peek_token(&mut self) -> Option<Token> {
        let entry = self.peek_buffered_token()?;
        Some(entry.1.token)
    }

    /// Peeks at the next keyword, leaving it in the stream.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] when the stream is exhausted or the next token is
    /// not a keyword.
    #[must_use]
    pub(crate) fn peek_keyword(&mut self) -> Option<Keyword> {
        match self.peek_token()? {
            Token::Keyword(keyword) => Some(keyword),
            _ => None,
        }
    }

    /// Peeks at the position of the next significant token.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] once the stream is exhausted.
    #[must_use]
    pub(crate) fn peek_position(&mut self) -> Option<TokenPosition> {
        let entry = self.peek_buffered_token()?;
        Some(entry.0)
    }

    /// Peeks at the next significant token together with its lexeme.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] once the stream is exhausted.
    #[must_use]
    pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
        let entry = self.peek_buffered_token()?;
        Some((entry.1.token, entry.1.lexeme))
    }

    /// Peeks at the next significant token together with its position.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] once the stream is exhausted.
    #[must_use]
    pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> {
        let entry = self.peek_buffered_token()?;
        Some((entry.1.token, entry.0))
    }

    /// Peeks at the next keyword together with its position.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] when the stream is exhausted or the next token is
    /// not a keyword.
    #[must_use]
    pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> {
        match self.peek_token_and_position() {
            Some((Token::Keyword(keyword), keyword_position)) => {
                Some((keyword, keyword_position))
            }
            _ => None,
        }
    }

    /// Peeks at the next significant token with both its lexeme and its
    /// position.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] once the stream is exhausted.
    #[must_use]
    pub(crate) fn peek_token_lexeme_and_position(
        &mut self,
    ) -> Option<(Token, &'src str, TokenPosition)> {
        let entry = self.peek_buffered_token()?;
        Some((entry.1.token, entry.1.lexeme, entry.0))
    }

    /// Peeks at the significant token `lookahead` positions ahead.
    ///
    /// `lookahead` counts significant tokens only; `0` means the very next
    /// one.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] if the stream ends before that position.
    #[must_use]
    pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
        self.cursor
            .ensure_lookahead_available(lookahead, &mut self.trivia);
        let entry = self.cursor.lookahead_buffer.get(lookahead)?;
        Some(entry.1.token)
    }

    /// Peeks at the keyword `lookahead` positions ahead.
    ///
    /// `lookahead` counts significant tokens only; `0` means the very next
    /// one.
    ///
    /// Buffering may record skipped trivia, but no significant token is
    /// consumed.
    ///
    /// Returns [`None`] when that token is not a keyword or the stream ends
    /// before that position.
    #[must_use]
    pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option<Keyword> {
        if let Some(Token::Keyword(keyword)) = self.peek_token_at(lookahead) {
            Some(keyword)
        } else {
            None
        }
    }

    /// Peeks at the position of the next significant token, treating an
    /// exhausted stream as an error.
    ///
    /// Generates an error of `error_kind` if no tokens remain.
    pub(crate) fn require_position(
        &mut self,
        error_kind: parser::ParseErrorKind,
    ) -> ParseResult<'src, 'arena, TokenPosition> {
        match self.peek_position() {
            Some(position) => Ok(position),
            None => Err(self.make_error_here(error_kind)),
        }
    }

    /// Peeks at the next significant token and its position, treating an
    /// exhausted stream as an error.
    ///
    /// Generates an error of `error_kind` if no tokens remain.
    pub(crate) fn require_token_and_position(
        &mut self,
        error_kind: parser::ParseErrorKind,
    ) -> ParseResult<'src, 'arena, (Token, TokenPosition)> {
        match self.peek_token_and_position() {
            Some(pair) => Ok(pair),
            None => Err(self.make_error_here(error_kind)),
        }
    }

    /// Peeks at the next significant token, its lexeme, and its position,
    /// treating an exhausted stream as an error.
    ///
    /// Generates an error of `error_kind` if no tokens remain.
    pub(crate) fn require_token_lexeme_and_position(
        &mut self,
        error_kind: parser::ParseErrorKind,
    ) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> {
        match self.peek_token_lexeme_and_position() {
            Some(triple) => Ok(triple),
            None => Err(self.make_error_here(error_kind)),
        }
    }

    /// Consumes one significant token.
    ///
    /// Any trivia skipped on the way is recorded; the position of the
    /// consumed token is returned, or [`None`] when no significant tokens
    /// remain.
    pub(crate) fn advance(&mut self) -> Option<TokenPosition> {
        self.cursor.ensure_lookahead_available(0, &mut self.trivia);
        let (consumed_position, _) = self.cursor.lookahead_buffer.pop_front()?;
        self.cursor.last_consumed_position = Some(consumed_position);
        Some(consumed_position)
    }

    /// Consumes the next significant token if it equals `token`.
    ///
    /// Returns `true` on a match; otherwise the cursor is left unchanged and
    /// `false` is returned.
    #[must_use]
    pub(crate) fn eat(&mut self, token: Token) -> bool {
        let next_matches = self.peek_token() == Some(token);
        if next_matches {
            self.advance();
        }
        next_matches
    }

    /// Consumes the next significant token if it is the given keyword.
    ///
    /// Returns `true` on a match; otherwise the cursor is left unchanged and
    /// `false` is returned.
    #[must_use]
    pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool {
        self.eat(Token::Keyword(keyword))
    }

    /// Demands that `expected` is the next significant token.
    ///
    /// On a match the token is consumed and its [`TokenPosition`] returned.
    /// On a mismatch an error of `error_kind` is produced, anchored at the
    /// current token — or at the last consumed token when the stream is
    /// exhausted — with a blame span covering exactly that anchor point.
    pub(crate) fn expect(
        &mut self,
        expected: Token,
        error_kind: parser::ParseErrorKind,
    ) -> ParseResult<'src, 'arena, TokenPosition> {
        // A single peek yields both the candidate token and the error
        // anchor; on EOF the anchor falls back to the last consumed token.
        let (found, anchor) = match self.peek_token_and_position() {
            Some((token, position)) => (Some(token), position),
            None => (None, self.last_consumed_position_or_start()),
        };
        // Comparing `Token` values is sufficient: lexemes and positions
        // are stored separately from the token itself.
        if found == Some(expected) {
            self.advance();
            Ok(anchor)
        } else {
            Err(self
                .make_error_at(error_kind, anchor)
                .blame(AstSpan::new(anchor)))
        }
    }

    /// Demands that `expected` is the next significant keyword.
    ///
    /// On a match the keyword is consumed and its [`TokenPosition`] returned.
    /// On a mismatch an error of `error_kind` is produced, anchored at the
    /// current token — or at the last consumed token when the stream is
    /// exhausted.
    pub(crate) fn expect_keyword(
        &mut self,
        expected: Keyword,
        error_kind: parser::ParseErrorKind,
    ) -> ParseResult<'src, 'arena, TokenPosition> {
        self.expect(Token::Keyword(expected), error_kind)
    }

    /// Position of the most recent significant token consumed through
    /// [`parser::Parser::advance`].
    ///
    /// Returns [`None`] while nothing has been consumed yet.
    #[must_use]
    pub(crate) const fn last_consumed_position(&self) -> Option<TokenPosition> {
        self.cursor.last_consumed_position
    }

    /// Position of the most recent significant token consumed through
    /// [`parser::Parser::advance`], falling back to the start of the stream
    /// when nothing has been consumed yet.
    ///
    /// Handy when diagnostics need a stable anchor even at the very
    /// beginning of the input.
    #[must_use]
    pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition {
        match self.cursor.last_consumed_position {
            Some(position) => position,
            None => TokenPosition(0),
        }
    }

    /// Verifies that parsing has moved past `old_position`.
    ///
    /// A safeguard against infinite loops during error recovery: debug
    /// builds assert that progress was made, while release builds force
    /// progress by consuming one significant token when the parser stalls.
    #[track_caller]
    pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) {
        let Some(current_position) = self.peek_position() else {
            // Stream exhausted: nothing left to stall on.
            return;
        };
        debug_assert!(
            current_position > old_position,
            "parser made no forward progress"
        );
        if current_position <= old_position {
            self.advance();
        }
    }
}