//! Cursor utilities for a token stream.
//!
//! Provides memoized lookahead over significant tokens and records trivia in
//! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments;
//! see [`parser::TriviaKind`].
use std::collections::VecDeque;

use crate::{
    ast::AstSpan,
    lexer::{self, Keyword, Token, TokenPosition},
    parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder},
};
/// Cursor over a token stream with memoized lookahead and trivia attachment.
#[derive(Clone, Debug)]
pub(crate) struct Cursor<'file, 'src> {
    // Remaining tokens of the underlying tokenized file; trivia and
    // significant tokens are pulled from here on demand.
    tokens: lexer::Tokens<'file, 'src>,
    // Significant tokens already pulled from `tokens` but not yet consumed;
    // index 0 is the next significant token.
    lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>,
    // Position of the most recently consumed significant token, or `None`
    // if nothing has been consumed yet. Used to anchor EOF diagnostics.
    last_consumed_position: Option<TokenPosition>,
}
impl<'file, 'src> Cursor<'file, 'src> {
|
|
/// Creates a [`Cursor`] over `tokenized_file`.
|
|
pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self {
|
|
Self {
|
|
tokens: tokenized_file.iter(),
|
|
lookahead_buffer: VecDeque::new(),
|
|
last_consumed_position: None,
|
|
}
|
|
}
|
|
|
|
/// Ensures that the lookahead buffer contains at least `lookahead + 1`
|
|
/// significant tokens, if available.
|
|
///
|
|
/// May consume trivia from the underlying stream without consuming
|
|
/// significant tokens.
|
|
fn ensure_lookahead_available(
|
|
&mut self,
|
|
lookahead: usize,
|
|
trivia: &mut TriviaIndexBuilder<'src>,
|
|
) {
|
|
while self.lookahead_buffer.len() <= lookahead {
|
|
if !self.buffer_next_significant_token(trivia) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Buffers the next significant token and records any preceding trivia.
|
|
///
|
|
/// Returns `true` if a significant token was buffered, or `false` if the
|
|
/// stream is exhausted.
|
|
fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool {
|
|
for (token_position, token_data) in self.tokens.by_ref() {
|
|
if let Ok(trivia_kind) = parser::TriviaKind::try_from(token_data.token) {
|
|
trivia.record_trivia(parser::TriviaToken {
|
|
kind: trivia_kind,
|
|
text: token_data.lexeme,
|
|
position: token_position,
|
|
});
|
|
} else {
|
|
trivia.record_significant_token(token_position);
|
|
self.lookahead_buffer
|
|
.push_back((token_position, token_data));
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
}
|
|
|
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
|
fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> {
|
|
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
|
|
self.cursor.lookahead_buffer.front()
|
|
}
|
|
|
|
/// Returns the next significant token without consuming it.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_token(&mut self) -> Option<Token> {
|
|
self.peek_buffered_token()
|
|
.map(|(_, token_data)| token_data.token)
|
|
}
|
|
|
|
/// Returns the next keyword without consuming it.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain or if the next token is not
|
|
/// a keyword.
|
|
#[must_use]
|
|
pub(crate) fn peek_keyword(&mut self) -> Option<Keyword> {
|
|
match self.peek_token() {
|
|
Some(Token::Keyword(keyword)) => Some(keyword),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Returns the position of the next significant token without consuming it.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_position(&mut self) -> Option<TokenPosition> {
|
|
self.peek_buffered_token()
|
|
.map(|(token_position, _)| *token_position)
|
|
}
|
|
|
|
/// Returns the next significant token and its lexeme without consuming it.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
|
|
self.peek_buffered_token()
|
|
.map(|(_, token_data)| (token_data.token, token_data.lexeme))
|
|
}
|
|
|
|
/// Returns the next significant token and its position without consuming
|
|
/// it.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> {
|
|
self.peek_buffered_token()
|
|
.map(|(token_position, token_data)| (token_data.token, *token_position))
|
|
}
|
|
|
|
/// Returns the next keyword and its position without consuming it.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if next token isn't keyword or no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> {
|
|
let Some((Token::Keyword(keyword), keyword_position)) = self.peek_token_and_position()
|
|
else {
|
|
return None;
|
|
};
|
|
Some((keyword, keyword_position))
|
|
}
|
|
|
|
/// Returns the next significant token, its lexeme, and its position
|
|
/// without consuming them.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_token_lexeme_and_position(
|
|
&mut self,
|
|
) -> Option<(Token, &'src str, TokenPosition)> {
|
|
self.peek_buffered_token()
|
|
.map(|(token_position, token_data)| {
|
|
(token_data.token, token_data.lexeme, *token_position)
|
|
})
|
|
}
|
|
|
|
/// Returns the next significant token at `lookahead` without consuming it.
|
|
///
|
|
/// `lookahead` counts significant tokens, with `0` referring to the next
|
|
/// significant token.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if no tokens remain.
|
|
#[must_use]
|
|
pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
|
|
self.cursor
|
|
.ensure_lookahead_available(lookahead, &mut self.trivia);
|
|
self.cursor
|
|
.lookahead_buffer
|
|
.get(lookahead)
|
|
.map(|(_, token_data)| token_data.token)
|
|
}
|
|
|
|
/// Returns the keyword at `lookahead` without consuming it.
|
|
///
|
|
/// `lookahead` counts significant tokens, with `0` referring to the next
|
|
/// significant token.
|
|
///
|
|
/// May buffer additional tokens and record skipped trivia, but does not
|
|
/// consume any significant token.
|
|
///
|
|
/// Returns [`None`] if the token at that position is not a keyword or if
|
|
/// the stream ends before that position.
|
|
#[must_use]
|
|
pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option<Keyword> {
|
|
match self.peek_token_at(lookahead) {
|
|
Some(Token::Keyword(keyword)) => Some(keyword),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Returns the position of the next significant token without consuming it.
|
|
///
|
|
/// Generates an error with `error_kind` if no tokens remain.
|
|
pub(crate) fn require_position(
|
|
&mut self,
|
|
error_kind: parser::ParseErrorKind,
|
|
) -> ParseResult<'src, 'arena, TokenPosition> {
|
|
self.peek_position()
|
|
.ok_or_else(|| self.make_error_here(error_kind))
|
|
}
|
|
|
|
/// Returns the next significant token and its position without consuming
|
|
/// it.
|
|
///
|
|
/// Generates an error with `error_kind` if no tokens remain.
|
|
pub(crate) fn require_token_and_position(
|
|
&mut self,
|
|
error_kind: parser::ParseErrorKind,
|
|
) -> ParseResult<'src, 'arena, (Token, TokenPosition)> {
|
|
self.peek_token_and_position()
|
|
.ok_or_else(|| self.make_error_here(error_kind))
|
|
}
|
|
|
|
/// Returns the next significant token, its lexeme, and its position
|
|
/// without consuming them.
|
|
///
|
|
/// Generates an error with `error_kind` if no tokens remain.
|
|
pub(crate) fn require_token_lexeme_and_position(
|
|
&mut self,
|
|
error_kind: parser::ParseErrorKind,
|
|
) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> {
|
|
self.peek_token_lexeme_and_position()
|
|
.ok_or_else(|| self.make_error_here(error_kind))
|
|
}
|
|
|
|
/// Advances by one significant token.
|
|
///
|
|
/// Records any skipped trivia and returns the consumed token position.
|
|
/// Returns [`None`] if no significant tokens remain.
|
|
pub(crate) fn advance(&mut self) -> Option<TokenPosition> {
|
|
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
|
|
if let Some((token_position, _)) = self.cursor.lookahead_buffer.pop_front() {
|
|
self.cursor.last_consumed_position = Some(token_position);
|
|
Some(token_position)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// If the next significant token equals `token`, consumes it and
|
|
/// returns `true`.
|
|
///
|
|
/// Otherwise leaves the cursor unchanged and returns `false`.
|
|
#[must_use]
|
|
pub(crate) fn eat(&mut self, token: Token) -> bool {
|
|
if self.peek_token() == Some(token) {
|
|
self.advance();
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
/// If the next significant token corresponds to the given keyword,
|
|
/// consumes it and returns `true`.
|
|
///
|
|
/// Otherwise leaves the cursor unchanged and returns `false`.
|
|
#[must_use]
|
|
pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool {
|
|
self.eat(Token::Keyword(keyword))
|
|
}
|
|
|
|
/// Expects `expected` token as the next significant one.
|
|
///
|
|
/// On match consumes the token and returns its [`TokenPosition`].
|
|
/// Otherwise returns an error of `error_kind` anchored at
|
|
/// the current token, or at the last consumed token if the stream is
|
|
/// exhausted. That error also gets set a blame span that contains exactly
|
|
/// that anchor point.
|
|
pub(crate) fn expect(
|
|
&mut self,
|
|
expected: Token,
|
|
error_kind: parser::ParseErrorKind,
|
|
) -> ParseResult<'src, 'arena, TokenPosition> {
|
|
// Anchors EOF diagnostics at the last consumed token
|
|
// when no current token exists.
|
|
let anchor = self
|
|
.peek_position()
|
|
.unwrap_or_else(|| self.last_consumed_position_or_start());
|
|
// `Token` equality is enough here because lexeme and position
|
|
// are stored separately.
|
|
if self.peek_token() == Some(expected) {
|
|
self.advance();
|
|
Ok(anchor)
|
|
} else {
|
|
Err(self
|
|
.make_error_at(error_kind, anchor)
|
|
.blame(AstSpan::new(anchor)))
|
|
}
|
|
}
|
|
|
|
/// Expects `expected` keyword as the next significant token.
|
|
///
|
|
/// On match consumes the keyword and returns its [`TokenPosition`].
|
|
/// Otherwise returns an error of `error_kind` anchored at the current
|
|
/// token, or at the last consumed token if the stream is exhausted.
|
|
pub(crate) fn expect_keyword(
|
|
&mut self,
|
|
expected: Keyword,
|
|
error_kind: parser::ParseErrorKind,
|
|
) -> ParseResult<'src, 'arena, TokenPosition> {
|
|
self.expect(Token::Keyword(expected), error_kind)
|
|
}
|
|
|
|
/// Returns position of the last significant token that was actually
|
|
/// consumed by [`parser::Parser::advance`].
|
|
///
|
|
/// Returns [`None`] if no tokens have been consumed yet.
|
|
#[must_use]
|
|
pub(crate) const fn last_consumed_position(&self) -> Option<TokenPosition> {
|
|
self.cursor.last_consumed_position
|
|
}
|
|
|
|
/// Returns the position of the last significant token consumed by
|
|
/// [`parser::Parser::advance`], or the start of the stream if no token has
|
|
/// been consumed yet.
|
|
///
|
|
/// Useful when diagnostics need a stable anchor even at the beginning of
|
|
/// input.
|
|
#[must_use]
|
|
pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition {
|
|
self.cursor
|
|
.last_consumed_position
|
|
.unwrap_or(TokenPosition(0))
|
|
}
|
|
|
|
/// Ensures that parsing has advanced past `old_position`.
|
|
///
|
|
/// This is intended as a safeguard against infinite-loop bugs while
|
|
/// recovering from invalid input. In debug builds it asserts that progress
|
|
/// was made; in release builds it consumes one significant token when
|
|
/// the parser stalls.
|
|
#[track_caller]
|
|
pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) {
|
|
if let Some(peeked_position) = self.peek_position() {
|
|
debug_assert!(
|
|
peeked_position > old_position,
|
|
"parser made no forward progress"
|
|
);
|
|
if peeked_position <= old_position {
|
|
self.advance();
|
|
}
|
|
}
|
|
}
|
|
}
|