rott/rottlib/src/parser/trivia.rs
dkanus 588790b9b4 Refactor everything
Huge dump of refactored code. Still in the middle of the changes that
are to be squashed later in a one huge monster commit, because there is
no value in anything atomic here.
2026-04-05 20:32:11 +07:00

258 lines
9.1 KiB
Rust

//! Records trivia separately from significant tokens so parser code can work
//! with significant tokens without losing comments, whitespace, or line
//! structure.
//!
//! Tokens considered *trivia* are:
//!
//! 1. [`crate::lexer::Token::LineComment`];
//! 2. [`crate::lexer::Token::BlockComment`];
//! 3. [`crate::lexer::Token::Newline`];
//! 4. [`crate::lexer::Token::Whitespace`].
//!
//! Every other token is considered *significant*.
//!
//! ## Required usage
//!
//! This is an internal helper. Callers must follow the protocol below.
//!
//! [`TriviaIndexBuilder`] must be driven over a single token stream in
//! strictly increasing [`TokenPosition`] order.
//! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source
//! order, and call [`TriviaIndexBuilder::record_significant_token`] for each
//! significant token.
//!
//! After every token has been recorded, including any trivia that follows the
//! last significant token, call [`TriviaIndexBuilder::into_index`] to attach
//! the trailing trivia.
//!
//! Violating this protocol is a logic error.
use crate::lexer::TokenPosition;
/// Classifies a trivia token.
///
/// Each variant is the counterpart of the same-named variant of
/// [`crate::lexer::Token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TriviaKind {
    /// Counterpart of [`crate::lexer::Token::Whitespace`].
    Whitespace,
    /// Counterpart of [`crate::lexer::Token::Newline`].
    Newline,
    /// Counterpart of [`crate::lexer::Token::LineComment`].
    LineComment,
    /// Counterpart of [`crate::lexer::Token::BlockComment`].
    BlockComment,
}
impl std::convert::TryFrom<crate::lexer::Token> for TriviaKind {
    /// Carries no information: the only failure is "not a trivia token".
    type Error = ();

    /// Converts a lexer token into its trivia kind.
    ///
    /// Returns `Err(())` for every token that is not whitespace, a newline,
    /// a line comment, or a block comment.
    fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
        use crate::lexer::Token;

        let kind = match token {
            Token::Whitespace => Self::Whitespace,
            Token::Newline => Self::Newline,
            Token::LineComment => Self::LineComment,
            Token::BlockComment => Self::BlockComment,
            // Anything else is a significant token.
            _ => return Err(()),
        };
        Ok(kind)
    }
}
/// One trivia token as stored by the trivia index.
///
/// The token text is borrowed for `'src`, so values of this type are cheap
/// to copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TriviaToken<'src> {
    /// Which kind of trivia this token is.
    pub kind: TriviaKind,
    /// The token's source text.
    pub text: &'src str,
    /// Where this trivia token sits in the token stream.
    pub position: TokenPosition,
}
/// Maps a [`BoundaryLocation`] to a half-open range of indices into the
/// contiguous trivia token storage (`tokens` of the index / builder).
type TriviaRangeMap = std::collections::HashMap<BoundaryLocation, std::ops::Range<usize>>;
/// A [`TokenPosition`] extended with start-of-file and end-of-file markers.
///
/// Significant tokens are fully described by a plain [`TokenPosition`], but
/// trivia may also sit before the first significant token or after the last
/// one, and those gaps need a boundary of their own.
///
/// The derived ordering follows declaration order, so
/// `StartOfFile < Token(_) < EndOfFile`; do not reorder the variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum BoundaryLocation {
    /// Boundary before any token in the file.
    StartOfFile,
    /// Boundary at the significant token with the given position.
    Token(TokenPosition),
    /// Boundary after every token in the file.
    EndOfFile,
}
/// Read-only lookup structure over recorded trivia.
///
/// Trivia immediately before or after a significant token, as well as
/// file-leading and file-trailing trivia, is found with a single hash-map
/// lookup. Slices handed out borrow the index itself, while the token texts
/// inside them live for `'src`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
// NOTE(review): presumably silences warnings while not every accessor has a
// caller yet; confirm and drop once the index is fully wired in.
#[allow(dead_code)]
pub struct TriviaIndex<'src> {
    /// Every trivia token, stored contiguously in file order.
    tokens: Vec<TriviaToken<'src>>,
    /// For a boundary location, the range in `tokens` holding the trivia
    /// that comes right after it.
    trivia_after_boundary: TriviaRangeMap,
    /// For a boundary location, the range in `tokens` holding the trivia
    /// that comes right before it.
    trivia_before_boundary: TriviaRangeMap,
}
/// Incrementally builds a [`TriviaIndex`].
///
/// The caller walks a token stream in file order and reports each token to
/// the builder; trivia seen between two successive significant tokens is
/// grouped into one gap. When the whole stream has been reported, call
/// [`TriviaIndexBuilder::into_index`] to obtain the finished index.
#[derive(Debug)]
// NOTE(review): presumably silences warnings while the builder is not used
// everywhere yet; confirm and drop once it is.
#[allow(dead_code)]
pub struct TriviaIndexBuilder<'src> {
    /// Every flushed trivia token, stored contiguously in file order.
    tokens: Vec<TriviaToken<'src>>,
    /// For a boundary location, the range in `tokens` holding the trivia
    /// that comes right after it.
    trivia_after_boundary: TriviaRangeMap,
    /// For a boundary location, the range in `tokens` holding the trivia
    /// that comes right before it.
    trivia_before_boundary: TriviaRangeMap,
    /// Trivia seen since the last significant token (or since the start of
    /// the file) that has not been attached to a right boundary yet.
    pending_trivia: Vec<TriviaToken<'src>>,
    /// Left edge of the gap that is currently being filled.
    current_left_boundary: BoundaryLocation,
}
impl Default for TriviaIndexBuilder<'_> {
    /// Creates an empty builder whose open gap starts at the beginning of
    /// the file.
    fn default() -> Self {
        // Nothing has been seen yet, so the first gap opens at file start.
        let current_left_boundary = BoundaryLocation::StartOfFile;
        Self {
            current_left_boundary,
            pending_trivia: Vec::new(),
            tokens: Vec::new(),
            trivia_before_boundary: TriviaRangeMap::new(),
            trivia_after_boundary: TriviaRangeMap::new(),
        }
    }
}
impl<'src> TriviaIndexBuilder<'src> {
    /// Adds `token` to the trivia of the currently open gap.
    ///
    /// Trivia must be reported in file order; debug builds assert this.
    pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) {
        #[cfg(debug_assertions)]
        self.debug_assert_position_is_in_order(token.position);

        self.pending_trivia.push(token);
    }

    /// Reports the significant token at `position`.
    ///
    /// Any pending trivia is attached to the gap that ends at this token,
    /// and the token becomes the left edge of the next gap. Positions must
    /// be reported in file order; debug builds assert this.
    pub(crate) fn record_significant_token(&mut self, position: TokenPosition) {
        #[cfg(debug_assertions)]
        self.debug_assert_position_is_in_order(position);

        let boundary = BoundaryLocation::Token(position);
        self.flush_pending_trivia_to_boundary(boundary);
        self.current_left_boundary = boundary;
    }

    // Moves the pending trivia into `tokens` and registers the resulting
    // range under both the left and the right boundary of the gap, so that
    // a lookup from either side yields the same slice. Does nothing when no
    // trivia is pending.
    fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) {
        let pending_count = self.pending_trivia.len();
        if pending_count == 0 {
            return;
        }

        let first = self.tokens.len();
        let range = first..first + pending_count;
        self.tokens.append(&mut self.pending_trivia);

        self.trivia_before_boundary
            .insert(right_boundary, range.clone());
        self.trivia_after_boundary
            .insert(self.current_left_boundary, range);
    }

    /// Consumes the builder and produces the finished trivia index.
    ///
    /// Trivia that is still pending is stored as file-trailing trivia.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn into_index(mut self) -> TriviaIndex<'src> {
        self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile);

        let Self {
            tokens,
            trivia_after_boundary,
            trivia_before_boundary,
            ..
        } = self;
        TriviaIndex {
            tokens,
            trivia_after_boundary,
            trivia_before_boundary,
        }
    }

    // Debug-only guard for the ordering invariant the builder depends on:
    // `position` must lie after the current left boundary and after the
    // most recently recorded pending trivia token, if there is one.
    #[cfg(debug_assertions)]
    fn debug_assert_position_is_in_order(&self, position: TokenPosition) {
        let location = BoundaryLocation::Token(position);
        debug_assert!(location > self.current_left_boundary);

        if let Some(last) = self.pending_trivia.last() {
            debug_assert!(last.position < position);
        }
    }
}
impl<'src> TriviaIndex<'src> {
    /// Returns the trivia that directly follows the significant token at
    /// `position`.
    ///
    /// The slice is empty when `position` is not a recorded significant
    /// token, or when no trivia was recorded after that token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
        let boundary = BoundaryLocation::Token(position);
        self.slice_for(boundary, &self.trivia_after_boundary)
    }

    /// Returns the trivia that directly precedes the significant token at
    /// `position`.
    ///
    /// The slice is empty when `position` is not a recorded significant
    /// token, or when no trivia was recorded before that token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
        let boundary = BoundaryLocation::Token(position);
        self.slice_for(boundary, &self.trivia_before_boundary)
    }

    /// Returns the trivia located before the first significant token.
    ///
    /// When no significant token was recorded at all, this is every
    /// recorded trivia token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] {
        self.slice_for(BoundaryLocation::StartOfFile, &self.trivia_after_boundary)
    }

    /// Returns the trivia located after the last significant token.
    ///
    /// When no significant token was recorded at all, this is every
    /// recorded trivia token.
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] {
        self.slice_for(BoundaryLocation::EndOfFile, &self.trivia_before_boundary)
    }

    // Resolves `key` in `map` to the matching slice of `tokens`; an unknown
    // key yields an empty slice.
    #[allow(dead_code)]
    fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] {
        let range = match map.get(&key) {
            Some(range) => range,
            None => return &[],
        };

        // The builder only ever stores ranges that lie inside `tokens`.
        debug_assert!(range.start <= range.end);
        debug_assert!(range.end <= self.tokens.len());
        self.tokens.get(range.clone()).unwrap_or(&[])
    }
}