Huge dump of refactored code. Still in the middle of changes that are to be squashed later into one huge monster commit, because there is no value in anything atomic here.
258 lines
9.1 KiB
Rust
258 lines
9.1 KiB
Rust
//! Records trivia separately from significant tokens so parser code can work
//! with significant tokens without losing comments, whitespace, or line
//! structure.
//!
//! Tokens considered *trivia* are:
//!
//! 1. [`crate::lexer::Token::LineComment`];
//! 2. [`crate::lexer::Token::BlockComment`];
//! 3. [`crate::lexer::Token::Newline`];
//! 4. [`crate::lexer::Token::Whitespace`].
//!
//! Every other token is considered *significant*.
//!
//! ## Required usage
//!
//! This is an internal helper. Callers must follow the protocol below.
//!
//! [`TriviaIndexBuilder`] must be driven over a single token stream in
//! strictly increasing [`TokenPosition`] order.
//! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source
//! order, and call [`TriviaIndexBuilder::record_significant_token`] for each
//! significant token.
//!
//! After the last token has been processed (including any trivia that follows
//! the final significant token), call [`TriviaIndexBuilder::into_index`] to
//! attach the remaining trivia as trailing trivia.
//!
//! Violating this protocol is a logic error.
|
|
|
|
use crate::lexer::TokenPosition;
|
|
|
|
/// Kinds of trivia tokens corresponding to variants of [`crate::lexer::Token`].
///
/// Each variant mirrors the lexer token variant of the same name; see the
/// `TryFrom<crate::lexer::Token>` implementation for the mapping.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum TriviaKind {
    /// Counterpart of `Token::Whitespace`.
    Whitespace,
    /// Counterpart of `Token::Newline`.
    Newline,
    /// Counterpart of `Token::LineComment`.
    LineComment,
    /// Counterpart of `Token::BlockComment`.
    BlockComment,
}
|
|
|
|
impl std::convert::TryFrom<crate::lexer::Token> for TriviaKind {
|
|
type Error = ();
|
|
|
|
fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
|
|
use crate::lexer::Token;
|
|
match token {
|
|
Token::Whitespace => Ok(Self::Whitespace),
|
|
Token::Newline => Ok(Self::Newline),
|
|
Token::LineComment => Ok(Self::LineComment),
|
|
Token::BlockComment => Ok(Self::BlockComment),
|
|
_ => Err(()),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A recorded trivia token.
|
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
|
pub struct TriviaToken<'src> {
|
|
/// Kind of trivia token.
|
|
pub kind: TriviaKind,
|
|
/// Source text of the token.
|
|
pub text: &'src str,
|
|
/// Location of this trivia token in the token stream.
|
|
pub position: TokenPosition,
|
|
}
|
|
|
|
/// Lookup table from a boundary to the index range of the trivia tokens
/// attached to it. The ranges index into the `tokens` vector held by
/// [`TriviaIndex`] / [`TriviaIndexBuilder`].
type TriviaRangeMap = std::collections::HashMap<BoundaryLocation, std::ops::Range<usize>>;
|
|
|
|
/// Extends [`TokenPosition`] with start-of-file and end-of-file markers.
|
|
///
|
|
/// Regular [`TokenPosition`] values are enough for significant tokens, but
|
|
/// trivia also needs to represent content before the first significant token
|
|
/// and after the last one.
|
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
|
enum BoundaryLocation {
|
|
StartOfFile,
|
|
Token(TokenPosition),
|
|
EndOfFile,
|
|
}
|
|
|
|
/// Immutable index over recorded trivia.
|
|
///
|
|
/// Provides O(1) access to trivia immediately before or after any significant
|
|
/// token, as well as file-leading and file-trailing trivia. Returned slices
|
|
/// borrow the index, and the contained token texts live for `'src`.
|
|
#[derive(Clone, Debug, PartialEq, Eq, Default)]
|
|
#[allow(dead_code)]
|
|
pub struct TriviaIndex<'src> {
|
|
/// All trivia tokens, stored contiguously in file order.
|
|
tokens: Vec<TriviaToken<'src>>,
|
|
/// Maps a trivia boundary location to the trivia tokens stored right
|
|
/// after it.
|
|
trivia_after_boundary: TriviaRangeMap,
|
|
/// Maps a trivia boundary location to the trivia tokens stored right
|
|
/// before it.
|
|
trivia_before_boundary: TriviaRangeMap,
|
|
}
|
|
|
|
/// Mutable builder for [`TriviaIndex`].
|
|
///
|
|
/// Records trivia between successive significant tokens while the caller walks
|
|
/// a token stream in file order. Once all tokens have been processed, call
|
|
/// [`TriviaIndexBuilder::into_index`] to finalize the index.
|
|
#[derive(Debug)]
|
|
#[allow(dead_code)]
|
|
pub struct TriviaIndexBuilder<'src> {
|
|
/// All trivia tokens, stored contiguously in file order.
|
|
tokens: Vec<TriviaToken<'src>>,
|
|
/// Maps boundary location to the trivia tokens stored right after it.
|
|
trivia_after_boundary: TriviaRangeMap,
|
|
/// Maps boundary location to the trivia tokens stored right before it.
|
|
trivia_before_boundary: TriviaRangeMap,
|
|
|
|
/// Trivia collected since the last significant token (or file start),
|
|
/// not yet attached to a right boundary.
|
|
pending_trivia: Vec<TriviaToken<'src>>,
|
|
|
|
/// Left boundary of the currently open gap.
|
|
current_left_boundary: BoundaryLocation,
|
|
}
|
|
|
|
impl Default for TriviaIndexBuilder<'_> {
|
|
fn default() -> Self {
|
|
Self {
|
|
tokens: Vec::new(),
|
|
trivia_after_boundary: TriviaRangeMap::default(),
|
|
trivia_before_boundary: TriviaRangeMap::default(),
|
|
pending_trivia: Vec::new(),
|
|
current_left_boundary: BoundaryLocation::StartOfFile,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'src> TriviaIndexBuilder<'src> {
|
|
/// Records `token` as trivia.
|
|
///
|
|
/// Tokens must be recorded in file order.
|
|
pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) {
|
|
#[cfg(debug_assertions)]
|
|
self.debug_assert_position_is_in_order(token.position);
|
|
|
|
self.pending_trivia.push(token);
|
|
}
|
|
|
|
/// Records a significant token at `position`.
|
|
///
|
|
/// Positions must be recorded in file order.
|
|
pub(crate) fn record_significant_token(&mut self, position: TokenPosition) {
|
|
let right_boundary = BoundaryLocation::Token(position);
|
|
|
|
#[cfg(debug_assertions)]
|
|
self.debug_assert_position_is_in_order(position);
|
|
|
|
self.flush_pending_trivia_to_boundary(right_boundary);
|
|
self.current_left_boundary = right_boundary;
|
|
}
|
|
|
|
// Stores one trivia range under both neighboring boundaries so lookups
|
|
// from either side return the same slice.
|
|
fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) {
|
|
if self.pending_trivia.is_empty() {
|
|
return;
|
|
}
|
|
let trivia_start = self.tokens.len();
|
|
self.tokens.append(&mut self.pending_trivia);
|
|
let trivia_end = self.tokens.len();
|
|
self.trivia_after_boundary
|
|
.insert(self.current_left_boundary, trivia_start..trivia_end);
|
|
self.trivia_before_boundary
|
|
.insert(right_boundary, trivia_start..trivia_end);
|
|
}
|
|
|
|
/// Finalizes the builder and returns the completed trivia index.
|
|
///
|
|
/// Any pending trivia is recorded as trailing trivia.
|
|
#[must_use]
|
|
#[allow(dead_code)]
|
|
pub(crate) fn into_index(mut self) -> TriviaIndex<'src> {
|
|
self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile);
|
|
|
|
TriviaIndex {
|
|
tokens: self.tokens,
|
|
trivia_after_boundary: self.trivia_after_boundary,
|
|
trivia_before_boundary: self.trivia_before_boundary,
|
|
}
|
|
}
|
|
|
|
// Catches out-of-order recording during development; the builder relies
|
|
// on this ordering invariant.
|
|
#[cfg(debug_assertions)]
|
|
fn debug_assert_position_is_in_order(&self, position: TokenPosition) {
|
|
let location = BoundaryLocation::Token(position);
|
|
debug_assert!(location > self.current_left_boundary);
|
|
if let Some(last) = self.pending_trivia.last() {
|
|
debug_assert!(last.position < position);
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'src> TriviaIndex<'src> {
|
|
/// Returns the trivia immediately after the significant token at
|
|
/// `position`.
|
|
///
|
|
/// Returns an empty slice if `position` does not identify a recorded
|
|
/// significant token or if no trivia was recorded after it.
|
|
#[must_use]
|
|
#[allow(dead_code)]
|
|
pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
|
|
self.slice_for(
|
|
BoundaryLocation::Token(position),
|
|
&self.trivia_after_boundary,
|
|
)
|
|
}
|
|
|
|
/// Returns the trivia immediately before the significant token at `position`.
|
|
///
|
|
/// Returns an empty slice if `position` does not identify a recorded
|
|
/// significant token or if no trivia was recorded before it.
|
|
#[must_use]
|
|
#[allow(dead_code)]
|
|
pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
|
|
self.slice_for(
|
|
BoundaryLocation::Token(position),
|
|
&self.trivia_before_boundary,
|
|
)
|
|
}
|
|
|
|
/// Returns the trivia before the first significant token.
|
|
///
|
|
/// If no significant tokens were recorded, returns all recorded trivia.
|
|
#[must_use]
|
|
#[allow(dead_code)]
|
|
pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] {
|
|
self.slice_for(BoundaryLocation::StartOfFile, &self.trivia_after_boundary)
|
|
}
|
|
|
|
/// Returns the trivia after the last significant token.
|
|
///
|
|
/// If no significant tokens were recorded, returns all recorded trivia.
|
|
#[must_use]
|
|
#[allow(dead_code)]
|
|
pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] {
|
|
self.slice_for(BoundaryLocation::EndOfFile, &self.trivia_before_boundary)
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] {
|
|
match map.get(&key) {
|
|
Some(range) => {
|
|
// Ranges are guaranteed to be valid by construction
|
|
debug_assert!(range.start <= range.end);
|
|
debug_assert!(range.end <= self.tokens.len());
|
|
self.tokens.get(range.clone()).unwrap_or(&[])
|
|
}
|
|
None => &[],
|
|
}
|
|
}
|
|
}
|