//! Records trivia separately from significant tokens so parser code can work //! with significant tokens without losing comments, whitespace, or line //! structure. //! //! Tokens considered *trivia* are: //! //! 1. [`crate::lexer::Token::LineComment`]; //! 2. [`crate::lexer::Token::BlockComment`]; //! 3. [`crate::lexer::Token::Newline`]; //! 4. [`crate::lexer::Token::Whitespace`]. //! //! Every other token is considered *significant*. //! //! ## Required usage //! //! This is an internal helper. Callers must follow the protocol below. //! //! [`TriviaIndexBuilder`] must be driven over a single token stream in //! strictly increasing [`TokenPosition`] order. //! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source //! order, and call [`TriviaIndexBuilder::record_significant_token`] for each //! significant token. //! //! After the last significant token has been processed, call //! [`TriviaIndexBuilder::into_index`] to attach any trailing trivia. //! //! Violating this protocol is a logic error. use crate::lexer::TokenPosition; /// Kinds of trivia tokens corresponding to variants of [`crate::lexer::Token`]. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum TriviaKind { Whitespace, Newline, LineComment, BlockComment, } impl std::convert::TryFrom for TriviaKind { type Error = (); fn try_from(token: crate::lexer::Token) -> Result { use crate::lexer::Token; match token { Token::Whitespace => Ok(Self::Whitespace), Token::Newline => Ok(Self::Newline), Token::LineComment => Ok(Self::LineComment), Token::BlockComment => Ok(Self::BlockComment), _ => Err(()), } } } /// A recorded trivia token. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub struct TriviaToken<'src> { /// Kind of trivia token. pub kind: TriviaKind, /// Source text of the token. pub text: &'src str, /// Location of this trivia token in the token stream. pub position: TokenPosition, } type TriviaRangeMap = std::collections::HashMap>; /// Extends [`TokenPosition`] with start-of-file and end-of-file markers. /// /// Regular [`TokenPosition`] values are enough for significant tokens, but /// trivia also needs to represent content before the first significant token /// and after the last one. #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] enum BoundaryLocation { StartOfFile, Token(TokenPosition), EndOfFile, } /// Immutable index over recorded trivia. /// /// Provides O(1) access to trivia immediately before or after any significant /// token, as well as file-leading and file-trailing trivia. Returned slices /// borrow the index, and the contained token texts live for `'src`. #[derive(Clone, Debug, PartialEq, Eq, Default)] #[allow(dead_code)] pub struct TriviaIndex<'src> { /// All trivia tokens, stored contiguously in file order. tokens: Vec>, /// Maps a trivia boundary location to the trivia tokens stored right /// after it. trivia_after_boundary: TriviaRangeMap, /// Maps a trivia boundary location to the trivia tokens stored right /// before it. trivia_before_boundary: TriviaRangeMap, } /// Mutable builder for [`TriviaIndex`]. /// /// Records trivia between successive significant tokens while the caller walks /// a token stream in file order. Once all tokens have been processed, call /// [`TriviaIndexBuilder::into_index`] to finalize the index. #[derive(Debug)] #[allow(dead_code)] pub struct TriviaIndexBuilder<'src> { /// All trivia tokens, stored contiguously in file order. tokens: Vec>, /// Maps boundary location to the trivia tokens stored right after it. trivia_after_boundary: TriviaRangeMap, /// Maps boundary location to the trivia tokens stored right before it. trivia_before_boundary: TriviaRangeMap, /// Trivia collected since the last significant token (or file start), /// not yet attached to a right boundary. pending_trivia: Vec>, /// Left boundary of the currently open gap. current_left_boundary: BoundaryLocation, } impl Default for TriviaIndexBuilder<'_> { fn default() -> Self { Self { tokens: Vec::new(), trivia_after_boundary: TriviaRangeMap::default(), trivia_before_boundary: TriviaRangeMap::default(), pending_trivia: Vec::new(), current_left_boundary: BoundaryLocation::StartOfFile, } } } impl<'src> TriviaIndexBuilder<'src> { /// Records `token` as trivia. /// /// Tokens must be recorded in file order. pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) { #[cfg(debug_assertions)] self.debug_assert_position_is_in_order(token.position); self.pending_trivia.push(token); } /// Records a significant token at `position`. /// /// Positions must be recorded in file order. pub(crate) fn record_significant_token(&mut self, position: TokenPosition) { let right_boundary = BoundaryLocation::Token(position); #[cfg(debug_assertions)] self.debug_assert_position_is_in_order(position); self.flush_pending_trivia_to_boundary(right_boundary); self.current_left_boundary = right_boundary; } // Stores one trivia range under both neighboring boundaries so lookups // from either side return the same slice. fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) { if self.pending_trivia.is_empty() { return; } let trivia_start = self.tokens.len(); self.tokens.append(&mut self.pending_trivia); let trivia_end = self.tokens.len(); self.trivia_after_boundary .insert(self.current_left_boundary, trivia_start..trivia_end); self.trivia_before_boundary .insert(right_boundary, trivia_start..trivia_end); } /// Finalizes the builder and returns the completed trivia index. /// /// Any pending trivia is recorded as trailing trivia. #[must_use] #[allow(dead_code)] pub(crate) fn into_index(mut self) -> TriviaIndex<'src> { self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile); TriviaIndex { tokens: self.tokens, trivia_after_boundary: self.trivia_after_boundary, trivia_before_boundary: self.trivia_before_boundary, } } // Catches out-of-order recording during development; the builder relies // on this ordering invariant. #[cfg(debug_assertions)] fn debug_assert_position_is_in_order(&self, position: TokenPosition) { let location = BoundaryLocation::Token(position); debug_assert!(location > self.current_left_boundary); if let Some(last) = self.pending_trivia.last() { debug_assert!(last.position < position); } } } impl<'src> TriviaIndex<'src> { /// Returns the trivia immediately after the significant token at /// `position`. /// /// Returns an empty slice if `position` does not identify a recorded /// significant token or if no trivia was recorded after it. #[must_use] #[allow(dead_code)] pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] { self.slice_for( BoundaryLocation::Token(position), &self.trivia_after_boundary, ) } /// Returns the trivia immediately before the significant token at `position`. /// /// Returns an empty slice if `position` does not identify a recorded /// significant token or if no trivia was recorded before it. #[must_use] #[allow(dead_code)] pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] { self.slice_for( BoundaryLocation::Token(position), &self.trivia_before_boundary, ) } /// Returns the trivia before the first significant token. /// /// If no significant tokens were recorded, returns all recorded trivia. #[must_use] #[allow(dead_code)] pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] { self.slice_for(BoundaryLocation::StartOfFile, &self.trivia_after_boundary) } /// Returns the trivia after the last significant token. /// /// If no significant tokens were recorded, returns all recorded trivia. #[must_use] #[allow(dead_code)] pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] { self.slice_for(BoundaryLocation::EndOfFile, &self.trivia_before_boundary) } #[allow(dead_code)] fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] { match map.get(&key) { Some(range) => { // Ranges are guaranteed to be valid by construction debug_assert!(range.start <= range.end); debug_assert!(range.end <= self.tokens.len()); self.tokens.get(range.clone()).unwrap_or(&[]) } None => &[], } } }