From d519ecab2e987fc62b56d2f43905545d036422da Mon Sep 17 00:00:00 2001
From: dkanus
Date: Mon, 11 Aug 2025 03:31:26 +0700
Subject: [PATCH] Fix documentation and comments

---
 rottlib/src/lexer/iterator.rs |  2 +-
 rottlib/src/lexer/mod.rs      | 45 +++++++++++++++++++++--------------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/rottlib/src/lexer/iterator.rs b/rottlib/src/lexer/iterator.rs
index d990128..4fc991b 100644
--- a/rottlib/src/lexer/iterator.rs
+++ b/rottlib/src/lexer/iterator.rs
@@ -139,7 +139,7 @@ impl<'src> TokenizedFile<'src> {
     /// ## Examples
     ///
     /// ```rust
-    /// use mycrate::{TokenizedFile, TokenLocation, Token};
+    /// use super::{TokenizedFile, TokenLocation, Token};
     /// let file = TokenizedFile::from_str("0 / 0");
     /// assert_eq!(
     ///     file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),
diff --git a/rottlib/src/lexer/mod.rs b/rottlib/src/lexer/mod.rs
index 8c8d3da..6d34a33 100644
--- a/rottlib/src/lexer/mod.rs
+++ b/rottlib/src/lexer/mod.rs
@@ -23,11 +23,11 @@
 //! compiled with `debug` feature enabled. They live in the [`debug_tools`]
 //! extension trait, implemented for [`TokenizedFile`].
 //!
-//! ```
+//! ```rust
 //! // bring the trait into scope
 //! use lexer::DebugTools;
 //!
-//! let file = TokenizedFile::from_str(src);
+//! let file = TokenizedFile::from_str("local int myValue;");
 //! file.debug_dump();            // pretty-print token layout
 //! let text = file.to_source();  // reconstruct original text
 //! ```
@@ -64,7 +64,7 @@ pub struct TokenPiece<'src> {
     pub length_utf16: usize,
 }
 
-/// Defines location of a token inside [`TokenizedFile`] in a way, convenient
+/// Defines location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct TokenLocation {
@@ -72,6 +72,8 @@ pub struct TokenLocation {
     pub line: usize,
     /// 0-based index of a token in the line, possibly including the token that
     /// has continued from the previous line.
+    ///
+    /// Columns count tokens, not bytes or chars.
     pub column: usize,
 }
 
@@ -102,6 +104,10 @@ struct Tokenizer<'src> {
     slice_start_index: usize,
     /// When a multi-line token is being scanned, stores the 0-based line
    /// on which it started; [`None`] otherwise.
+    ///
+    /// `Some(line_idx)` iff the current line is within a multi-line token that
+    /// started on `line_idx`; it is consumed exactly once by
+    /// [`Self::commit_current_line`].
     multi_line_start: Option<usize>,
     /// Set to [`true`] if the lexer reported any error tokens.
     had_errors: bool,
@@ -141,7 +147,7 @@ impl<'src> TokenizedFile<'src> {
     /// ```rust
     /// let tokenized_file = TokenizedFile::from_str("function test() {}");
     /// if tokenized_file.has_errors() {
-    ///     println!("Error while parsing file: {}", path.display());
+    ///     println!("Error while parsing file.");
     /// }
     /// ```
     #[inline]
@@ -170,7 +176,7 @@ type TokenIdx = usize;
 
 /// Representation of a single physical line of the source file.
 ///
-/// [`Range`] are used instead of slices to avoid creating
+/// [`Range<TokenIdx>`] are used instead of slices to avoid creating
 /// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 struct Line {
@@ -214,7 +220,7 @@ impl Line {
     /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
     /// on this line.
     ///
-    /// [`None`] means there is no such tokens. Otherwise range is guaranteed
+    /// [`None`] means there are no such tokens. Otherwise range is guaranteed
     /// to not be empty.
     #[inline]
     fn local_range(&self) -> Option<Range<TokenIdx>> {
@@ -225,7 +231,7 @@ impl Line {
         }
     }
 
-    /// Returns amount of tokens of the line.
+    /// Returns the number of tokens on this line.
     ///
     /// Counts both tokens that started on this line and tokens that continued
     /// from previous one.
@@ -246,7 +252,8 @@ impl<'src> Tokenizer<'src> {
         }
     }
 
-    /// Handles tokens that never span multiple lines.
+    /// Handles simple tokens that *never* span multiple lines, allowing us to
+    /// skip a lot of work.
     fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) {
         if token_piece.token.is_newline() {
             self.line_number += 1;
@@ -257,7 +264,7 @@ impl<'src> Tokenizer<'src> {
         }
     }
 
-    /// Handles tokens that may contain one or more newline characters.
+    /// Handles tokens that might contain one or more newline characters.
     fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) {
         let start_line = self.line_number;
         let newline_count = count_line_breaks(token_piece.lexeme);
@@ -271,12 +278,15 @@ impl<'src> Tokenizer<'src> {
         // We only need to commit the line if this token actually ended the line
         if newline_count > 0 {
             self.commit_current_line();
-            // We only need to insert one `Line::Spanned(base)` per *interior*
-            // newline, so `newline_count - 1` such lines
-            // (e.g. 2 line breaks in block comment -> it has
-            // exactly `1` interior line)
-            let insert_count = newline_count - 1;
-            for _ in 0..insert_count {
+            // We only need to insert one `Line::spanned(start_line)` per
+            // *interior* line:
+            //
+            //   standalone          | local int i = /* Now we start long comment
+            //   spanned             | with three line breaks and *exactly* two
+            //   spanned             | inner lines that contain nothing but
+            //   spanned_with_tokens | comment bytes! */ = 0;
+            let inner_lines_count = newline_count - 1;
+            for _ in 0..inner_lines_count {
                 self.lines.push(Line::spanned(start_line));
             }
             // This is called *after* `commit_current_line()` cleared previous
@@ -313,7 +323,7 @@ impl<'src> Tokenizer<'src> {
     /// Finishes tokenization, converting accumulated data into
     /// [`TokenizedFile`].
     fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
-        // Commit any trailing tokens
+        // Flush trailing tokens for which `commit` wasn't auto triggered
         self.commit_current_line();
         // If we still have a `multi_line_start`
         // (i.e. a pure multi-line token with no local tokens on its last line),
@@ -322,7 +332,6 @@ impl<'src> Tokenizer<'src> {
             self.lines.push(Line::spanned(from));
         }
 
-        // Optimize for size
         self.buffer.shrink_to_fit();
         self.lines.shrink_to_fit();
 
@@ -343,7 +352,7 @@ fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> {
     }
 }
 
-/// Counts the number of new lines in given text.
+/// Counts the number of newlines in given text.
 fn count_line_breaks(text: &str) -> usize {
     let mut bytes_iterator = text.as_bytes().iter().peekable();
     let mut newline_count = 0;
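
The last hunk only shows the signature and the first two lines of `count_line_breaks`, so here is a minimal, self-contained sketch of how such a counter might be completed. It assumes `\n`, `\r`, and `\r\n` each count as a single break (the peekable byte iterator in the hunk hints at look-ahead for folding `\r\n`, but the real body is not part of this patch); the name `count_line_breaks_sketch` and everything past the first two lines are illustrative assumptions, not the crate's implementation.

```rust
/// Hypothetical sketch, for illustration only: counts line breaks in `text`,
/// treating "\n", "\r", and "\r\n" each as one break.
fn count_line_breaks_sketch(text: &str) -> usize {
    let mut bytes_iterator = text.as_bytes().iter().peekable();
    let mut newline_count = 0;
    while let Some(&byte) = bytes_iterator.next() {
        match byte {
            b'\n' => newline_count += 1,
            b'\r' => {
                // Fold the Windows-style "\r\n" pair into a single break.
                if bytes_iterator.peek() == Some(&&b'\n') {
                    bytes_iterator.next();
                }
                newline_count += 1;
            }
            _ => {}
        }
    }
    newline_count
}

fn main() {
    // A block comment lexeme with three line breaks has exactly two interior
    // lines, matching the `newline_count - 1` arithmetic documented in
    // `process_multi_line_token`.
    let lexeme = "/* line one\r\nline two\nline three\n*/";
    assert_eq!(count_line_breaks_sketch(lexeme), 3);
    assert_eq!(count_line_breaks_sketch(lexeme) - 1, 2); // interior lines
}
```

The asserts mirror the interior-line reasoning added by the comment diagram in `process_multi_line_token`: with three line breaks, only the two inner lines need a `Line::spanned(start_line)` entry.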