From d519ecab2e987fc62b56d2f43905545d036422da Mon Sep 17 00:00:00 2001
From: dkanus
Date: Mon, 11 Aug 2025 03:31:26 +0700
Subject: [PATCH] Fix documentation and comments

---
 rottlib/src/lexer/iterator.rs |  2 +-
 rottlib/src/lexer/mod.rs      | 45 +++++++++++++++++++++--------------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/rottlib/src/lexer/iterator.rs b/rottlib/src/lexer/iterator.rs
index d990128..4fc991b 100644
--- a/rottlib/src/lexer/iterator.rs
+++ b/rottlib/src/lexer/iterator.rs
@@ -139,7 +139,7 @@ impl<'src> TokenizedFile<'src> {
     /// ## Examples
     ///
     /// ```rust
-    /// use mycrate::{TokenizedFile, TokenLocation, Token};
+    /// use super::{TokenizedFile, TokenLocation, Token};
     /// let file = TokenizedFile::from_str("0 / 0");
     /// assert_eq!(
     ///     file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),
diff --git a/rottlib/src/lexer/mod.rs b/rottlib/src/lexer/mod.rs
index 8c8d3da..6d34a33 100644
--- a/rottlib/src/lexer/mod.rs
+++ b/rottlib/src/lexer/mod.rs
@@ -23,11 +23,11 @@
 //! compiled with `debug` feature enabled. They live in the [`debug_tools`]
 //! extension trait, implemented for [`TokenizedFile`].
 //!
-//! ```
+//! ```rust
 //! // bring the trait into scope
 //! use lexer::DebugTools;
 //!
-//! let file = TokenizedFile::from_str(src);
+//! let file = TokenizedFile::from_str("local int myValue;");
 //! file.debug_dump();            // pretty-print token layout
 //! let text = file.to_source();  // reconstruct original text
 //! ```
@@ -64,7 +64,7 @@ pub struct TokenPiece<'src> {
     pub length_utf16: usize,
 }
 
-/// Defines location of a token inside [`TokenizedFile`] in a way, convenient
+/// Defines location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct TokenLocation {
@@ -72,6 +72,8 @@ pub struct TokenLocation {
     pub line: usize,
     /// 0-based index of a token in the line, possibly including the token that
     /// has continued from the previous line.
+    ///
+    /// Columns count tokens, not bytes or chars.
     pub column: usize,
 }
 
@@ -102,6 +104,10 @@ struct Tokenizer<'src> {
     slice_start_index: usize,
     /// When a multi-line token is being scanned, stores the 0-based line
    /// on which it started; [`None`] otherwise.
+    ///
+    /// `Some(line_idx)` iff the current line is within a multi-line token that
+    /// started on `line_idx`; it is consumed exactly once by
+    /// [`Self::commit_current_line`].
     multi_line_start: Option<usize>,
     /// Set to [`true`] if the lexer reported any error tokens.
     had_errors: bool,
@@ -141,7 +147,7 @@ impl<'src> TokenizedFile<'src> {
     /// ```rust
     /// let tokenized_file = TokenizedFile::from_str("function test() {}");
     /// if tokenized_file.has_errors() {
-    ///     println!("Error while parsing file: {}", path.display());
+    ///     println!("Error while parsing file.");
     /// }
     /// ```
     #[inline]
@@ -170,7 +176,7 @@ type TokenIdx = usize;
 
 /// Representation of a single physical line of the source file.
 ///
-/// [`Range`] are used instead of slices to avoid creating
+/// [`Range<TokenIdx>`] are used instead of slices to avoid creating
 /// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 struct Line {
@@ -214,7 +220,7 @@ impl Line {
     /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
     /// on this line.
     ///
-    /// [`None`] means there is no such tokens. Otherwise range is guaranteed
+    /// [`None`] means there are no such tokens. Otherwise range is guaranteed
     /// to not be empty.
     #[inline]
     fn local_range(&self) -> Option<Range<TokenIdx>> {
@@ -225,7 +231,7 @@ impl Line {
         }
     }
 
-    /// Returns amount of tokens of the line.
+    /// Returns the number of tokens on this line.
     ///
     /// Counts both tokens that started on this line and tokens that continued
     /// from previous one.
@@ -246,7 +252,8 @@ impl<'src> Tokenizer<'src> {
         }
     }
 
-    /// Handles tokens that never span multiple lines.
+    /// Handles simple tokens that *never* span multiple lines, allowing us to
+    /// skip a lot of work.
     fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) {
         if token_piece.token.is_newline() {
             self.line_number += 1;
@@ -257,7 +264,7 @@ impl<'src> Tokenizer<'src> {
         }
     }
 
-    /// Handles tokens that may contain one or more newline characters.
+    /// Handles tokens that might contain one or more newline characters.
     fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) {
         let start_line = self.line_number;
         let newline_count = count_line_breaks(token_piece.lexeme);
@@ -271,12 +278,15 @@ impl<'src> Tokenizer<'src> {
         // We only need to commit the line if this token actually ended the line
         if newline_count > 0 {
             self.commit_current_line();
-            // We only need to insert one `Line::Spanned(base)` per *interior*
-            // newline, so `newline_count - 1` such lines
-            // (e.g. 2 line breaks in block comment -> it has
-            // exactly `1` interior line)
-            let insert_count = newline_count - 1;
-            for _ in 0..insert_count {
+            // We only need to insert one `Line::spanned(start_line)` per
+            // *interior* line:
+            //
+            //   standalone          | local int i = /* Now we start long comment
+            //   spanned             | with three line breaks and *exactly* two
+            //   spanned             | inner lines that contain nothing but
+            //   spanned_with_tokens | comment bytes! */ = 0;
+            let inner_lines_count = newline_count - 1;
+            for _ in 0..inner_lines_count {
                 self.lines.push(Line::spanned(start_line));
             }
             // This is called *after* `commit_current_line()` cleared previous
@@ -313,7 +323,7 @@ impl<'src> Tokenizer<'src> {
     /// Finishes tokenization, converting accumulated data into
     /// [`TokenizedFile`].
     fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
-        // Commit any trailing tokens
+        // Flush trailing tokens for which `commit` wasn't auto triggered
         self.commit_current_line();
         // If we still have a `multi_line_start`
         // (i.e. a pure multi-line token with no local tokens on its last line),
@@ -322,7 +332,6 @@ impl<'src> Tokenizer<'src> {
             self.lines.push(Line::spanned(from));
         }
 
-        // Optimize for size
         self.buffer.shrink_to_fit();
         self.lines.shrink_to_fit();
 
@@ -343,7 +352,7 @@ fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> {
     }
 }
 
-/// Counts the number of new lines in given text.
+/// Counts the number of newlines in given text.
 fn count_line_breaks(text: &str) -> usize {
     let mut bytes_iterator = text.as_bytes().iter().peekable();
     let mut newline_count = 0;
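
The last hunk only shows the signature and the first two lines of `count_line_breaks`, so here is a minimal, self-contained sketch of how such a counter might be completed. It assumes `\n`, `\r`, and `\r\n` each count as a single break (the peekable byte iterator in the hunk hints at look-ahead for folding `\r\n`, but the real body is not part of this patch); the name `count_line_breaks_sketch` and everything past the first two lines are illustrative assumptions, not the crate's implementation.

```rust
/// Hypothetical sketch, for illustration only: counts line breaks in `text`,
/// treating "\n", "\r", and "\r\n" each as one break.
fn count_line_breaks_sketch(text: &str) -> usize {
    let mut bytes_iterator = text.as_bytes().iter().peekable();
    let mut newline_count = 0;
    while let Some(&byte) = bytes_iterator.next() {
        match byte {
            b'\n' => newline_count += 1,
            b'\r' => {
                // Fold the Windows-style "\r\n" pair into a single break.
                if bytes_iterator.peek() == Some(&&b'\n') {
                    bytes_iterator.next();
                }
                newline_count += 1;
            }
            _ => {}
        }
    }
    newline_count
}

fn main() {
    // A block comment lexeme with three line breaks has exactly two interior
    // lines, matching the `newline_count - 1` arithmetic documented in
    // `process_multi_line_token`.
    let lexeme = "/* line one\r\nline two\nline three\n*/";
    assert_eq!(count_line_breaks_sketch(lexeme), 3);
    assert_eq!(count_line_breaks_sketch(lexeme) - 1, 2); // interior lines
}
```

The asserts mirror the interior-line reasoning added by the comment diagram in `process_multi_line_token`: with three line breaks, only the two inner lines need a `Line::spanned(start_line)` entry.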