Fix documentation and comments
commit d519ecab2e
parent 933722bd42
@@ -139,7 +139,7 @@ impl<'src> TokenizedFile<'src> {
 /// ## Examples
 ///
 /// ```rust
-/// use mycrate::{TokenizedFile, TokenLocation, Token};
+/// use super::{TokenizedFile, TokenLocation, Token};
 /// let file = TokenizedFile::from_str("0 / 0");
 /// assert_eq!(
 /// file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),
@@ -23,11 +23,11 @@
 //! compiled with `debug` feature enabled. They live in the [`debug_tools`]
 //! extension trait, implemented for [`TokenizedFile`].
 //!
-//! ```
+//! ```rust
 //! // bring the trait into scope
 //! use lexer::DebugTools;
 //!
-//! let file = TokenizedFile::from_str(src);
+//! let file = TokenizedFile::from_str("local int myValue;");
 //! file.debug_dump(); // pretty-print token layout
 //! let text = file.to_source(); // reconstruct original text
 //! ```
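For orientation, a minimal sketch of the extension-trait pattern the module docs describe: a `DebugTools` trait gated behind the `debug` feature and implemented for `TokenizedFile`. The method names come from the doc example above, but the exact signatures and bodies here are assumptions for illustration, not the crate's actual code.

```rust
// Sketch only: shape of a `debug`-gated extension trait (signatures assumed).
#[cfg(feature = "debug")]
pub trait DebugTools {
    /// Pretty-print the token layout.
    fn debug_dump(&self);
    /// Reconstruct the original source text.
    fn to_source(&self) -> String;
}

#[cfg(feature = "debug")]
impl<'src> DebugTools for TokenizedFile<'src> {
    fn debug_dump(&self) {
        // Hypothetical body: walk the lines and print each token piece.
        todo!()
    }

    fn to_source(&self) -> String {
        // Hypothetical body: concatenate the stored lexemes in order.
        todo!()
    }
}
```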
@@ -64,7 +64,7 @@ pub struct TokenPiece<'src> {
 pub length_utf16: usize,
 }

-/// Defines location of a token inside [`TokenizedFile`] in a way, convenient
+/// Defines location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct TokenLocation {
@@ -72,6 +72,8 @@ pub struct TokenLocation {
 pub line: usize,
 /// 0-based index of a token in the line, possibly including the token that
 /// has continued from the previous line.
+///
+/// Columns count tokens, not bytes or chars.
 pub column: usize,
 }

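Because `column` counts tokens rather than bytes or chars, a lookup addresses the n-th token of a line directly. A small usage sketch, assuming `TokenizedFile`, `TokenLocation`, and the `lexeme` field of the returned token piece are in scope as shown elsewhere in this diff; whether whitespace is emitted as tokens (and therefore shifts column indices) is not visible here.

```rust
// Illustrative fragment, not taken from the crate's docs.
let file = TokenizedFile::from_str("0 / 0");
let location = TokenLocation { line: 0, column: 2 };
match file.get(location) {
    Some(piece) => println!("token #2 on line 0 has lexeme {:?}", piece.lexeme),
    None => println!("line 0 has no token at column 2"),
}
```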
@@ -102,6 +104,10 @@ struct Tokenizer<'src> {
 slice_start_index: usize,
 /// When a multi-line token is being scanned, stores the 0-based line
 /// on which it started; [`None`] otherwise.
+///
+/// `Some(line_idx)` iff the current line is within a multi-line token that
+/// started on `line_idx`; it is consumed exactly once by
+/// [`Self::commit_current_line`].
 multi_line_start: Option<usize>,
 /// Set to [`true`] if the lexer reported any error tokens.
 had_errors: bool,
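The "consumed exactly once" contract maps naturally onto `Option::take`. A standalone illustration of that behaviour (not the crate's implementation of `commit_current_line`):

```rust
// `take()` yields the stored start line once and leaves `None` behind,
// which is exactly the "consumed exactly once" guarantee described above.
let mut multi_line_start: Option<usize> = Some(3);
assert_eq!(multi_line_start.take(), Some(3)); // first commit sees the start line
assert_eq!(multi_line_start.take(), None);    // later commits see nothing
```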
@@ -141,7 +147,7 @@ impl<'src> TokenizedFile<'src> {
 /// ```rust
 /// let tokenized_file = TokenizedFile::from_str("function test() {}");
 /// if tokenized_file.has_errors() {
-/// println!("Error while parsing file: {}", path.display());
+/// println!("Error while parsing file.");
 /// }
 /// ```
 #[inline]
@@ -170,7 +176,7 @@ type TokenIdx = usize;

 /// Representation of a single physical line of the source file.
 ///
-/// [`Range<TokenIndex>`] are used instead of slices to avoid creating
+/// [`Range<TokenIdx>`] are used instead of slices to avoid creating
 /// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 struct Line {
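The design note about `Range<TokenIdx>` is the usual index-range workaround for self-referential borrows: `Line` stores positions into `TokenizedFile::buffer` instead of borrowing slices from it. A hedged sketch of the pattern with hypothetical names (`LineSketch`, `local_tokens`), not the crate's real fields:

```rust
use std::ops::Range;

/// Hypothetical stand-in for `Line`: it stores index ranges into a buffer
/// owned elsewhere, so it never borrows from its owning struct.
struct LineSketch {
    /// Tokens that start on this line, if any.
    local_tokens: Option<Range<usize>>,
}

/// Resolve the stored range against the shared token buffer.
fn tokens_on_line<'a, T>(buffer: &'a [T], line: &LineSketch) -> &'a [T] {
    match &line.local_tokens {
        Some(range) => &buffer[range.clone()],
        None => &[],
    }
}
```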
@@ -214,7 +220,7 @@ impl Line {
 /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
 /// on this line.
 ///
-/// [`None`] means there is no such tokens. Otherwise range is guaranteed
+/// [`None`] means there are no such tokens. Otherwise range is guaranteed
 /// to not be empty.
 #[inline]
 fn local_range(&self) -> Option<Range<TokenIdx>> {
@@ -225,7 +231,7 @@ impl Line {
 }
 }

-/// Returns amount of tokens of the line.
+/// Returns the number of tokens on this line.
 ///
 /// Counts both tokens that started on this line and tokens that continued
 /// from previous one.
@@ -246,7 +252,8 @@ impl<'src> Tokenizer<'src> {
 }
 }

-/// Handles tokens that never span multiple lines.
+/// Handles simple tokens that *never* span multiple lines, allowing us to
+/// skip a lot of work.
 fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) {
 if token_piece.token.is_newline() {
 self.line_number += 1;
@@ -257,7 +264,7 @@ impl<'src> Tokenizer<'src> {
 }
 }

-/// Handles tokens that may contain one or more newline characters.
+/// Handles tokens that might contain one or more newline characters.
 fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) {
 let start_line = self.line_number;
 let newline_count = count_line_breaks(token_piece.lexeme);
@@ -271,12 +278,15 @@ impl<'src> Tokenizer<'src> {
 // We only need to commit the line if this token actually ended the line
 if newline_count > 0 {
 self.commit_current_line();
-// We only need to insert one `Line::Spanned(base)` per *interior*
-// newline, so `newline_count - 1` such lines
-// (e.g. 2 line breaks in block comment -> it has
-// exactly `1` interior line)
-let insert_count = newline_count - 1;
-for _ in 0..insert_count {
+// We only need to insert one `Line::spanned(start_line)` per
+// *interior* line:
+//
+// standalone          | local int i = /* Now we start long comment
+// spanned             | with three line breaks and *exactly* two
+// spanned             | inner lines that contain nothing but
+// spanned_with_tokens | comment bytes! */ = 0;
+let inner_lines_count = newline_count - 1;
+for _ in 0..inner_lines_count {
 self.lines.push(Line::spanned(start_line));
 }
 // This is called *after* `commit_current_line()` cleared previous
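The rewritten comment leans on a small piece of arithmetic: a lexeme with `n` line breaks spans `n + 1` physical lines, of which `n - 1` are interior (neither the first nor the last). A tiny standalone check of that claim, illustrative only:

```rust
/// Number of interior lines spanned by a token with `newline_count` line breaks.
fn inner_lines(newline_count: usize) -> usize {
    newline_count.saturating_sub(1)
}

assert_eq!(inner_lines(3), 2); // the block comment from the example: two inner lines
assert_eq!(inner_lines(1), 0); // a token that ends right at its only line break
```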
@@ -313,7 +323,7 @@ impl<'src> Tokenizer<'src> {
 /// Finishes tokenization, converting accumulated data into
 /// [`TokenizedFile`].
 fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
-// Commit any trailing tokens
+// Flush trailing tokens for which `commit` wasn't auto triggered
 self.commit_current_line();
 // If we still have a `multi_line_start`
 // (i.e. a pure multi-line token with no local tokens on its last line),
@@ -322,7 +332,6 @@ impl<'src> Tokenizer<'src> {
 self.lines.push(Line::spanned(from));
 }

-// Optimize for size
 self.buffer.shrink_to_fit();
 self.lines.shrink_to_fit();
