Add EndOfFile variant to TokenLocation

dkanus 2025-09-16 08:19:42 +07:00
parent 7ed934e2b8
commit 0fa8140644
2 changed files with 47 additions and 33 deletions

View File

@@ -28,9 +28,7 @@ use super::{TokenLocation, TokenPiece, TokenizedFile};
 #[derive(Clone, Debug)]
 pub struct Tokens<'src> {
     /// [`TokenLocation`] of the next token to be returned.
-    ///
-    /// [`None`] means the iterator has been exhausted.
-    cursor: Option<TokenLocation>,
+    cursor: TokenLocation,
     /// [`TokenizedFile`] whose tokens we're iterating over.
     source_file: &'src TokenizedFile<'src>,
     /// When `true`, whitespace tokens are skipped.
@@ -51,50 +49,57 @@ impl<'src> Tokens<'src> {

     // Returns the position of the next new token, skipping carried-over pieces
     // and blank lines.
-    fn advance_position(&self, mut position: TokenLocation) -> Option<TokenLocation> {
-        if let Some(current_line) = self.source_file.lines.get(position.line) {
+    fn advance_position(&self, position: TokenLocation) -> TokenLocation {
+        let TokenLocation::Position {
+            mut line,
+            mut column,
+        } = position
+        else {
+            return TokenLocation::EndOfFile;
+        };
+        if let Some(current_line) = self.source_file.lines.get(line) {
             // `Line::len()` also counts a possible token that continued from
             // the previous line.
-            if position.column + 1 < current_line.len() {
-                position.column += 1;
-                return Some(position);
+            if column + 1 < current_line.len() {
+                column += 1;
+                return TokenLocation::Position { line, column };
             }
         }
         // Current line is exhausted: walk downward until we find the first line
         // that **owns local tokens**, because we only want *new* token,
         // not continued from previous lines (they were already iterated over).
-        position.line += 1;
-        while let Some(next_line) = self.source_file.lines.get(position.line) {
+        line += 1;
+        while let Some(next_line) = self.source_file.lines.get(line) {
             if next_line.local_range().is_some() {
                 // Start at the first *local* token,
                 // skipping any carried-over one
-                position.column = if next_line.continued_from.is_some() {
+                column = if next_line.continued_from.is_some() {
                     1
                 } else {
                     0
                 };
-                return Some(position);
+                return TokenLocation::Position { line, column };
             }
-            position.line += 1; // keep skipping empty / pure-carried lines
+            line += 1; // keep skipping empty / pure-carried lines
         }
         // No more tokens.
-        None
+        TokenLocation::EndOfFile
     }

     // Creates a new iterator.
     fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
         let mut new_iterator = Tokens {
             source_file,
-            cursor: Some(TokenLocation { line: 0, column: 0 }),
+            cursor: TokenLocation::Position { line: 0, column: 0 },
             skip_whitespace: false,
         };
         // We need to land on the first existing token so [`Iterator::next`]
         // can assume cursor is valid.
-        while let Some(token_position) = new_iterator.cursor {
-            if new_iterator.source_file.get(token_position).is_some() {
+        while new_iterator.cursor != TokenLocation::EndOfFile {
+            if new_iterator.source_file.get(new_iterator.cursor).is_some() {
                 break;
             }
-            new_iterator.cursor = new_iterator.advance_position(token_position);
+            new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
         }
         new_iterator
     }
@@ -105,16 +110,17 @@ impl<'src> Iterator for Tokens<'src> {
     fn next(&mut self) -> Option<Self::Item> {
         // We only ever loop to discard whitespaces when the flag is on
-        loop {
-            let current_cursor = self.cursor?;
-            let token_piece = *self.source_file.get(current_cursor)?;
-            self.cursor = self.advance_position(current_cursor);
+        while self.cursor != TokenLocation::EndOfFile {
+            let token_location = self.cursor;
+            let token_piece = *self.source_file.get(self.cursor)?;
+            self.cursor = self.advance_position(self.cursor);
             // Optional whitespace-skip
             if !self.skip_whitespace || !token_piece.token.is_whitespace() {
-                return Some((current_cursor, token_piece));
+                return Some((token_location, token_piece));
             }
         }
+        None
     }
 }
@@ -148,8 +154,11 @@ impl<'src> TokenizedFile<'src> {
     /// ```
     #[track_caller]
     pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
-        let line = self.lines.get(position.line)?;
-        let column = position.column;
+        let TokenLocation::Position { line, column } = position else {
+            return None;
+        };
+        let line = self.lines.get(line)?;
+        let column = column;
         if column >= line.len() {
             return None;
         }

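Taken together, the first file's changes replace the `Option<TokenLocation>` cursor with a sentinel value: exhaustion is encoded in the location type itself, so `advance_position` can always return a plain `TokenLocation`. A minimal, self-contained sketch of this sentinel-cursor pattern follows; the `grid` of string tokens is a hypothetical stand-in for `TokenizedFile`, and `advance` mirrors the shape of `advance_position` without the carried-over-token handling.

/// Sentinel-style location: end-of-file is a value, not `None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TokenLocation {
    Position { line: usize, column: usize },
    EndOfFile,
}

/// Steps to the next token, returning `EndOfFile` once the grid is exhausted.
/// `grid` is a hypothetical stand-in for `TokenizedFile`.
fn advance(grid: &[Vec<&str>], position: TokenLocation) -> TokenLocation {
    let TokenLocation::Position { mut line, column } = position else {
        // Advancing past the end stays at the end.
        return TokenLocation::EndOfFile;
    };
    // Next token on the same line, if any.
    if let Some(current) = grid.get(line) {
        if column + 1 < current.len() {
            return TokenLocation::Position { line, column: column + 1 };
        }
    }
    // Otherwise, walk down to the first line that has tokens of its own.
    line += 1;
    while let Some(next) = grid.get(line) {
        if !next.is_empty() {
            return TokenLocation::Position { line, column: 0 };
        }
        line += 1; // skip empty lines
    }
    TokenLocation::EndOfFile
}

fn main() {
    let grid = vec![vec!["local", "int"], vec![], vec!["x"]];
    let mut cursor = TokenLocation::Position { line: 0, column: 0 };
    // This loop condition replaces the old `while let Some(..) = cursor`.
    while let TokenLocation::Position { line, column } = cursor {
        println!("({line}, {column}) -> {}", grid[line][column]);
        cursor = advance(&grid, cursor);
    }
}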
View File

@@ -67,14 +67,19 @@ pub struct TokenPiece<'src> {
 /// Defines location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-pub struct TokenLocation {
-    /// 0-based line number.
-    pub line: usize,
-    /// 0-based index of a token in the line, possibly including the token that
-    /// has continued from the previous line.
-    ///
-    /// Columns count tokens, not bytes or chars.
-    pub column: usize,
+pub enum TokenLocation {
+    /// Actual position of some token in the file.
+    Position {
+        /// 0-based line number.
+        line: usize,
+        /// 0-based index of a token in the line, possibly including the token that
+        /// has continued from the previous line.
+        ///
+        /// Columns count tokens, not bytes or chars.
+        column: usize,
+    },
+    /// Position of the end-of-file.
+    EndOfFile,
 }

 /// A tokenized, lossless representation of an UnrealScript source file.
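For callers, the practical difference of the new enum is that end-of-file becomes an ordinary, matchable value rather than the absence of one; and because `EndOfFile` is declared after `Position`, the derived `Ord` still sorts it after every real position. A hedged sketch of a downstream caller follows; the `describe` helper is hypothetical, and the enum is re-declared locally so the example stands alone.

#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum TokenLocation {
    Position { line: usize, column: usize },
    EndOfFile,
}

/// Hypothetical helper: render a location for a diagnostic message.
/// Note that `column` counts tokens, not bytes or characters.
fn describe(location: TokenLocation) -> String {
    match location {
        TokenLocation::Position { line, column } => {
            format!("token {column} on line {line}")
        }
        TokenLocation::EndOfFile => "end of file".to_string(),
    }
}

fn main() {
    assert_eq!(
        describe(TokenLocation::Position { line: 2, column: 0 }),
        "token 0 on line 2"
    );
    assert_eq!(describe(TokenLocation::EndOfFile), "end of file");
    // Because `EndOfFile` is declared after `Position`, the derived `Ord`
    // sorts it after every real position.
    assert!(TokenLocation::EndOfFile > TokenLocation::Position { line: 9, column: 9 });
}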