Rename TokenSpan to TokenPiece and tidy code layout
parent 579c2a4d3d
commit 9ff20c7a60
@@ -46,7 +46,7 @@ const DEFAULT_TOKEN_BUFFER_CAPACITY: usize = 20_000;
 ///
 /// *No absolute coordinates* are stored - they are recomputed per line.
 #[derive(Debug, Clone, Copy)]
-pub struct TokenSpan<'src> {
+pub struct TokenPiece<'src> {
     pub lexeme: &'src str,
     pub token: Token,
     pub length_utf16: usize,
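The renamed struct is still `Copy` and only borrows its lexeme from the source text, so tokens stay cheap to duplicate. A minimal sketch of constructing one by hand; `Token::Identifier` is an assumed variant name, not confirmed by this diff:

let piece = TokenPiece {
    lexeme: "var",               // borrowed from the source buffer
    token: Token::Identifier,    // hypothetical variant
    length_utf16: 3,
};
let copied = piece;              // `Copy` derive: `piece` stays usable
assert_eq!(piece.lexeme, copied.lexeme);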
@@ -60,28 +60,32 @@ pub struct TokenLocation {
     column: usize,
 }
 
+impl PartialEq for TokenLocation {
+    fn eq(&self, other: &TokenLocation) -> bool {
+        self.line_number == other.line_number && self.column == other.column
+    }
+}
+
+impl PartialOrd for TokenLocation {
+    fn partial_cmp(&self, other: &TokenLocation) -> Option<Ordering> {
+        if self.line_number == other.line_number {
+            self.column.partial_cmp(&other.column)
+        } else {
+            self.line_number.partial_cmp(&other.line_number)
+        }
+    }
+}
+
 /// Type for indexing lines in a [`TokenizedFile`].
 type LineNumber = usize;
 
 /// Type for specific tokens inside each [`Line`].
 type TokenIndex = usize;
 
-/// Representation of a single physical line of the source file.
-///
-/// [`Range<TokenIndex>`] are used instead of slices to avoid creating
-/// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
-#[derive(Clone)]
-struct Line {
-    /// Token that began on an earlier line (`None` for standalone lines).
-    continued_from: Option<LineNumber>,
-    /// Contiguous tokens that started on this line (`start >= end` iff empty).
-    local_range: Range<TokenIndex>,
-}
-
 /// A tokenized, lossless representation of an UnrealScript source file.
 pub struct TokenizedFile<'src> {
     /// Arena of every token span in this file.
-    buffer: Vec<TokenSpan<'src>>,
+    buffer: Vec<TokenPiece<'src>>,
     /// Mapping that provides an easy and efficient access to tokens by
     /// line number.
     lines: Vec<Line>,
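The two new impls order `TokenLocation` values line-first, then by column. A quick sketch of the resulting behavior, assuming in-module access to the private fields:

let a = TokenLocation { line_number: 2, column: 10 };
let b = TokenLocation { line_number: 3, column: 0 };
assert!(a < b);                                            // lines dominate
assert!(a < TokenLocation { line_number: 2, column: 11 }); // columns break ties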
@@ -91,8 +95,8 @@ pub struct TokenizedFile<'src> {
 
 /// Mutable state that encapsulates data needed during the tokenization loop.
 struct Tokenizer<'src> {
-    /// Arena that owns every [`TokenSpan`] produced for the file.
-    buffer: Vec<TokenSpan<'src>>,
+    /// Arena that owns every [`TokenPiece`] produced for the file.
+    buffer: Vec<TokenPiece<'src>>,
     /// Mapping from physical line number to the tokens that belong to it.
     lines: Vec<Line>,
     /// The current 0-based physical line number.
@@ -143,9 +147,71 @@ impl<'src> TokenizedFile<'src> {
     }
 }
 
+/// Representation of a single physical line of the source file.
+///
+/// [`Range<TokenIndex>`] are used instead of slices to avoid creating
+/// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
+#[derive(Clone)]
+struct Line {
+    /// Token that began on an earlier line (`None` for standalone lines).
+    continued_from: Option<LineNumber>,
+    /// Contiguous tokens that started on this line (`start >= end` iff empty).
+    local_range: Range<TokenIndex>,
+}
+
+impl Line {
+    /// Creates a standalone line that owns a contiguous slice in
+    /// the [`TokenizedFile::buffer`] arena.
+    fn standalone(locals: Range<TokenIndex>) -> Line {
+        Line {
+            continued_from: None,
+            local_range: locals,
+        }
+    }
+
+    /// Creates a line that is part of a multi-line token started on
+    /// another line, referencing the 0-based index of its origin.
+    fn spanned(carried: LineNumber) -> Line {
+        Line {
+            continued_from: Some(carried),
+            local_range: 0..0,
+        }
+    }
+
+    /// Creates a line that is part of a multi-line token started on
+    /// another line and also contains additional tokens local to itself.
+    fn spanned_with_tokens(carried: LineNumber, locals: Range<TokenIndex>) -> Line {
+        Line {
+            continued_from: Some(carried),
+            local_range: locals,
+        }
+    }
+
+    /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
+    /// on this line.
+    ///
+    /// [`None`] means there is no such tokens. Otherwise range is guaranteed
+    /// to not be empty.
+    fn local_range(&self) -> Option<Range<TokenIndex>> {
+        if self.local_range.is_empty() {
+            None
+        } else {
+            Some(self.local_range.clone())
+        }
+    }
+
+    /// Returns amount of tokens of the line.
+    ///
+    /// Counts both tokens that started on this line and tokens that continued
+    /// from previous one.
+    fn len(&self) -> usize {
+        (self.continued_from.is_some() as usize) + (self.local_range.end - self.local_range.start)
+    }
+}
+
 impl<'src> Tokenizer<'src> {
     /// Handles a token span and dispatches to the appropriate handler.
-    fn process_token_span(&mut self, token_span: TokenSpan<'src>) {
+    fn process_token_span(&mut self, token_span: TokenPiece<'src>) {
         if token_can_span_lines(&token_span.token) {
             self.process_multi_line_token(token_span);
         } else {
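The moved `Line` type keeps its semantics: `len` counts an optional carried-over token plus the locally started ones, and `local_range` yields `None` for continuation-only lines. Illustrative uses of the constructors shown above:

// Carries a token from line 0 and starts two tokens of its own.
let line = Line::spanned_with_tokens(0, 5..7);
assert_eq!(line.len(), 3); // 1 carried + 2 local
assert_eq!(line.local_range(), Some(5..7));

// A continuation-only line starts no tokens itself.
let tail = Line::spanned(0);
assert_eq!(tail.len(), 1);
assert_eq!(tail.local_range(), None);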
@@ -154,7 +220,7 @@ impl<'src> Tokenizer<'src> {
     }
 
     /// Handles tokens that never span multiple lines.
-    fn process_single_line_token(&mut self, token_span: TokenSpan<'src>) {
+    fn process_single_line_token(&mut self, token_span: TokenPiece<'src>) {
         if token_is_newline(&token_span.token) {
             self.line_number += 1;
             self.buffer.push(token_span);
@@ -165,7 +231,7 @@ impl<'src> Tokenizer<'src> {
     }
 
     /// Handles tokens that may contain one or more newline characters.
-    fn process_multi_line_token(&mut self, token_span: TokenSpan<'src>) {
+    fn process_multi_line_token(&mut self, token_span: TokenPiece<'src>) {
         let start_line = self.line_number;
         let newline_count = count_newlines(token_span.lexeme);
 
@@ -240,9 +306,9 @@ impl<'src> Tokenizer<'src> {
     }
 }
 
-fn build_span<'src>(token: Token, text: &'src str) -> TokenSpan<'src> {
+fn build_span<'src>(token: Token, text: &'src str) -> TokenPiece<'src> {
     let length_utf16 = text.encode_utf16().count();
-    TokenSpan {
+    TokenPiece {
         lexeme: text,
         token,
         length_utf16,
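`build_span` precomputes the lexeme's length in UTF-16 code units, which differs from both the byte length and the char count once text leaves the BMP (editor protocols such as LSP default to UTF-16 columns; that motivation is an assumption, not stated in this diff):

let text = "a😀";                               // '😀' is outside the BMP
assert_eq!(text.len(), 5);                      // UTF-8 bytes
assert_eq!(text.chars().count(), 2);            // Unicode scalar values
assert_eq!(text.encode_utf16().count(), 3);     // UTF-16 units: 1 + surrogate pair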
@@ -260,72 +326,6 @@ fn token_can_span_lines(token: &Token) -> bool {
     )
 }
 
-impl Line {
-    /// Creates a standalone line that owns a contiguous slice in
-    /// the [`TokenizedFile::buffer`] arena.
-    fn standalone(locals: Range<TokenIndex>) -> Line {
-        Line {
-            continued_from: None,
-            local_range: locals,
-        }
-    }
-
-    /// Creates a line that is part of a multi-line token started on
-    /// another line, referencing the 0-based index of its origin.
-    fn spanned(carried: LineNumber) -> Line {
-        Line {
-            continued_from: Some(carried),
-            local_range: 0..0,
-        }
-    }
-
-    /// Creates a line that is part of a multi-line token started on
-    /// another line and also contains additional tokens local to itself.
-    fn spanned_with_tokens(carried: LineNumber, locals: Range<TokenIndex>) -> Line {
-        Line {
-            continued_from: Some(carried),
-            local_range: locals,
-        }
-    }
-
-    /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
-    /// on this line.
-    ///
-    /// [`None`] means there is no such tokens. Otherwise range is guaranteed
-    /// to not be empty.
-    fn local_range(&self) -> Option<Range<TokenIndex>> {
-        if self.local_range.is_empty() {
-            None
-        } else {
-            Some(self.local_range.clone())
-        }
-    }
-
-    /// Returns amount of tokens of the line.
-    ///
-    /// Counts both tokens that started on this line and tokens that continued
-    /// from previous one.
-    fn len(&self) -> usize {
-        (self.continued_from.is_some() as usize) + (self.local_range.end - self.local_range.start)
-    }
-}
-
-impl PartialEq for TokenLocation {
-    fn eq(&self, other: &TokenLocation) -> bool {
-        self.line_number == other.line_number && self.column == other.column
-    }
-}
-
-impl PartialOrd for TokenLocation {
-    fn partial_cmp(&self, other: &TokenLocation) -> Option<Ordering> {
-        if self.line_number == other.line_number {
-            self.column.partial_cmp(&other.column)
-        } else {
-            self.line_number.partial_cmp(&other.line_number)
-        }
-    }
-}
-
 /// Counts the number of new lines in given text.
 fn count_newlines(text: &str) -> usize {
     let mut bytes_iterator = text.as_bytes().iter().peekable();
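Only the signature and first line of `count_newlines` are visible in this hunk; the peekable byte iterator hints that `\r\n` pairs are folded into a single newline. A hedged re-sketch of such a counter, not necessarily the crate's exact body:

fn count_newlines(text: &str) -> usize {
    let mut bytes = text.as_bytes().iter().peekable();
    let mut count = 0;
    while let Some(&byte) = bytes.next() {
        match byte {
            b'\n' => count += 1,
            b'\r' => {
                // Fold "\r\n" into one newline by consuming the '\n'.
                if bytes.peek() == Some(&&b'\n') {
                    bytes.next();
                }
                count += 1;
            }
            _ => {}
        }
    }
    count
}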