rott/rottlib/src/lexer/debug_tools.rs

//! Debug-only helpers for [`TokenizedFile`]
//!
//! This module is **compiled only if**
//!
//! * the current build profile has `debug_assertions` enabled, or
//! * the crate is built with the `debug` cargo feature.
//!
//! These checks have been moved to the parent module.
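//!
//! A sketch of the gate that is assumed to live in the parent module (the
//! exact attribute there may differ):
//!
//! ```ignore
//! #[cfg(any(debug_assertions, feature = "debug"))]
//! mod debug_tools;
//! ```
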
/// A technical trait that adds debug helpers to the lexer.
pub trait DebugTools {
    /// Pretty-prints the internal layout of the tokenised file - useful when
    /// writing new passes or hunting lexer bugs.
    ///
    /// This method writes the layout directly to standard output.
    ///
    /// The format is unspecified, may change, and is not intended for
    /// external tools.
    ///
    /// Each line in the printed layout starts with its 1-based number for
    /// convenience.
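    ///
    /// A minimal usage sketch (the import path and the `from_source`
    /// signature are assumptions, adjust to the real API):
    ///
    /// ```ignore
    /// use rottlib::lexer::{DebugTools, TokenizedFile};
    ///
    /// let file = TokenizedFile::from_source("two\nlines");
    /// // Output resembles (token names are illustrative):
    /// //   Line 1
    /// //       [Standalone]
    /// //           Identifier @ 0..3: "two"
    /// file.dump_debug_layout();
    /// ```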
    fn dump_debug_layout(&self);

    /// Reconstructs the exact, lossless source text that was fed to
    /// [`super::TokenizedFile::from_source`] from the internal
    /// representation - useful for manually verifying that the lexer works.
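    ///
    /// A minimal round-trip sketch (assuming `from_source` takes a `&str`):
    ///
    /// ```ignore
    /// let source = "two\nlines";
    /// let file = TokenizedFile::from_source(source);
    /// assert_eq!(file.reconstruct_source(), source);
    /// ```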
    fn reconstruct_source(&self) -> String;
}

impl<'src> DebugTools for super::TokenizedFile<'src> {
    fn reconstruct_source(&self) -> String {
        self.buffer.iter().map(|span| span.lexeme).collect()
    }

    fn dump_debug_layout(&self) {
        for (row_idx, line) in self.lines.iter().enumerate() {
            println!("Line {}", row_idx + 1);
            match (line.continued_from, line.local_range()) {
                // Stand-alone line (all tokens start here)
                (None, Some(range)) => {
                    println!("\t[Standalone]");
                    dump_spans(&self.buffer[range.clone()]);
                }
                // Pure continuation - the only thing on this line is
                // the remainder of a multi-line token that started earlier.
                (Some(origin_row), None) => {
                    println!(
                        "\t[Continued from line {} no new tokens here]",
                        origin_row + 1
                    );
                }
                // Continuation **plus** some fresh tokens that begin here.
                (Some(origin_row), Some(range)) => {
                    println!("\t[Continued from line {} + new tokens]", origin_row + 1);
                    dump_spans(&self.buffer[range.clone()]);
                }
                // An empty physical line (should be rare, but let's be safe).
                (None, None) => {
                    println!("\t[Empty line]");
                }
            }
        }
    }
}

/// Helper that prints every span in `spans` together with its UTF-16
/// column boundaries.
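///
/// A worked sketch of the column arithmetic (assuming `length_utf16` is the
/// lexeme's length in UTF-16 code units):
///
/// ```ignore
/// // "if" -> 2 units -> printed as 0..2
/// // " "  -> 1 unit  -> printed as 2..3
/// // "😀" -> 2 units -> printed as 3..5 (surrogate pair)
/// ```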
fn dump_spans<'a>(spans: &[super::TokenPiece<'a>]) {
    let mut col_utf16 = 0usize;
    for span in spans {
        let start = col_utf16;
        let end = start + span.length_utf16;
        println!(
            "\t\t{:?} @ {}..{}: {:?}",
            span.token, start, end, span.lexeme
        );
        col_utf16 = end;
    }
}