rott/dev_tests/src/dump_tokens.rs
2025-07-30 19:47:11 +07:00

77 lines
2.3 KiB
Rust

use std::{
fs,
path::{Path, PathBuf},
};
use encoding_rs::{Encoding, UTF_8};
use rottlib::lexer::{DebugTools, TokenizedFile};
/// Walks the directory tree below `root` and picks the first file whose
/// final path component is exactly `needle` (the comparison is case-sensitive).
///
/// Directory entries that fail to be read during the walk are skipped.
///
/// Returns the canonicalized path of that first match, or `None` when
/// nothing matches or when canonicalizing the match fails.
fn find_file(root: &Path, needle: &str) -> Option<PathBuf> {
    let first_match = walkdir::WalkDir::new(root)
        .into_iter()
        .filter_map(Result::ok)
        .find(|entry| {
            let candidate = entry.path();
            candidate.is_file()
                && candidate.file_name().and_then(|name| name.to_str()) == Some(needle)
        })?;
    fs::canonicalize(first_match.path()).ok()
}
/// CLI: `dump_tokens <root_dir> <file_name>` - searches for `<file_name>`
/// recursively inside `<root_dir>`, decodes the first match, tokenizes it and
/// calls `dump_debug_layout` on the result.
///
/// This utility takes a *root directory* and a *file name* instead of a full
/// path so that we do not have to hunt down and type out full paths:
///
/// - We know where all the sources are;
/// - We usually just know the name of the file that is being problematic.
///
/// Every failure (missing argument, missing root directory, file not found,
/// unreadable file) is reported on stderr and ends the process with exit
/// code 1.
fn main() {
    const USAGE: &str = "Usage: dump_tokens <root_dir> <file_name>";

    // Reports `message` on stderr and terminates the process with exit code 1.
    fn fail(message: &str) -> ! {
        eprintln!("{message}");
        std::process::exit(1)
    }

    let mut args = std::env::args().skip(1);
    let root_dir = args.next().unwrap_or_else(|| fail(USAGE));
    let file_name = args.next().unwrap_or_else(|| fail(USAGE));

    let root = PathBuf::from(&root_dir);
    if !root.exists() {
        fail(&format!("Root directory '{root_dir}' does not exist."));
    }

    let found_path = find_file(&root, &file_name)
        .unwrap_or_else(|| fail(&format!("File '{file_name}' not found under '{root_dir}'.")));

    // Read & decode
    let raw_bytes = fs::read(&found_path).unwrap_or_else(|error| {
        fail(&format!("Could not read {}: {error}", found_path.display()))
    });

    // chardet reports its own charset names; `charset2encoding` translates the
    // ones that are not valid encoding labels before the lookup. Anything
    // still unrecognised falls back to UTF-8.
    let (charset, _, _) = chardet::detect(&raw_bytes);
    let encoding =
        Encoding::for_label(chardet::charset2encoding(&charset).as_bytes()).unwrap_or(UTF_8);
    let (decoded_str, _, _) = encoding.decode(&raw_bytes);

    // `decoded_str` is a `Cow<str>`; borrowing it avoids copying the text.
    let tokenized_file = TokenizedFile::from_source(&decoded_str);
    tokenized_file.dump_debug_layout();
}