//! Debug utility: finds a source file by name under a root directory and dumps its token layout.
use std::{
|
|
fs,
|
|
path::{Path, PathBuf},
|
|
};
|
|
|
|
use encoding_rs::{Encoding, UTF_8};
|
|
use rottlib::lexer::{DebugTools, TokenizedFile};
|
|
|
|
/// Recursively search `root` for the first file whose *basename* matches
|
|
/// `needle` (case-sensitive).
|
|
///
|
|
/// Returns the absolute path.
|
|
fn find_file(root: &Path, needle: &str) -> Option<PathBuf> {
|
|
for entry in walkdir::WalkDir::new(root)
|
|
.into_iter()
|
|
.filter_map(Result::ok)
|
|
{
|
|
let path = entry.path();
|
|
if path.is_file() && (path.file_name().and_then(|name| name.to_str()) == Some(needle)) {
|
|
return fs::canonicalize(path).ok();
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// CLI: `dump_tokens <root_dir> <file_name>` - searches for `<file_name>`
|
|
/// recursively inside `<root_dir>`.
|
|
///
|
|
/// This utility takes *root directory* and *file name* instead of the full path
|
|
/// to help us avoid searching for them typing names out:
|
|
///
|
|
/// - We know where all the sources are;
|
|
/// - We usually just know the name of the file that is being problematic.
|
|
fn main() {
|
|
let mut args = std::env::args().skip(1);
|
|
let root_dir = args.next().unwrap_or_else(|| {
|
|
eprintln!("Usage: inspect_uc <root_dir> <file_name>");
|
|
std::process::exit(1);
|
|
});
|
|
let file_name = args.next().unwrap_or_else(|| {
|
|
eprintln!("Usage: inspect_uc <root_dir> <file_name>");
|
|
std::process::exit(1);
|
|
});
|
|
|
|
let root = PathBuf::from(&root_dir);
|
|
if !root.exists() {
|
|
eprintln!("Root directory '{root_dir}' does not exist.");
|
|
std::process::exit(1);
|
|
}
|
|
|
|
let found_path = find_file(&root, &file_name).map_or_else(
|
|
|| {
|
|
eprintln!("File '{file_name}' not found under '{root_dir}'.");
|
|
std::process::exit(1);
|
|
},
|
|
|path| path,
|
|
);
|
|
|
|
// Read & decode
|
|
let raw_bytes = match fs::read(&found_path) {
|
|
Ok(sources) => sources,
|
|
Err(error) => {
|
|
eprintln!("Could not read {}: {error}", found_path.display());
|
|
std::process::exit(1);
|
|
}
|
|
};
|
|
|
|
let (encoding_label, _, _) = chardet::detect(&raw_bytes);
|
|
let encoding = Encoding::for_label(encoding_label.as_bytes()).unwrap_or(UTF_8);
|
|
let (decoded_str, _, _) = encoding.decode(&raw_bytes);
|
|
|
|
let source_text = decoded_str.to_string();
|
|
let tokenized_file = TokenizedFile::from_source(&source_text);
|
|
|
|
tokenized_file.dump_debug_layout();
|
|
}
|