use std::{collections::HashSet, fs, path::PathBuf};

use rottlib::lexer::{DebugTools, TokenizedFile};

/// Read `ignore.txt` (one path per line, `#` for comments) from the root
/// directory and turn it into a canonicalized [`HashSet<PathBuf>`].
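///
/// A typical `ignore.txt` might look like the sketch below (file names are
/// hypothetical); both absolute and root-relative entries are accepted:
///
/// ```text
/// # Vendored sources we do not want to check
/// ThirdParty/Vendored.uc
/// /absolute/path/to/Legacy.uc
/// ```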
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
    let ignore_file = root.join("ignore.txt");
    if !ignore_file.exists() {
        return HashSet::new();
    }

    let content = match fs::read_to_string(&ignore_file) {
        Ok(content) => content,
        Err(error) => {
            eprintln!("Could not read {}: {error}", ignore_file.display());
            return HashSet::new();
        }
    };

    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| {
            let next_path = PathBuf::from(line);
            let absolute_path = if next_path.is_absolute() {
                next_path
            } else {
                root.join(next_path)
            };
            // Canonicalize so entries compare equal to the canonicalized
            // paths produced during the directory walk.
            fs::canonicalize(absolute_path).ok()
        })
        .collect()
}

/// CLI: `verify_uc <root_dir>` - find all `.uc` files in the provided directory
/// (except those listed in `ignore.txt` in the root) and test them all.
///
/// The reported execution time covers tokenization only; it excludes the time
/// spent reading files from disk.
///
/// `ignore.txt` is for listing specific files, not directories.
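///
/// Example invocation (the path is hypothetical):
///
/// ```text
/// verify_uc path/to/unrealscript/sources
/// ```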
fn main() {
    let root_dir = std::env::args().nth(1).unwrap(); // it is fine to crash a debug utility
    let root = PathBuf::from(&root_dir);

    if !root.exists() {
        eprintln!("Root directory '{root_dir}' does not exist.");
        std::process::exit(1);
    }

    // Load files
    let ignored_paths = load_ignore_set(&root);
    let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
    for entry in walkdir::WalkDir::new(&root)
        .into_iter()
        .filter_map(Result::ok) // ignoring unreadable entries is acceptable for a debug tool
        .filter(|entry| {
            let path = entry.path();
            // Skip anything explicitly ignored
            if let Ok(absolute_path) = fs::canonicalize(path) {
                if ignored_paths.contains(&absolute_path) {
                    return false;
                }
            }
            // Must be *.uc
            path.is_file()
                && path
                    .extension()
                    .and_then(|extension| extension.to_str())
                    .is_some_and(|extension| extension.eq_ignore_ascii_case("uc"))
        })
    {
        let path = entry.path();
        match fs::read(path) {
            Ok(raw_bytes) => {
                // Auto-detect the encoding of old UnrealScript sources,
                // falling back to UTF-8 when the detected label is unknown
                let (encoding_label, _, _) = chardet::detect(&raw_bytes);
                let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes())
                    .unwrap_or(encoding_rs::UTF_8);
                let (decoded_text, _, _) = encoding.decode(&raw_bytes);
                uc_files.push((path.to_path_buf(), decoded_text.into_owned()));
            }
            Err(error) => {
                eprintln!("Failed to read `{}`: {error}", path.display());
                std::process::exit(1);
            }
        }
    }
    println!("Loaded {} .uc files into memory.", uc_files.len());

    // Tokenize and measure performance
    let start_time = std::time::Instant::now();
    let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files
        .iter()
        .map(|(path, source_code)| {
            let tokenized_file = TokenizedFile::from_source(source_code);
            if tokenized_file.had_errors() {
                // `TK:` marks a file whose tokenization reported errors
                println!("TK: {}", path.display());
            }
            (path.clone(), tokenized_file)
        })
        .collect();
    let elapsed_time = start_time.elapsed();
    println!(
        "Tokenized {} files in {:.2?}",
        tokenized_files.len(),
        elapsed_time
    );

    // Round-trip check: reconstructing the source from tokens must reproduce
    // the original text exactly
    for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) {
        let reconstructed = tokenized_file.reconstruct_source();
        if original != &reconstructed {
            eprintln!("Reconstruction mismatch in `{}`!", path.display());
            std::process::exit(1);
        }
    }

    println!("All .uc files matched successfully.");
}