// Source listing: rott/dev_tests/src/uc_lexer_verify.rs (339 lines, 11 KiB, Rust)

#![allow(
clippy::all,
clippy::pedantic,
clippy::nursery,
clippy::cargo,
clippy::restriction
)]
use std::{
collections::HashSet,
fs,
io::{self, Write},
path::PathBuf,
time::Instant,
};
use encoding_rs::Encoding;
use rottlib::diagnostics::Diagnostic as Diag;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::Parser;
mod pretty;
// ---------- CONFIG ----------
/// Maximum number of `.uc` files loaded during a full scan. `main` stops discovering
/// files once this many have been picked; the cap is not applied in single-file mode.
const FILE_LIMIT: usize = 10000; // cap on files scanned
/// How many diagnostics from the start of the list are reported when the list is too
/// long to be shown in full.
const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics
/// How many diagnostics from the end of the list are reported when the list is too
/// long to be shown in full.
const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics
/// If true, print the old debug struct dump (`{:#?}`) to stderr after
/// `pretty::render_diagnostic` has been called for each reported diagnostic.
const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;
// Cargo.toml additions:
// is-terminal = "0.4"
// sysinfo = { version = "0.30", features = ["multithread"] }
// walkdir = "2"
// chardet = "0.2"
// encoding_rs = "0.8"
// libc = "0.2" (if not already a dependency; `rss_mb` calls `libc::sysconf` on Linux)
/// Returns the resident set size (RSS) of the current process in MB (1024 * 1024 bytes).
///
/// On Linux the value is taken from the second field of `/proc/self/statm` (resident
/// pages) multiplied by the page size reported by `sysconf(_SC_PAGESIZE)`. If that path
/// fails for any reason, and on every other platform, the value is obtained through
/// `sysinfo`. Returns 0 when the current process cannot be identified or looked up.
fn rss_mb() -> u64 {
    const BYTES_PER_MB: u64 = 1024 * 1024;

    #[cfg(target_os = "linux")]
    {
        use std::io::Read;
        let mut statm = String::new();
        if let Ok(mut f) = std::fs::File::open("/proc/self/statm")
            && f.read_to_string(&mut statm).is_ok()
            && let Some(rss_pages) = statm
                .split_whitespace()
                .nth(1)
                .and_then(|x| x.parse::<u64>().ok())
        {
            // SAFETY: `sysconf` takes a plain integer selector, touches no caller-owned
            // memory and has no preconditions.
            let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
            // `sysconf` signals failure with -1; a checked conversion keeps that from
            // turning into a gigantic page size (and an overflowing multiplication).
            if let Ok(page_size) = u64::try_from(page_size) {
                return rss_pages.saturating_mul(page_size) / BYTES_PER_MB;
            }
        }
    }

    use sysinfo::{System, get_current_pid};
    let mut sys = System::new();
    sys.refresh_processes();
    let Ok(pid) = get_current_pid() else { return 0 };
    // sysinfo reports process memory in bytes (0.26 and later, which includes the 0.30
    // named in the Cargo notes), so convert bytes -> MB rather than dividing by 1024 once.
    sys.process(pid).map_or(0, |p| p.memory() / BYTES_PER_MB)
}
/// Prints a single checkpoint line to stdout.
///
/// The line contains `label` (right-aligned in a 14-character column), the time elapsed
/// since `t0`, and the current resident set size as reported by `rss_mb`.
fn mark(label: &str, t0: Instant) {
    // Sample the clock first and the memory second, then emit both together.
    let since_start = t0.elapsed();
    let resident = rss_mb();
    println!(
        "[{:>14}] t={:>8.2?} rss={} MB",
        label, since_start, resident
    );
}
/// Builds the set of ignored paths listed in `<root>/ignore.txt`.
///
/// Each line of the file is trimmed; blank lines and lines starting with `#` are
/// skipped. Relative entries are resolved against `root`, absolute entries are used
/// as-is, and every entry is then canonicalized. Entries that cannot be canonicalized
/// (for example because the path does not exist) are dropped silently.
///
/// Returns an empty set when `ignore.txt` is absent. When it exists but cannot be read,
/// a message is written to stderr and an empty set is returned.
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
    let mut ignored = HashSet::new();

    let list_path = root.join("ignore.txt");
    if !list_path.exists() {
        return ignored;
    }

    let text = match fs::read_to_string(&list_path) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("Could not read {}: {e}", list_path.display());
            return ignored;
        }
    };

    for raw_line in text.lines() {
        let entry = raw_line.trim();
        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }
        let candidate = PathBuf::from(entry);
        let resolved = if candidate.is_absolute() {
            candidate
        } else {
            root.join(candidate)
        };
        // Canonical form is what callers compare against; unresolvable entries are skipped.
        if let Ok(canonical) = fs::canonicalize(resolved) {
            ignored.insert(canonical);
        }
    }

    ignored
}
/// Pauses for Enter before error output is printed, but only in interactive runs.
///
/// Stdout is flushed first. If stdin is a terminal, `msg` is written to stderr and one
/// line is read from stdin; otherwise the function returns right away. Failures of the
/// flushes and of the read are ignored.
fn wait_before_errors(msg: &str) {
    let _ = io::stdout().flush();

    // Nothing to wait for when input is piped or redirected.
    if !is_terminal::is_terminal(io::stdin()) {
        return;
    }

    eprint!("{msg}");
    let _ = io::stderr().flush();

    // The typed line itself is irrelevant; reading it is just the "press Enter" gate.
    let mut discarded = String::new();
    let _ = io::stdin().read_line(&mut discarded);
}
/// CLI: `verify_uc <root_dir> [file_name]`
///
fn main() {
let mut args = std::env::args().skip(1);
let root_dir = args.next().unwrap_or_else(|| {
eprintln!("Usage: verify_uc <root_dir> [file_name]");
std::process::exit(1);
});
let target_raw = args.next(); // optional file name hint
let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase());
let single_mode = target_ci.is_some();
let root = PathBuf::from(&root_dir);
if !root.exists() {
eprintln!("Root directory '{root_dir}' does not exist.");
std::process::exit(1);
}
let t0 = Instant::now();
mark("baseline", t0);
// Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode
let ignored = load_ignore_set(&root);
let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
let mut seen = 0usize;
let mut picked_any = false;
for entry in walkdir::WalkDir::new(&root)
.into_iter()
.filter_map(Result::ok)
.filter(|e| {
let path = e.path();
if let Ok(abs) = fs::canonicalize(path)
&& ignored.contains(&abs)
{
return false;
}
path.is_file()
&& path
.extension()
.and_then(|e| e.to_str())
.is_some_and(|e| e.eq_ignore_ascii_case("uc"))
})
{
if !single_mode && seen >= FILE_LIMIT {
break;
}
// If in single-file mode, keep only the first whose file name matches.
if let Some(needle) = target_ci.as_deref() {
let fname = entry
.path()
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("");
let fname_lc = fname.to_ascii_lowercase();
if !(fname_lc == needle || fname_lc.contains(needle)) {
continue;
}
}
seen += 1;
let path = entry.path();
match fs::read(path) {
Ok(raw) => {
let (label, _, _) = chardet::detect(&raw);
let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8);
let (txt, _, _) = enc.decode(&raw);
uc_files.push((path.to_path_buf(), txt.into_owned()));
picked_any = true;
if single_mode {
// Only the first match.
break;
}
}
Err(e) => {
wait_before_errors("Read error detected. Press Enter to print details...");
eprintln!("Failed to read `{}`: {e}", path.display());
std::process::exit(1);
}
}
}
if single_mode && !picked_any {
let needle = target_raw.as_deref().unwrap();
eprintln!(
"No .uc file matching '{needle}' found under '{}'.",
root.display()
);
std::process::exit(1);
}
println!(
"Loaded {} .uc files into memory (cap={}, reached={}).",
uc_files.len(),
FILE_LIMIT,
if !single_mode && uc_files.len() >= FILE_LIMIT {
"yes"
} else {
"no"
}
);
mark("after_read", t0);
// Stage 1: tokenize all
let t_tok = Instant::now();
let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len());
let mut tk_error_idx: Option<usize> = None;
for (i, (path, source)) in uc_files.iter().enumerate() {
let tf = TokenizedFile::tokenize(source);
if tk_error_idx.is_none() && tf.has_errors() {
tk_error_idx = Some(i);
}
tokenized.push((path.clone(), tf));
}
println!(
"Tokenized {} files in {:.2?}",
tokenized.len(),
t_tok.elapsed()
);
mark("after_tokenize", t0);
// If tokenization error: wait, dump tokens for the first failing file, then exit.
if let Some(idx) = tk_error_idx {
let (bad_path, _) = &tokenized[idx];
wait_before_errors("Tokenization error found. Press Enter to dump tokens...");
eprintln!("--- Tokenization error in: {}", bad_path.display());
//bad_tf.dump_debug_layout(); // from DebugTools
std::process::exit(1);
}
// Stage 2: parse all with ONE arena kept alive
let arena = rottlib::arena::Arena::new();
let t_parse = Instant::now();
// First failing parse: (tokenized_index, diagnostics, fatal)
let mut first_fail: Option<(usize, Vec<Diag>, Option<String>)> = None;
for (i, (path, tk)) in tokenized.iter().enumerate() {
// --- progress line BEFORE parsing this file ---
{
use std::io::Write;
eprint!(
"Parsing [{}/{}] {} | rss={} MB\r\n",
i + 1,
tokenized.len(),
path.display(),
rss_mb()
);
let _ = io::stderr().flush();
}
let mut parser = Parser::new(tk, &arena);
match parser.parse_source_file() {
Ok(_) => {
if !parser.diagnostics.is_empty() && first_fail.is_none() {
first_fail = Some((i, parser.diagnostics.clone(), None));
}
}
Err(e) => {
if first_fail.is_none() {
first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}"))));
}
}
}
}
println!(
"Parsed {} files in {:.2?}",
tokenized.len(),
t_parse.elapsed()
);
mark("after_parse", t0);
// Summary
println!("--- Summary ---");
println!("Files processed: {}", tokenized.len());
println!("File cap: {FILE_LIMIT}");
if let Some((idx, diags, fatal)) = first_fail {
wait_before_errors("Parse issues detected. Press Enter to print diagnostics...");
let (path, tf) = &tokenized[idx];
eprintln!("--- Parse issues in first failing file ---");
eprintln!("File: {}", path.display());
if let Some(f) = &fatal {
eprintln!("Fatal parse error: {f}");
}
if diags.is_empty() && fatal.is_none() {
eprintln!("(no diagnostics captured)");
} else {
let use_colors = is_terminal::is_terminal(io::stderr());
let fname = path.display().to_string();
let total = diags.len();
let first_n = DIAG_SHOW_FIRST.min(total);
let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));
if total > first_n + last_n {
// first window
for (k, d) in diags.iter().take(first_n).enumerate() {
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
eprintln!("#{}: {:#?}", k + 1, d);
}
}
eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
// last window
let start = total - last_n;
for (offset, d) in diags.iter().skip(start).enumerate() {
let idx_global = start + offset + 1;
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
eprintln!("#{idx_global}: {d:#?}");
}
}
} else {
for (k, d) in diags.iter().enumerate() {
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
eprintln!("#{}: {:#?}", k + 1, d);
}
}
}
}
std::process::exit(1);
}
println!("All files parsed without diagnostics.");
}