#![allow( clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo, clippy::restriction )] use std::{ collections::HashSet, fs, io::{self, Write}, path::PathBuf, time::Instant, }; use encoding_rs::Encoding; use rottlib::diagnostics::Diagnostic as Diag; use rottlib::lexer::TokenizedFile; use rottlib::parser::Parser; mod pretty; // ---------- CONFIG ---------- const FILE_LIMIT: usize = 10000; // cap on files scanned const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics /// If true, print the old debug struct dump after each pretty diagnostic. const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true; // Cargo.toml additions: // is-terminal = "0.4" // sysinfo = { version = "0.30", features = ["multithread"] } // walkdir = "2" // chardet = "0.2" // encoding_rs = "0.8" // Linux-only accurate RSS in MB. Fallback uses sysinfo. fn rss_mb() -> u64 { #[cfg(target_os = "linux")] { use std::io::Read; let mut s = String::new(); if let Ok(mut f) = std::fs::File::open("/proc/self/statm") && f.read_to_string(&mut s).is_ok() && let Some(rss_pages) = s .split_whitespace() .nth(1) .and_then(|x| x.parse::().ok()) { let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 }; return (rss_pages * page) / (1024 * 1024); } } use sysinfo::{System, get_current_pid}; let mut sys = System::new(); sys.refresh_processes(); let Ok(pid) = get_current_pid() else { return 0 }; sys.process(pid).map_or(0, |p| p.memory() / 1024) } fn mark(label: &str, t0: Instant) { println!( "[{:>14}] t={:>8.2?} rss={} MB", label, t0.elapsed(), rss_mb() ); } /// Read `ignore.txt` next to `root` and build a canonicalized set. fn load_ignore_set(root: &std::path::Path) -> HashSet { let ignore_file = root.join("ignore.txt"); if !ignore_file.exists() { return HashSet::new(); } let content = match fs::read_to_string(&ignore_file) { Ok(s) => s, Err(e) => { eprintln!("Could not read {}: {e}", ignore_file.display()); return HashSet::new(); } }; content .lines() .map(str::trim) .filter(|l| !l.is_empty() && !l.starts_with('#')) .filter_map(|line| { let p = PathBuf::from(line); let abs = if p.is_absolute() { p } else { root.join(p) }; fs::canonicalize(abs).ok() }) .collect() } /// Wait for Enter if running in a TTY, shown before printing errors. fn wait_before_errors(msg: &str) { let _ = io::stdout().flush(); if is_terminal::is_terminal(io::stdin()) { eprint!("{msg}"); let _ = io::stderr().flush(); let mut s = String::new(); let _ = io::stdin().read_line(&mut s); } } /// CLI: `verify_uc [file_name]` /// fn main() { let mut args = std::env::args().skip(1); let root_dir = args.next().unwrap_or_else(|| { eprintln!("Usage: verify_uc [file_name]"); std::process::exit(1); }); let target_raw = args.next(); // optional file name hint let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase()); let single_mode = target_ci.is_some(); let root = PathBuf::from(&root_dir); if !root.exists() { eprintln!("Root directory '{root_dir}' does not exist."); std::process::exit(1); } let t0 = Instant::now(); mark("baseline", t0); // Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode let ignored = load_ignore_set(&root); let mut uc_files: Vec<(PathBuf, String)> = Vec::new(); let mut seen = 0usize; let mut picked_any = false; for entry in walkdir::WalkDir::new(&root) .into_iter() .filter_map(Result::ok) .filter(|e| { let path = e.path(); if let Ok(abs) = fs::canonicalize(path) && ignored.contains(&abs) { return false; } path.is_file() && path .extension() .and_then(|e| e.to_str()) .is_some_and(|e| e.eq_ignore_ascii_case("uc")) }) { if !single_mode && seen >= FILE_LIMIT { break; } // If in single-file mode, keep only the first whose file name matches. if let Some(needle) = target_ci.as_deref() { let fname = entry .path() .file_name() .and_then(|s| s.to_str()) .unwrap_or(""); let fname_lc = fname.to_ascii_lowercase(); if !(fname_lc == needle || fname_lc.contains(needle)) { continue; } } seen += 1; let path = entry.path(); match fs::read(path) { Ok(raw) => { let (label, _, _) = chardet::detect(&raw); let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8); let (txt, _, _) = enc.decode(&raw); uc_files.push((path.to_path_buf(), txt.into_owned())); picked_any = true; if single_mode { // Only the first match. break; } } Err(e) => { wait_before_errors("Read error detected. Press Enter to print details..."); eprintln!("Failed to read `{}`: {e}", path.display()); std::process::exit(1); } } } if single_mode && !picked_any { let needle = target_raw.as_deref().unwrap(); eprintln!( "No .uc file matching '{needle}' found under '{}'.", root.display() ); std::process::exit(1); } println!( "Loaded {} .uc files into memory (cap={}, reached={}).", uc_files.len(), FILE_LIMIT, if !single_mode && uc_files.len() >= FILE_LIMIT { "yes" } else { "no" } ); mark("after_read", t0); // Stage 1: tokenize all let t_tok = Instant::now(); let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len()); let mut tk_error_idx: Option = None; for (i, (path, source)) in uc_files.iter().enumerate() { let tf = TokenizedFile::tokenize(source); if tk_error_idx.is_none() && tf.has_errors() { tk_error_idx = Some(i); } tokenized.push((path.clone(), tf)); } println!( "Tokenized {} files in {:.2?}", tokenized.len(), t_tok.elapsed() ); mark("after_tokenize", t0); // If tokenization error: wait, dump tokens for the first failing file, then exit. if let Some(idx) = tk_error_idx { let (bad_path, _) = &tokenized[idx]; wait_before_errors("Tokenization error found. Press Enter to dump tokens..."); eprintln!("--- Tokenization error in: {}", bad_path.display()); //bad_tf.dump_debug_layout(); // from DebugTools std::process::exit(1); } // Stage 2: parse all with ONE arena kept alive let arena = rottlib::arena::Arena::new(); let t_parse = Instant::now(); // First failing parse: (tokenized_index, diagnostics, fatal) let mut first_fail: Option<(usize, Vec, Option)> = None; for (i, (path, tk)) in tokenized.iter().enumerate() { // --- progress line BEFORE parsing this file --- { use std::io::Write; eprint!( "Parsing [{}/{}] {} | rss={} MB\r\n", i + 1, tokenized.len(), path.display(), rss_mb() ); let _ = io::stderr().flush(); } let mut parser = Parser::new(tk, &arena); match parser.parse_source_file() { Ok(_) => { if !parser.diagnostics.is_empty() && first_fail.is_none() { first_fail = Some((i, parser.diagnostics.clone(), None)); } } Err(e) => { if first_fail.is_none() { first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}")))); } } } } println!( "Parsed {} files in {:.2?}", tokenized.len(), t_parse.elapsed() ); mark("after_parse", t0); // Summary println!("--- Summary ---"); println!("Files processed: {}", tokenized.len()); println!("File cap: {FILE_LIMIT}"); if let Some((idx, diags, fatal)) = first_fail { wait_before_errors("Parse issues detected. Press Enter to print diagnostics..."); let (path, tf) = &tokenized[idx]; eprintln!("--- Parse issues in first failing file ---"); eprintln!("File: {}", path.display()); if let Some(f) = &fatal { eprintln!("Fatal parse error: {f}"); } if diags.is_empty() && fatal.is_none() { eprintln!("(no diagnostics captured)"); } else { let use_colors = is_terminal::is_terminal(io::stderr()); let fname = path.display().to_string(); let total = diags.len(); let first_n = DIAG_SHOW_FIRST.min(total); let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n)); if total > first_n + last_n { // first window for (k, d) in diags.iter().take(first_n).enumerate() { let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors); if ALSO_PRINT_DEBUG_AFTER_PRETTY { eprintln!("#{}: {:#?}", k + 1, d); } } eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n)); // last window let start = total - last_n; for (offset, d) in diags.iter().skip(start).enumerate() { let idx_global = start + offset + 1; let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors); if ALSO_PRINT_DEBUG_AFTER_PRETTY { eprintln!("#{idx_global}: {d:#?}"); } } } else { for (k, d) in diags.iter().enumerate() { let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors); if ALSO_PRINT_DEBUG_AFTER_PRETTY { eprintln!("#{}: {:#?}", k + 1, d); } } } } std::process::exit(1); } println!("All files parsed without diagnostics."); }