339 lines
11 KiB
Rust
339 lines
11 KiB
Rust
#![allow(
|
|
clippy::all,
|
|
clippy::pedantic,
|
|
clippy::nursery,
|
|
clippy::cargo,
|
|
clippy::restriction
|
|
)]
|
|
|
|
use std::{
|
|
collections::HashSet,
|
|
fs,
|
|
io::{self, Write},
|
|
path::PathBuf,
|
|
time::Instant,
|
|
};
|
|
|
|
use encoding_rs::Encoding;
|
|
use rottlib::diagnostics::Diagnostic as Diag;
|
|
use rottlib::lexer::TokenizedFile;
|
|
use rottlib::parser::Parser;
|
|
|
|
mod pretty;
|
|
|
|
// ---------- CONFIG ----------

/// Upper bound on the number of `.uc` files loaded in one run
/// (not applied when a single file is requested on the command line).
const FILE_LIMIT: usize = 10_000;

/// Number of diagnostics shown from the start of the list.
const DIAG_SHOW_FIRST: usize = 12;

/// Number of diagnostics shown from the end of the list.
const DIAG_SHOW_LAST: usize = 12;

/// When `true`, the `{:#?}` debug dump of every diagnostic is printed
/// right after that diagnostic has been rendered.
const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;
|
|
|
|
// Cargo.toml additions:
|
|
// is-terminal = "0.4"
|
|
// sysinfo = { version = "0.30", features = ["multithread"] }
|
|
// walkdir = "2"
|
|
// chardet = "0.2"
|
|
// encoding_rs = "0.8"
|
|
|
|
// Linux-only accurate RSS in MB. Fallback uses sysinfo.
|
|
fn rss_mb() -> u64 {
|
|
#[cfg(target_os = "linux")]
|
|
{
|
|
use std::io::Read;
|
|
let mut s = String::new();
|
|
if let Ok(mut f) = std::fs::File::open("/proc/self/statm")
|
|
&& f.read_to_string(&mut s).is_ok()
|
|
&& let Some(rss_pages) = s
|
|
.split_whitespace()
|
|
.nth(1)
|
|
.and_then(|x| x.parse::<u64>().ok())
|
|
{
|
|
let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 };
|
|
return (rss_pages * page) / (1024 * 1024);
|
|
}
|
|
}
|
|
use sysinfo::{System, get_current_pid};
|
|
let mut sys = System::new();
|
|
sys.refresh_processes();
|
|
let Ok(pid) = get_current_pid() else { return 0 };
|
|
sys.process(pid).map_or(0, |p| p.memory() / 1024)
|
|
}
|
|
|
|
fn mark(label: &str, t0: Instant) {
|
|
println!(
|
|
"[{:>14}] t={:>8.2?} rss={} MB",
|
|
label,
|
|
t0.elapsed(),
|
|
rss_mb()
|
|
);
|
|
}
|
|
|
|
/// Builds the set of paths to skip from the `ignore.txt` file inside `root`.
///
/// Every non-empty line that does not start with `#` is treated as a path.
/// Relative paths are resolved against `root`; each path is then
/// canonicalized so it can be compared with canonicalized walk results.
///
/// Returns an empty set when `ignore.txt` does not exist. If the file exists
/// but cannot be read, a message is printed to stderr and an empty set is
/// returned. Entries that cannot be canonicalized (for example because the
/// path does not exist) are reported on stderr and left out of the set.
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
    let ignore_file = root.join("ignore.txt");

    // Read directly instead of probing with `exists()` first: one syscall
    // fewer and no window between the check and the read.
    let content = match fs::read_to_string(&ignore_file) {
        Ok(text) => text,
        // Having no ignore list is the normal case; stay quiet.
        Err(e) if e.kind() == io::ErrorKind::NotFound => return HashSet::new(),
        Err(e) => {
            eprintln!("Could not read {}: {e}", ignore_file.display());
            return HashSet::new();
        }
    };

    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .filter_map(|line| {
            let entry = PathBuf::from(line);
            let abs = if entry.is_absolute() {
                entry
            } else {
                root.join(entry)
            };
            match fs::canonicalize(&abs) {
                Ok(canonical) => Some(canonical),
                Err(e) => {
                    // Surface typos in the ignore list instead of hiding them.
                    eprintln!(
                        "Skipping entry '{line}' from {}: {e}",
                        ignore_file.display()
                    );
                    None
                }
            }
        })
        .collect()
}
|
|
|
|
/// Wait for Enter if running in a TTY, shown before printing errors.
|
|
fn wait_before_errors(msg: &str) {
|
|
let _ = io::stdout().flush();
|
|
if is_terminal::is_terminal(io::stdin()) {
|
|
eprint!("{msg}");
|
|
let _ = io::stderr().flush();
|
|
let mut s = String::new();
|
|
let _ = io::stdin().read_line(&mut s);
|
|
}
|
|
}
|
|
|
|
/// CLI: `verify_uc <root_dir> [file_name]`
|
|
///
|
|
fn main() {
|
|
let mut args = std::env::args().skip(1);
|
|
let root_dir = args.next().unwrap_or_else(|| {
|
|
eprintln!("Usage: verify_uc <root_dir> [file_name]");
|
|
std::process::exit(1);
|
|
});
|
|
|
|
let target_raw = args.next(); // optional file name hint
|
|
let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase());
|
|
let single_mode = target_ci.is_some();
|
|
|
|
let root = PathBuf::from(&root_dir);
|
|
if !root.exists() {
|
|
eprintln!("Root directory '{root_dir}' does not exist.");
|
|
std::process::exit(1);
|
|
}
|
|
|
|
let t0 = Instant::now();
|
|
mark("baseline", t0);
|
|
|
|
// Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode
|
|
let ignored = load_ignore_set(&root);
|
|
let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
|
|
let mut seen = 0usize;
|
|
let mut picked_any = false;
|
|
|
|
for entry in walkdir::WalkDir::new(&root)
|
|
.into_iter()
|
|
.filter_map(Result::ok)
|
|
.filter(|e| {
|
|
let path = e.path();
|
|
if let Ok(abs) = fs::canonicalize(path)
|
|
&& ignored.contains(&abs)
|
|
{
|
|
return false;
|
|
}
|
|
path.is_file()
|
|
&& path
|
|
.extension()
|
|
.and_then(|e| e.to_str())
|
|
.is_some_and(|e| e.eq_ignore_ascii_case("uc"))
|
|
})
|
|
{
|
|
if !single_mode && seen >= FILE_LIMIT {
|
|
break;
|
|
}
|
|
|
|
// If in single-file mode, keep only the first whose file name matches.
|
|
if let Some(needle) = target_ci.as_deref() {
|
|
let fname = entry
|
|
.path()
|
|
.file_name()
|
|
.and_then(|s| s.to_str())
|
|
.unwrap_or("");
|
|
let fname_lc = fname.to_ascii_lowercase();
|
|
if !(fname_lc == needle || fname_lc.contains(needle)) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
seen += 1;
|
|
|
|
let path = entry.path();
|
|
match fs::read(path) {
|
|
Ok(raw) => {
|
|
let (label, _, _) = chardet::detect(&raw);
|
|
let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8);
|
|
let (txt, _, _) = enc.decode(&raw);
|
|
uc_files.push((path.to_path_buf(), txt.into_owned()));
|
|
picked_any = true;
|
|
if single_mode {
|
|
// Only the first match.
|
|
break;
|
|
}
|
|
}
|
|
Err(e) => {
|
|
wait_before_errors("Read error detected. Press Enter to print details...");
|
|
eprintln!("Failed to read `{}`: {e}", path.display());
|
|
std::process::exit(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
if single_mode && !picked_any {
|
|
let needle = target_raw.as_deref().unwrap();
|
|
eprintln!(
|
|
"No .uc file matching '{needle}' found under '{}'.",
|
|
root.display()
|
|
);
|
|
std::process::exit(1);
|
|
}
|
|
|
|
println!(
|
|
"Loaded {} .uc files into memory (cap={}, reached={}).",
|
|
uc_files.len(),
|
|
FILE_LIMIT,
|
|
if !single_mode && uc_files.len() >= FILE_LIMIT {
|
|
"yes"
|
|
} else {
|
|
"no"
|
|
}
|
|
);
|
|
mark("after_read", t0);
|
|
|
|
// Stage 1: tokenize all
|
|
let t_tok = Instant::now();
|
|
let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len());
|
|
let mut tk_error_idx: Option<usize> = None;
|
|
|
|
for (i, (path, source)) in uc_files.iter().enumerate() {
|
|
let tf = TokenizedFile::tokenize(source);
|
|
if tk_error_idx.is_none() && tf.has_errors() {
|
|
tk_error_idx = Some(i);
|
|
}
|
|
tokenized.push((path.clone(), tf));
|
|
}
|
|
println!(
|
|
"Tokenized {} files in {:.2?}",
|
|
tokenized.len(),
|
|
t_tok.elapsed()
|
|
);
|
|
mark("after_tokenize", t0);
|
|
|
|
// If tokenization error: wait, dump tokens for the first failing file, then exit.
|
|
if let Some(idx) = tk_error_idx {
|
|
let (bad_path, _) = &tokenized[idx];
|
|
wait_before_errors("Tokenization error found. Press Enter to dump tokens...");
|
|
eprintln!("--- Tokenization error in: {}", bad_path.display());
|
|
//bad_tf.dump_debug_layout(); // from DebugTools
|
|
std::process::exit(1);
|
|
}
|
|
|
|
// Stage 2: parse all with ONE arena kept alive
|
|
let arena = rottlib::arena::Arena::new();
|
|
let t_parse = Instant::now();
|
|
|
|
// First failing parse: (tokenized_index, diagnostics, fatal)
|
|
let mut first_fail: Option<(usize, Vec<Diag>, Option<String>)> = None;
|
|
|
|
for (i, (path, tk)) in tokenized.iter().enumerate() {
|
|
// --- progress line BEFORE parsing this file ---
|
|
{
|
|
use std::io::Write;
|
|
eprint!(
|
|
"Parsing [{}/{}] {} | rss={} MB\r\n",
|
|
i + 1,
|
|
tokenized.len(),
|
|
path.display(),
|
|
rss_mb()
|
|
);
|
|
let _ = io::stderr().flush();
|
|
}
|
|
|
|
let mut parser = Parser::new(tk, &arena);
|
|
|
|
match parser.parse_source_file() {
|
|
Ok(_) => {
|
|
if !parser.diagnostics.is_empty() && first_fail.is_none() {
|
|
first_fail = Some((i, parser.diagnostics.clone(), None));
|
|
}
|
|
}
|
|
Err(e) => {
|
|
if first_fail.is_none() {
|
|
first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}"))));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
println!(
|
|
"Parsed {} files in {:.2?}",
|
|
tokenized.len(),
|
|
t_parse.elapsed()
|
|
);
|
|
mark("after_parse", t0);
|
|
|
|
// Summary
|
|
println!("--- Summary ---");
|
|
println!("Files processed: {}", tokenized.len());
|
|
println!("File cap: {FILE_LIMIT}");
|
|
|
|
if let Some((idx, diags, fatal)) = first_fail {
|
|
wait_before_errors("Parse issues detected. Press Enter to print diagnostics...");
|
|
let (path, tf) = &tokenized[idx];
|
|
eprintln!("--- Parse issues in first failing file ---");
|
|
eprintln!("File: {}", path.display());
|
|
if let Some(f) = &fatal {
|
|
eprintln!("Fatal parse error: {f}");
|
|
}
|
|
if diags.is_empty() && fatal.is_none() {
|
|
eprintln!("(no diagnostics captured)");
|
|
} else {
|
|
let use_colors = is_terminal::is_terminal(io::stderr());
|
|
let fname = path.display().to_string();
|
|
let total = diags.len();
|
|
let first_n = DIAG_SHOW_FIRST.min(total);
|
|
let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));
|
|
|
|
if total > first_n + last_n {
|
|
// first window
|
|
for (k, d) in diags.iter().take(first_n).enumerate() {
|
|
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
|
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
|
eprintln!("#{}: {:#?}", k + 1, d);
|
|
}
|
|
}
|
|
eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
|
|
// last window
|
|
let start = total - last_n;
|
|
for (offset, d) in diags.iter().skip(start).enumerate() {
|
|
let idx_global = start + offset + 1;
|
|
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
|
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
|
eprintln!("#{idx_global}: {d:#?}");
|
|
}
|
|
}
|
|
} else {
|
|
for (k, d) in diags.iter().enumerate() {
|
|
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
|
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
|
eprintln!("#{}: {:#?}", k + 1, d);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
std::process::exit(1);
|
|
}
|
|
|
|
println!("All files parsed without diagnostics.");
|
|
}
|