// rott/dev_tests/src/uc_lexer_verify.rs

#![allow(
    clippy::all,
    clippy::pedantic,
    clippy::nursery,
    clippy::cargo,
    clippy::restriction
)]

use std::{
    collections::HashSet,
    fs,
    io::{self, Write},
    path::PathBuf,
    time::Instant,
};

use encoding_rs::Encoding;
use rottlib::diagnostics::Diagnostic as Diag;
use rottlib::lexer::TokenizedFile;
use rottlib::parser::Parser;

mod pretty;

// ---------- CONFIG ----------
/// Maximum number of `.uc` files loaded in a multi-file run.
/// Not applied when a file-name hint selects single-file mode.
const FILE_LIMIT: usize = 10000; // cap on files scanned
/// How many diagnostics from the start of the list are shown when the list
/// is too long to print in full.
const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics
/// How many diagnostics from the end of the list are shown when the list
/// is too long to print in full.
const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics
/// If true, print the old debug struct dump after each pretty diagnostic.
const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;

// Cargo.toml additions:
// is-terminal = "0.4"
// sysinfo = { version = "0.30", features = ["multithread"] }
// walkdir = "2"
// chardet = "0.2"
// encoding_rs = "0.8"
// libc = "0.2" (used by `rss_mb` on Linux)

/// Resident set size of the current process, in whole mebibytes.
///
/// On Linux the value is taken from the second field of `/proc/self/statm`
/// (resident pages) multiplied by the system page size. On other platforms,
/// or when that path fails for any reason, the value reported by `sysinfo`
/// is used instead. Returns 0 when no measurement is available.
fn rss_mb() -> u64 {
    const BYTES_PER_MB: u64 = 1024 * 1024;

    #[cfg(target_os = "linux")]
    {
        if let Ok(statm) = std::fs::read_to_string("/proc/self/statm")
            && let Some(rss_pages) = statm
                .split_whitespace()
                .nth(1)
                .and_then(|field| field.parse::<u64>().ok())
        {
            // SAFETY: `sysconf` only reads a system configuration value; it has
            // no memory-safety preconditions for a valid name constant.
            let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
            // `sysconf` signals failure with -1; a checked conversion keeps that
            // from turning into an enormous page size and lets us fall through
            // to the sysinfo path instead.
            if let Ok(page_size) = u64::try_from(page_size) {
                return rss_pages.saturating_mul(page_size) / BYTES_PER_MB;
            }
        }
    }

    use sysinfo::{System, get_current_pid};
    let mut sys = System::new();
    sys.refresh_processes();
    let Ok(pid) = get_current_pid() else { return 0 };
    // sysinfo 0.30 reports process memory in bytes, so convert bytes -> MB.
    sys.process(pid).map_or(0, |p| p.memory() / BYTES_PER_MB)
}

/// Print a one-line checkpoint: the stage `label`, the time elapsed since
/// `t0`, and the current resident set size in MB.
fn mark(label: &str, t0: Instant) {
    let elapsed = t0.elapsed();
    let rss = rss_mb();
    println!("[{label:>14}] t={elapsed:>8.2?} rss={rss} MB");
}

/// Build the set of ignored paths from `ignore.txt` located inside `root`.
///
/// Each line is trimmed; blank lines and lines starting with `#` are skipped.
/// Relative entries are resolved against `root`. Every entry is canonicalized,
/// and entries that cannot be canonicalized are dropped. A missing file yields
/// an empty set; an unreadable file is reported on stderr and also yields an
/// empty set.
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
    let list_path = root.join("ignore.txt");
    if !list_path.exists() {
        return HashSet::new();
    }

    let text = match fs::read_to_string(&list_path) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("Could not read {}: {e}", list_path.display());
            return HashSet::new();
        }
    };

    let mut ignored = HashSet::new();
    for raw_line in text.lines() {
        let entry = raw_line.trim();
        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }
        let candidate = PathBuf::from(entry);
        let absolute = if candidate.is_absolute() {
            candidate
        } else {
            root.join(candidate)
        };
        if let Ok(resolved) = fs::canonicalize(absolute) {
            ignored.insert(resolved);
        }
    }
    ignored
}

/// Pause before error output when stdin is a terminal.
///
/// Flushes stdout first so earlier output appears before the prompt. If stdin
/// is a terminal, writes `msg` to stderr and blocks until a line is read;
/// otherwise returns immediately. I/O failures are ignored.
fn wait_before_errors(msg: &str) {
    let _ = io::stdout().flush();
    if !is_terminal::is_terminal(io::stdin()) {
        return;
    }
    eprint!("{msg}");
    let _ = io::stderr().flush();
    let mut line = String::new();
    let _ = io::stdin().read_line(&mut line);
}

/// CLI: `verify_uc <root_dir> [file_name]`
///
fn main() {
    let mut args = std::env::args().skip(1);
    let root_dir = args.next().unwrap_or_else(|| {
        eprintln!("Usage: verify_uc <root_dir> [file_name]");
        std::process::exit(1);
    });

    let target_raw = args.next(); // optional file name hint
    let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase());
    let single_mode = target_ci.is_some();

    let root = PathBuf::from(&root_dir);
    if !root.exists() {
        eprintln!("Root directory '{root_dir}' does not exist.");
        std::process::exit(1);
    }

    let t0 = Instant::now();
    mark("baseline", t0);

    // Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode
    let ignored = load_ignore_set(&root);
    let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
    let mut seen = 0usize;
    let mut picked_any = false;

    for entry in walkdir::WalkDir::new(&root)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|e| {
            let path = e.path();
            if let Ok(abs) = fs::canonicalize(path)
                && ignored.contains(&abs)
            {
                return false;
            }
            path.is_file()
                && path
                    .extension()
                    .and_then(|e| e.to_str())
                    .is_some_and(|e| e.eq_ignore_ascii_case("uc"))
        })
    {
        if !single_mode && seen >= FILE_LIMIT {
            break;
        }

        // If in single-file mode, keep only the first whose file name matches.
        if let Some(needle) = target_ci.as_deref() {
            let fname = entry
                .path()
                .file_name()
                .and_then(|s| s.to_str())
                .unwrap_or("");
            let fname_lc = fname.to_ascii_lowercase();
            if !(fname_lc == needle || fname_lc.contains(needle)) {
                continue;
            }
        }

        seen += 1;

        let path = entry.path();
        match fs::read(path) {
            Ok(raw) => {
                let (label, _, _) = chardet::detect(&raw);
                let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8);
                let (txt, _, _) = enc.decode(&raw);
                uc_files.push((path.to_path_buf(), txt.into_owned()));
                picked_any = true;
                if single_mode {
                    // Only the first match.
                    break;
                }
            }
            Err(e) => {
                wait_before_errors("Read error detected. Press Enter to print details...");
                eprintln!("Failed to read `{}`: {e}", path.display());
                std::process::exit(1);
            }
        }
    }

    if single_mode && !picked_any {
        let needle = target_raw.as_deref().unwrap();
        eprintln!(
            "No .uc file matching '{needle}' found under '{}'.",
            root.display()
        );
        std::process::exit(1);
    }

    println!(
        "Loaded {} .uc files into memory (cap={}, reached={}).",
        uc_files.len(),
        FILE_LIMIT,
        if !single_mode && uc_files.len() >= FILE_LIMIT {
            "yes"
        } else {
            "no"
        }
    );
    mark("after_read", t0);

    // Stage 1: tokenize all
    let t_tok = Instant::now();
    let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len());
    let mut tk_error_idx: Option<usize> = None;

    for (i, (path, source)) in uc_files.iter().enumerate() {
        let tf = TokenizedFile::tokenize(source);
        if tk_error_idx.is_none() && tf.has_errors() {
            tk_error_idx = Some(i);
        }
        tokenized.push((path.clone(), tf));
    }
    println!(
        "Tokenized {} files in {:.2?}",
        tokenized.len(),
        t_tok.elapsed()
    );
    mark("after_tokenize", t0);

    // If tokenization error: wait, dump tokens for the first failing file, then exit.
    if let Some(idx) = tk_error_idx {
        let (bad_path, _) = &tokenized[idx];
        wait_before_errors("Tokenization error found. Press Enter to dump tokens...");
        eprintln!("--- Tokenization error in: {}", bad_path.display());
        //bad_tf.dump_debug_layout(); // from DebugTools
        std::process::exit(1);
    }

    // Stage 2: parse all with ONE arena kept alive
    let arena = rottlib::arena::Arena::new();
    let t_parse = Instant::now();

    // First failing parse: (tokenized_index, diagnostics, fatal)
    let mut first_fail: Option<(usize, Vec<Diag>, Option<String>)> = None;

    for (i, (path, tk)) in tokenized.iter().enumerate() {
        // --- progress line BEFORE parsing this file ---
        {
            use std::io::Write;
            eprint!(
                "Parsing [{}/{}] {} | rss={} MB\r\n",
                i + 1,
                tokenized.len(),
                path.display(),
                rss_mb()
            );
            let _ = io::stderr().flush();
        }

        let mut parser = Parser::new(tk, &arena);

        match parser.parse_source_file() {
            Ok(_) => {
                if !parser.diagnostics.is_empty() && first_fail.is_none() {
                    first_fail = Some((i, parser.diagnostics.clone(), None));
                }
            }
            Err(e) => {
                if first_fail.is_none() {
                    first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}"))));
                }
            }
        }
    }

    println!(
        "Parsed {} files in {:.2?}",
        tokenized.len(),
        t_parse.elapsed()
    );
    mark("after_parse", t0);

    // Summary
    println!("--- Summary ---");
    println!("Files processed: {}", tokenized.len());
    println!("File cap:        {FILE_LIMIT}");

    if let Some((idx, diags, fatal)) = first_fail {
        wait_before_errors("Parse issues detected. Press Enter to print diagnostics...");
        let (path, tf) = &tokenized[idx];
        eprintln!("--- Parse issues in first failing file ---");
        eprintln!("File: {}", path.display());
        if let Some(f) = &fatal {
            eprintln!("Fatal parse error: {f}");
        }
        if diags.is_empty() && fatal.is_none() {
            eprintln!("(no diagnostics captured)");
        } else {
            let use_colors = is_terminal::is_terminal(io::stderr());
            let fname = path.display().to_string();
            let total = diags.len();
            let first_n = DIAG_SHOW_FIRST.min(total);
            let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));

            if total > first_n + last_n {
                // first window
                for (k, d) in diags.iter().take(first_n).enumerate() {
                    let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
                    if ALSO_PRINT_DEBUG_AFTER_PRETTY {
                        eprintln!("#{}: {:#?}", k + 1, d);
                    }
                }
                eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
                // last window
                let start = total - last_n;
                for (offset, d) in diags.iter().skip(start).enumerate() {
                    let idx_global = start + offset + 1;
                    let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
                    if ALSO_PRINT_DEBUG_AFTER_PRETTY {
                        eprintln!("#{idx_global}: {d:#?}");
                    }
                }
            } else {
                for (k, d) in diags.iter().enumerate() {
                    let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
                    if ALSO_PRINT_DEBUG_AFTER_PRETTY {
                        eprintln!("#{}: {:#?}", k + 1, d);
                    }
                }
            }
        }
        std::process::exit(1);
    }

    println!("All files parsed without diagnostics.");
}