Add delimiter matching to lexer
This commit is contained in:
parent
150bd2f5cf
commit
9d3313995e
@ -1,13 +0,0 @@
|
||||
// diagnostics_render.rs
|
||||
|
||||
use rottlib::diagnostics::{Diagnostic};
|
||||
use rottlib::lexer::TokenizedFile;
|
||||
|
||||
pub fn render_diagnostic(
|
||||
diag: &Diagnostic,
|
||||
_file: &TokenizedFile,
|
||||
file_name: Option<&str>,
|
||||
colors: bool,
|
||||
) {
|
||||
diag.render(_file, file_name.unwrap_or("<default>"));
|
||||
}
|
||||
@ -19,8 +19,6 @@ use rottlib::diagnostics::Diagnostic as Diag;
|
||||
use rottlib::lexer::TokenizedFile;
|
||||
use rottlib::parser::Parser;
|
||||
|
||||
mod pretty;
|
||||
|
||||
// ---------- CONFIG ----------
|
||||
const FILE_LIMIT: usize = 10000; // cap on files scanned
|
||||
const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics
|
||||
@ -35,6 +33,43 @@ const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;
|
||||
// chardet = "0.2"
|
||||
// encoding_rs = "0.8"
|
||||
|
||||
fn render_diagnostic(diag: &Diag, file: &TokenizedFile<'_>, file_name: &str) {
|
||||
diag.render(file, file_name);
|
||||
}
|
||||
|
||||
fn render_diagnostics_window(diags: &[Diag], tf: &TokenizedFile<'_>, file_name: &str) {
|
||||
let total = diags.len();
|
||||
let first_n = DIAG_SHOW_FIRST.min(total);
|
||||
let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));
|
||||
|
||||
if total > first_n + last_n {
|
||||
for (k, d) in diags.iter().take(first_n).enumerate() {
|
||||
render_diagnostic(d, tf, file_name);
|
||||
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||
eprintln!("#{}: {:#?}", k + 1, d);
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
|
||||
|
||||
let start = total - last_n;
|
||||
for (offset, d) in diags.iter().skip(start).enumerate() {
|
||||
let idx_global = start + offset + 1;
|
||||
render_diagnostic(d, tf, file_name);
|
||||
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||
eprintln!("#{idx_global}: {d:#?}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (k, d) in diags.iter().enumerate() {
|
||||
render_diagnostic(d, tf, file_name);
|
||||
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||
eprintln!("#{}: {:#?}", k + 1, d);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Linux-only accurate RSS in MB. Fallback uses sysinfo.
|
||||
fn rss_mb() -> u64 {
|
||||
#[cfg(target_os = "linux")]
|
||||
@ -230,11 +265,24 @@ fn main() {
|
||||
mark("after_tokenize", t0);
|
||||
|
||||
// If tokenization error: wait, dump tokens for the first failing file, then exit.
|
||||
// If tokenization error: wait, print lexer diagnostics for the first failing file, then exit.
|
||||
if let Some(idx) = tk_error_idx {
|
||||
let (bad_path, _) = &tokenized[idx];
|
||||
wait_before_errors("Tokenization error found. Press Enter to dump tokens...");
|
||||
eprintln!("--- Tokenization error in: {}", bad_path.display());
|
||||
//bad_tf.dump_debug_layout(); // from DebugTools
|
||||
let (bad_path, bad_tf) = &tokenized[idx];
|
||||
wait_before_errors("Tokenization issues detected. Press Enter to print diagnostics...");
|
||||
|
||||
let fname = bad_path.display().to_string();
|
||||
|
||||
eprintln!("--- Tokenization issues in first failing file ---");
|
||||
eprintln!("File: {fname}");
|
||||
|
||||
let diags = bad_tf.diagnostics();
|
||||
|
||||
if diags.is_empty() {
|
||||
eprintln!("(no diagnostics captured)");
|
||||
} else {
|
||||
render_diagnostics_window(diags, bad_tf, &fname);
|
||||
}
|
||||
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
@ -298,38 +346,8 @@ fn main() {
|
||||
if diags.is_empty() && fatal.is_none() {
|
||||
eprintln!("(no diagnostics captured)");
|
||||
} else {
|
||||
let use_colors = is_terminal::is_terminal(io::stderr());
|
||||
let fname = path.display().to_string();
|
||||
let total = diags.len();
|
||||
let first_n = DIAG_SHOW_FIRST.min(total);
|
||||
let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));
|
||||
|
||||
if total > first_n + last_n {
|
||||
// first window
|
||||
for (k, d) in diags.iter().take(first_n).enumerate() {
|
||||
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
||||
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||
eprintln!("#{}: {:#?}", k + 1, d);
|
||||
}
|
||||
}
|
||||
eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
|
||||
// last window
|
||||
let start = total - last_n;
|
||||
for (offset, d) in diags.iter().skip(start).enumerate() {
|
||||
let idx_global = start + offset + 1;
|
||||
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
||||
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||
eprintln!("#{idx_global}: {d:#?}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (k, d) in diags.iter().enumerate() {
|
||||
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
||||
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||
eprintln!("#{}: {:#?}", k + 1, d);
|
||||
}
|
||||
}
|
||||
}
|
||||
render_diagnostics_window(&diags, tf, &fname);
|
||||
}
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
@ -7,69 +7,63 @@
|
||||
)]
|
||||
|
||||
use rottlib::arena::Arena;
|
||||
use rottlib::diagnostics::Diagnostic;
|
||||
use rottlib::lexer::TokenizedFile;
|
||||
use rottlib::parser::Parser;
|
||||
|
||||
mod pretty;
|
||||
|
||||
// a * * *
|
||||
|
||||
/// Expressions to test.
|
||||
/// Lexer-focused fixtures.
|
||||
///
|
||||
/// Add, remove, or edit entries here.
|
||||
/// Using `(&str, &str)` gives each case a human-readable label.
|
||||
/// Expressions to test.
|
||||
///
|
||||
/// Add, remove, or edit entries here.
|
||||
/// Using `(&str, &str)` gives each case a human-readable label.
|
||||
/// Keep these small: the goal is to inspect lexer diagnostics and delimiter
|
||||
/// recovery behavior, not full parser behavior.
|
||||
const TEST_CASES: &[(&str, &str)] = &[
|
||||
// P0022: invalid return value start
|
||||
// L0001: invalid or unknown token
|
||||
(
|
||||
"files/P0022_01.uc",
|
||||
"return ] ;",
|
||||
),
|
||||
(
|
||||
"files/P0022_02.uc",
|
||||
"return\n ]\n;\n",
|
||||
),
|
||||
(
|
||||
"files/P0022_03.uc",
|
||||
"return\n}\n",
|
||||
"files/L0001_01.uc",
|
||||
"`",
|
||||
),
|
||||
|
||||
// P0023: invalid break value start
|
||||
// L0002: unexpected closing delimiter
|
||||
(
|
||||
"files/P0023_01.uc",
|
||||
"break ] ;",
|
||||
),
|
||||
(
|
||||
"files/P0023_02.uc",
|
||||
"break\n \n\n\n\n ]\n;\n",
|
||||
),
|
||||
(
|
||||
"files/P0023_03.uc",
|
||||
"break\n}\n",
|
||||
"files/L0002_01.uc",
|
||||
"]",
|
||||
),
|
||||
|
||||
// P0024: goto target is missing or not a label token
|
||||
// L0003: unclosed delimiter before later closing delimiter
|
||||
//
|
||||
// The `}` can still recover by matching the earlier `{`.
|
||||
(
|
||||
"files/P0024_01.uc",
|
||||
"goto;",
|
||||
"files/L0003_01.uc",
|
||||
"{\n foo(\n}\n",
|
||||
),
|
||||
|
||||
// L0004: mismatched closing delimiter
|
||||
(
|
||||
"files/P0024_02.uc",
|
||||
"goto\n ;\n",
|
||||
"files/L0004_01.uc",
|
||||
"(]",
|
||||
),
|
||||
|
||||
// L0005: unclosed delimiter at end of file
|
||||
(
|
||||
"files/P0024_03.uc",
|
||||
"goto\n ]\n;\n",
|
||||
"files/L0005_01.uc",
|
||||
"foo(",
|
||||
),
|
||||
|
||||
// Mixed recovery case:
|
||||
//
|
||||
// `)` recovers by matching `(` after treating `[` as unclosed;
|
||||
// the following `]` is then unexpected.
|
||||
(
|
||||
"files/P0024_04.uc",
|
||||
"goto",
|
||||
"files/L_mixed_01.uc",
|
||||
"([)]",
|
||||
),
|
||||
];
|
||||
|
||||
/// If true, also run the parser after tokenization.
|
||||
///
|
||||
/// For lexer-focused fixtures this is usually noisy, so keep it off unless you
|
||||
/// want to inspect how parser recovery behaves after lexer diagnostics.
|
||||
const RUN_PARSER: bool = false;
|
||||
|
||||
/// If true, print the parsed expression using Debug formatting.
|
||||
const PRINT_PARSED_EXPR: bool = false;
|
||||
|
||||
@ -82,8 +76,28 @@ fn main() {
|
||||
let mut had_any_problem = false;
|
||||
|
||||
for (idx, (label, source)) in TEST_CASES.iter().enumerate() {
|
||||
println!("============================================================");
|
||||
println!("Case {}: {}", idx + 1, label);
|
||||
println!("------------------------------------------------------------");
|
||||
|
||||
let tf = TokenizedFile::tokenize(source);
|
||||
|
||||
let lexer_diagnostics = tf.diagnostics();
|
||||
|
||||
if lexer_diagnostics.is_empty() {
|
||||
println!("Lexer diagnostics: none");
|
||||
} else {
|
||||
had_any_problem = true;
|
||||
|
||||
if ALWAYS_PRINT_DIAGNOSTICS {
|
||||
println!("Lexer diagnostics:");
|
||||
for diag in lexer_diagnostics {
|
||||
render_diagnostic(diag, &tf, Some(label), false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if RUN_PARSER {
|
||||
let mut parser = Parser::new(&tf, &arena);
|
||||
let expr = parser.parse_expression();
|
||||
|
||||
@ -93,13 +107,15 @@ fn main() {
|
||||
}
|
||||
|
||||
if parser.diagnostics.is_empty() {
|
||||
println!("Diagnostics: none");
|
||||
println!("Parser diagnostics: none");
|
||||
} else {
|
||||
had_any_problem = true;
|
||||
|
||||
if ALWAYS_PRINT_DIAGNOSTICS {
|
||||
let use_colors = false;
|
||||
for (k, diag) in parser.diagnostics.iter().enumerate() {
|
||||
pretty::render_diagnostic(diag, &tf, Some(label), use_colors);
|
||||
println!("Parser diagnostics:");
|
||||
for diag in &parser.diagnostics {
|
||||
render_diagnostic(diag, &tf, Some(label), false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -108,10 +124,20 @@ fn main() {
|
||||
}
|
||||
|
||||
println!("============================================================");
|
||||
|
||||
if had_any_problem {
|
||||
println!("Done. At least one case had tokenization or parse diagnostics.");
|
||||
println!("Done. At least one case had lexer or parser diagnostics.");
|
||||
std::process::exit(1);
|
||||
} else {
|
||||
println!("Done. All cases completed without diagnostics.");
|
||||
}
|
||||
}
|
||||
|
||||
fn render_diagnostic(
|
||||
diag: &Diagnostic,
|
||||
file: &TokenizedFile<'_>,
|
||||
file_name: Option<&str>,
|
||||
_colors: bool,
|
||||
) {
|
||||
diag.render(file, file_name.unwrap_or("<default>"));
|
||||
}
|
||||
@ -30,6 +30,7 @@ use std::ops::Range;
|
||||
|
||||
use logos::Logos;
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticBuilder};
|
||||
use raw_lexer::RawToken;
|
||||
|
||||
pub use raw_lexer::BraceKind;
|
||||
@ -143,6 +144,10 @@ pub struct TokenizedFile<'src> {
|
||||
/// Records only exists for multiline tokens and ranges can be empty for
|
||||
/// lines that only contain line break boundary.
|
||||
multi_line_map: HashMap<BufferIndex, Vec<VisibleByteRange>>,
|
||||
/// Sparse map between matched opening and closing delimiters.
|
||||
delimiter_matches: DelimiterMatches,
|
||||
/// Diagnostics produced during tokenization and delimiter matching.
|
||||
diagnostics: Vec<Diagnostic>,
|
||||
/// Simple flag for marking erroneous state.
|
||||
had_errors: bool,
|
||||
}
|
||||
@ -205,6 +210,12 @@ struct Tokenizer<'src> {
|
||||
/// that started on `line_number`; it is consumed exactly once by
|
||||
/// [`Self::commit_current_line`].
|
||||
multi_line_start_line: Option<LineNumber>,
|
||||
/// Tracks delimiter pairs and delimiter diagnostics.
|
||||
delimiter_matcher: DelimiterMatcher,
|
||||
/// Tracks crude line indentation for delimiter recovery diagnostics.
|
||||
indent_tracker: IndentTracker,
|
||||
/// Diagnostics produced while tokenizing.
|
||||
diagnostics: Vec<Diagnostic>,
|
||||
/// Set to `true` if the lexer reported any error tokens.
|
||||
had_errors: bool,
|
||||
}
|
||||
@ -226,7 +237,8 @@ impl<'src> TokenizedFile<'src> {
|
||||
RawToken::Error
|
||||
});
|
||||
let token_piece = make_token_data(Token::from(token), lexer.slice());
|
||||
tokenizer.process_token_piece(token_piece);
|
||||
let position = tokenizer.process_token_piece(token_piece);
|
||||
tokenizer.after_token(position);
|
||||
}
|
||||
tokenizer.into_tokenized_file()
|
||||
}
|
||||
@ -244,6 +256,18 @@ impl<'src> TokenizedFile<'src> {
|
||||
pub const fn iter(&self) -> Tokens<'_, 'src> {
|
||||
Tokens::new(self)
|
||||
}
|
||||
|
||||
/// Returns diagnostics produced during tokenization and delimiter matching.
|
||||
#[must_use]
|
||||
pub fn diagnostics(&self) -> &[Diagnostic] {
|
||||
&self.diagnostics
|
||||
}
|
||||
|
||||
/// Returns the matching delimiter token, if this token is a matched delimiter.
|
||||
#[must_use]
|
||||
pub fn matching_delimiter(&self, position: TokenPosition) -> Option<TokenPosition> {
|
||||
self.delimiter_matches.mate_of(position)
|
||||
}
|
||||
}
|
||||
|
||||
impl Line {
|
||||
@ -298,17 +322,22 @@ impl<'src> Tokenizer<'src> {
|
||||
line_number: 0,
|
||||
uncommitted_start_index: 0,
|
||||
multi_line_start_line: None,
|
||||
delimiter_matcher: DelimiterMatcher::default(),
|
||||
indent_tracker: IndentTracker::new(),
|
||||
diagnostics: Vec::new(),
|
||||
had_errors: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles a token span and dispatches to the appropriate handler.
|
||||
fn process_token_piece(&mut self, token_piece: TokenData<'src>) {
|
||||
fn process_token_piece(&mut self, token_piece: TokenData<'src>) -> TokenPosition {
|
||||
let position = TokenPosition(self.buffer.len());
|
||||
if token_piece.token.can_span_lines() {
|
||||
self.process_multi_line_token(token_piece);
|
||||
} else {
|
||||
self.process_single_line_token(token_piece);
|
||||
}
|
||||
position
|
||||
}
|
||||
|
||||
/// Handles simple tokens that *never* span multiple lines, allowing us to
|
||||
@ -387,6 +416,26 @@ impl<'src> Tokenizer<'src> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs post-processing that needs the token to already have a stable
|
||||
/// [`TokenPosition`].
|
||||
fn after_token(&mut self, position: TokenPosition) {
|
||||
let token_piece = self.buffer[position.0];
|
||||
|
||||
if token_piece.token == Token::Error {
|
||||
self.had_errors = true;
|
||||
self.diagnostics
|
||||
.push(diagnostic_invalid_token(position, token_piece.lexeme));
|
||||
}
|
||||
|
||||
self.delimiter_matcher.observe(
|
||||
position,
|
||||
token_piece.token,
|
||||
self.indent_tracker.current_indent(),
|
||||
);
|
||||
|
||||
self.indent_tracker.observe_token(token_piece);
|
||||
}
|
||||
|
||||
/// Finishes tokenization, converting accumulated data into
|
||||
/// [`TokenizedFile`].
|
||||
fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
|
||||
@ -399,6 +448,12 @@ impl<'src> Tokenizer<'src> {
|
||||
self.lines.push(Line::continued(from));
|
||||
}
|
||||
|
||||
let (delimiter_matches, delimiter_diagnostics) = self.delimiter_matcher.finish();
|
||||
if !delimiter_diagnostics.is_empty() {
|
||||
self.had_errors = true;
|
||||
}
|
||||
self.diagnostics.extend(delimiter_diagnostics);
|
||||
|
||||
self.buffer.shrink_to_fit();
|
||||
self.lines.shrink_to_fit();
|
||||
|
||||
@ -406,6 +461,8 @@ impl<'src> Tokenizer<'src> {
|
||||
buffer: self.buffer,
|
||||
lines: self.lines,
|
||||
multi_line_map: self.multi_line_map,
|
||||
delimiter_matches,
|
||||
diagnostics: self.diagnostics,
|
||||
had_errors: self.had_errors,
|
||||
}
|
||||
}
|
||||
@ -507,3 +564,409 @@ impl<'file, 'src> IntoIterator for &'file TokenizedFile<'src> {
|
||||
self.iter()
|
||||
}
|
||||
}
|
||||
|
||||
/// Sparse map between matched delimiter tokens.
|
||||
///
|
||||
/// Stores both directions:
|
||||
///
|
||||
/// - opening delimiter -> closing delimiter
|
||||
/// - closing delimiter -> opening delimiter
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub struct DelimiterMatches {
|
||||
mates: HashMap<TokenPosition, TokenPosition>,
|
||||
}
|
||||
|
||||
impl DelimiterMatches {
|
||||
/// Returns the matching delimiter token for `position`, if known.
|
||||
#[must_use]
|
||||
pub fn mate_of(&self, position: TokenPosition) -> Option<TokenPosition> {
|
||||
self.mates.get(&position).copied()
|
||||
}
|
||||
|
||||
fn insert_pair(&mut self, left: TokenPosition, right: TokenPosition) {
|
||||
self.mates.insert(left, right);
|
||||
self.mates.insert(right, left);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||
enum DelimiterKind {
|
||||
Parenthesis,
|
||||
Bracket,
|
||||
Brace,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||
enum DelimiterSide {
|
||||
Open,
|
||||
Close,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct OpenDelimiter {
|
||||
kind: DelimiterKind,
|
||||
position: TokenPosition,
|
||||
indent: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct DelimiterMatcher {
|
||||
stack: Vec<OpenDelimiter>,
|
||||
matches: DelimiterMatches,
|
||||
diagnostics: Vec<Diagnostic>,
|
||||
stopped_at_defaultproperties: bool,
|
||||
}
|
||||
|
||||
impl DelimiterMatcher {
|
||||
fn handle_defaultproperties_boundary(&mut self, defaultproperties_position: TokenPosition) {
|
||||
while let Some(open) = self.stack.pop() {
|
||||
self.diagnostics
|
||||
.push(diagnostic_unclosed_delimiter_before_defaultproperties(
|
||||
open,
|
||||
defaultproperties_position,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
fn observe(&mut self, position: TokenPosition, token: Token, indent: usize) {
|
||||
if self.stopped_at_defaultproperties {
|
||||
return;
|
||||
}
|
||||
|
||||
if token == Token::Keyword(Keyword::DefaultProperties) {
|
||||
self.handle_defaultproperties_boundary(position);
|
||||
self.stopped_at_defaultproperties = true;
|
||||
return;
|
||||
}
|
||||
|
||||
let Some((side, kind)) = delimiter_of(token) else {
|
||||
return;
|
||||
};
|
||||
|
||||
match side {
|
||||
DelimiterSide::Open => {
|
||||
self.stack.push(OpenDelimiter {
|
||||
kind,
|
||||
position,
|
||||
indent,
|
||||
});
|
||||
}
|
||||
DelimiterSide::Close => {
|
||||
self.handle_close(kind, position, indent);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_close(
|
||||
&mut self,
|
||||
close_kind: DelimiterKind,
|
||||
close_position: TokenPosition,
|
||||
close_indent: usize,
|
||||
) {
|
||||
let Some(top) = self.stack.last().copied() else {
|
||||
self.diagnostics
|
||||
.push(diagnostic_unexpected_closing_delimiter(
|
||||
close_kind,
|
||||
close_position,
|
||||
));
|
||||
return;
|
||||
};
|
||||
|
||||
if top.kind == close_kind {
|
||||
let open = self.stack.pop().expect("stack top was just checked");
|
||||
self.matches.insert_pair(open.position, close_position);
|
||||
return;
|
||||
}
|
||||
|
||||
let earlier_same_kind = self.stack.iter().rposition(|open| open.kind == close_kind);
|
||||
|
||||
match earlier_same_kind {
|
||||
Some(index) => {
|
||||
let bad_open = top;
|
||||
let recovered_open = self.stack[index];
|
||||
|
||||
self.diagnostics
|
||||
.push(diagnostic_unclosed_delimiter_before_later_close(
|
||||
bad_open,
|
||||
close_kind,
|
||||
close_position,
|
||||
recovered_open,
|
||||
recovered_open.indent == close_indent,
|
||||
));
|
||||
|
||||
while self.stack.len() - 1 > index {
|
||||
self.stack.pop();
|
||||
}
|
||||
|
||||
let open = self
|
||||
.stack
|
||||
.pop()
|
||||
.expect("same-kind delimiter was found on the stack");
|
||||
|
||||
self.matches.insert_pair(open.position, close_position);
|
||||
}
|
||||
None => {
|
||||
let bad_open = self.stack.pop().expect("stack top was known to exist");
|
||||
|
||||
self.diagnostics
|
||||
.push(diagnostic_mismatched_closing_delimiter(
|
||||
bad_open,
|
||||
close_kind,
|
||||
close_position,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(mut self) -> (DelimiterMatches, Vec<Diagnostic>) {
|
||||
if !self.stopped_at_defaultproperties {
|
||||
while let Some(open) = self.stack.pop() {
|
||||
self.diagnostics
|
||||
.push(diagnostic_unclosed_delimiter_at_eof(open));
|
||||
}
|
||||
}
|
||||
|
||||
(self.matches, self.diagnostics)
|
||||
}
|
||||
}
|
||||
|
||||
fn delimiter_of(token: Token) -> Option<(DelimiterSide, DelimiterKind)> {
|
||||
match token {
|
||||
Token::LeftParenthesis => Some((DelimiterSide::Open, DelimiterKind::Parenthesis)),
|
||||
Token::RightParenthesis => Some((DelimiterSide::Close, DelimiterKind::Parenthesis)),
|
||||
|
||||
Token::LeftBracket => Some((DelimiterSide::Open, DelimiterKind::Bracket)),
|
||||
Token::RightBracket => Some((DelimiterSide::Close, DelimiterKind::Bracket)),
|
||||
|
||||
Token::LeftBrace => Some((DelimiterSide::Open, DelimiterKind::Brace)),
|
||||
Token::RightBrace => Some((DelimiterSide::Close, DelimiterKind::Brace)),
|
||||
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn open_delimiter_text(kind: DelimiterKind) -> &'static str {
|
||||
match kind {
|
||||
DelimiterKind::Parenthesis => "(",
|
||||
DelimiterKind::Bracket => "[",
|
||||
DelimiterKind::Brace => "{",
|
||||
}
|
||||
}
|
||||
|
||||
fn close_delimiter_text(kind: DelimiterKind) -> &'static str {
|
||||
match kind {
|
||||
DelimiterKind::Parenthesis => ")",
|
||||
DelimiterKind::Bracket => "]",
|
||||
DelimiterKind::Brace => "}",
|
||||
}
|
||||
}
|
||||
|
||||
/// Crude indentation tracker used only for delimiter recovery diagnostics.
|
||||
///
|
||||
/// This is intentionally simple. It assumes indentation is represented by a
|
||||
/// run of whitespace tokens before the first non-trivia token on a physical
|
||||
/// line. It does not try to normalize tabs or handle every multiline trivia
|
||||
/// edge case precisely, because indentation is only a diagnostic heuristic.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct IndentTracker {
|
||||
current_indent: usize,
|
||||
before_first_nontrivia_on_line: bool,
|
||||
}
|
||||
|
||||
impl IndentTracker {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
current_indent: 0,
|
||||
before_first_nontrivia_on_line: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn current_indent(&self) -> usize {
|
||||
self.current_indent
|
||||
}
|
||||
|
||||
fn observe_token(&mut self, token_piece: TokenData<'_>) {
|
||||
if token_is_newline(token_piece.token) {
|
||||
self.current_indent = 0;
|
||||
self.before_first_nontrivia_on_line = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if self.before_first_nontrivia_on_line && token_is_whitespace(token_piece.token) {
|
||||
self.current_indent += token_piece.lexeme.chars().count();
|
||||
return;
|
||||
}
|
||||
|
||||
if token_is_trivia(token_piece.token) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.before_first_nontrivia_on_line = false;
|
||||
}
|
||||
}
|
||||
|
||||
fn token_is_newline(token: Token) -> bool {
|
||||
matches!(token, Token::Newline)
|
||||
}
|
||||
|
||||
fn token_is_whitespace(token: Token) -> bool {
|
||||
matches!(token, Token::Whitespace)
|
||||
}
|
||||
|
||||
fn token_is_trivia(token: Token) -> bool {
|
||||
matches!(
|
||||
token,
|
||||
Token::Whitespace | Token::Newline | Token::LineComment | Token::BlockComment
|
||||
)
|
||||
}
|
||||
|
||||
fn diagnostic_invalid_token(position: TokenPosition, lexeme: &str) -> Diagnostic {
|
||||
let shown = display_lexeme(lexeme);
|
||||
|
||||
DiagnosticBuilder::error(format!("invalid token: {}", shown))
|
||||
.code("L0001")
|
||||
.primary_label(
|
||||
TokenSpan::new(position),
|
||||
format!("invalid token: {}", shown),
|
||||
)
|
||||
.build()
|
||||
}
|
||||
|
||||
fn diagnostic_unexpected_closing_delimiter(
|
||||
close_kind: DelimiterKind,
|
||||
close_position: TokenPosition,
|
||||
) -> Diagnostic {
|
||||
let close = close_delimiter_text(close_kind);
|
||||
|
||||
DiagnosticBuilder::error(format!("unexpected closing delimiter: `{}`", close))
|
||||
.code("L0002")
|
||||
.primary_label(
|
||||
TokenSpan::new(close_position),
|
||||
"unexpected closing delimiter",
|
||||
)
|
||||
.build()
|
||||
}
|
||||
|
||||
fn diagnostic_unclosed_delimiter_before_later_close(
|
||||
bad_open: OpenDelimiter,
|
||||
close_kind: DelimiterKind,
|
||||
close_position: TokenPosition,
|
||||
recovered_open: OpenDelimiter,
|
||||
same_indent: bool,
|
||||
) -> Diagnostic {
|
||||
let bad_open_text = open_delimiter_text(bad_open.kind);
|
||||
let close_text = close_delimiter_text(close_kind);
|
||||
let recovered_open_text = open_delimiter_text(recovered_open.kind);
|
||||
|
||||
let mut builder =
|
||||
DiagnosticBuilder::error(format!("unclosed delimiter before `{}`", close_text))
|
||||
.code("L0003")
|
||||
.primary_label(
|
||||
TokenSpan::new(bad_open.position),
|
||||
format!(
|
||||
"this `{}` is not closed before `{}`",
|
||||
bad_open_text, close_text
|
||||
),
|
||||
)
|
||||
.secondary_label(
|
||||
TokenSpan::new(close_position),
|
||||
format!(
|
||||
"this `{}` is matched with the earlier `{}`",
|
||||
close_text, recovered_open_text
|
||||
),
|
||||
);
|
||||
|
||||
if same_indent {
|
||||
builder = builder.secondary_label(
|
||||
TokenSpan::new(recovered_open.position),
|
||||
format!(
|
||||
"this `{}` is likely the intended match",
|
||||
recovered_open_text,
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
builder.build()
|
||||
}
|
||||
|
||||
fn diagnostic_mismatched_closing_delimiter(
|
||||
bad_open: OpenDelimiter,
|
||||
close_kind: DelimiterKind,
|
||||
close_position: TokenPosition,
|
||||
) -> Diagnostic {
|
||||
let open = open_delimiter_text(bad_open.kind);
|
||||
let close = close_delimiter_text(close_kind);
|
||||
|
||||
DiagnosticBuilder::error(format!("mismatched closing delimiter: `{}`", close))
|
||||
.code("L0004")
|
||||
.primary_label(
|
||||
TokenSpan::new(close_position),
|
||||
format!("closing delimiter does not match `{}`", open),
|
||||
)
|
||||
.secondary_label(
|
||||
TokenSpan::new(bad_open.position),
|
||||
format!("`{}` opened here", open),
|
||||
)
|
||||
.build()
|
||||
}
|
||||
|
||||
fn diagnostic_unclosed_delimiter_at_eof(open: OpenDelimiter) -> Diagnostic {
|
||||
let open_text = open_delimiter_text(open.kind);
|
||||
|
||||
DiagnosticBuilder::error(format!("unclosed delimiter: `{}`", open_text))
|
||||
.code("L0005")
|
||||
.primary_label(
|
||||
TokenSpan::new(open.position),
|
||||
format!("this `{}` was never closed", open_text),
|
||||
)
|
||||
.build()
|
||||
}
|
||||
|
||||
fn diagnostic_unclosed_delimiter_before_defaultproperties(
|
||||
open: OpenDelimiter,
|
||||
defaultproperties_position: TokenPosition,
|
||||
) -> Diagnostic {
|
||||
let open_text = open_delimiter_text(open.kind);
|
||||
|
||||
DiagnosticBuilder::error("unclosed delimiter before `defaultproperties`")
|
||||
.code("L0006")
|
||||
.primary_label(
|
||||
TokenSpan::new(open.position),
|
||||
format!("this `{}` is not closed before `defaultproperties`", open_text),
|
||||
)
|
||||
.secondary_label(
|
||||
TokenSpan::new(defaultproperties_position),
|
||||
"delimiter matching stops at `defaultproperties`",
|
||||
)
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Formats `lexeme` for inclusion in a diagnostic message.
///
/// - An empty lexeme is shown as `<empty>`.
/// - A lone backtick is shown as the word `backtick`, because wrapping it in
///   backticks would be unreadable.
/// - Anything else is escaped with [`char::escape_default`] and wrapped in
///   backticks. At most `MAX_DISPLAY_CHARS` escaped characters are shown; if
///   the lexeme does not fit, `...` is appended.
///
/// Truncation happens only between source characters, so an escape sequence
/// such as `\n` or `\u{1f600}` is either shown in full or not at all.
fn display_lexeme(lexeme: &str) -> String {
    const MAX_DISPLAY_CHARS: usize = 32;

    if lexeme.is_empty() {
        return "<empty>".to_string();
    }

    if lexeme == "`" {
        return "backtick".to_string();
    }

    // Room for the payload, the two backticks and a possible "...".
    let mut display = String::with_capacity(MAX_DISPLAY_CHARS + 5);
    display.push('`');

    let mut shown = 0usize;
    let mut truncated = false;

    for character in lexeme.chars() {
        let escape = character.escape_default();
        let width = escape.len();

        // Never emit a partial escape: stop before the first source
        // character whose escaped form would overflow the budget.
        if shown + width > MAX_DISPLAY_CHARS {
            truncated = true;
            break;
        }

        display.extend(escape);
        shown += width;
    }

    if truncated {
        display.push_str("...");
    }
    display.push('`');

    display
}
|
||||
@ -384,4 +384,31 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first significant token after `position`, without consuming it.
|
||||
///
|
||||
/// This buffers through the cursor, so trivia is still recorded normally and
|
||||
/// insignificant tokens are skipped consistently with the rest of the parser.
|
||||
///
|
||||
/// Returns `None` if the stream ends before a later significant token is found.
|
||||
#[must_use]
|
||||
pub(crate) fn peek_token_after_position(&mut self, position: TokenPosition) -> Option<Token> {
|
||||
let mut lookahead = 0usize;
|
||||
|
||||
loop {
|
||||
self.cursor
|
||||
.ensure_lookahead_available(lookahead, &mut self.trivia);
|
||||
|
||||
let Some((token_position, token_data)) = self.cursor.lookahead_buffer.get(lookahead)
|
||||
else {
|
||||
return None;
|
||||
};
|
||||
|
||||
if *token_position > position {
|
||||
return Some(token_data.token);
|
||||
}
|
||||
|
||||
lookahead += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,6 +122,8 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
)
|
||||
// Header recovery must not consume the next `;`;
|
||||
// it belongs to the surrounding `for` header.
|
||||
// That's why `SyncLevel` match is preferable to syncing onto
|
||||
// matching delimiter - latter wouldn't stop at preceding `;`.
|
||||
.sync_error_until(self, SyncLevel::CloseParenthesis)
|
||||
.unwrap_or_fallback(self),
|
||||
)
|
||||
@ -216,7 +218,7 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
)
|
||||
.widen_error_span_from(for_keyword_position)
|
||||
.related_token("for_header_start", left_parenthesis_position)
|
||||
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||
.sync_error_at_matching_delimiter(self, left_parenthesis_position)
|
||||
.ok_or_report(self);
|
||||
}
|
||||
|
||||
|
||||
@ -97,27 +97,13 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
else {
|
||||
return None;
|
||||
};
|
||||
let mut nesting_depth: usize = 1;
|
||||
let mut lookahead_token_offset: usize = 1;
|
||||
while let Some(lookahead_token) = self.peek_token_at(lookahead_token_offset) {
|
||||
match lookahead_token {
|
||||
Token::LeftParenthesis => nesting_depth += 1,
|
||||
Token::RightParenthesis => {
|
||||
if nesting_depth == 1 {
|
||||
return self
|
||||
.peek_token_at(lookahead_token_offset + 1)
|
||||
|
||||
let right_parenthesis_position =
|
||||
self.file().matching_delimiter(left_parenthesis_position)?;
|
||||
|
||||
self.peek_token_after_position(right_parenthesis_position)
|
||||
.is_some_and(|token| token.is_valid_identifier_name())
|
||||
.then_some(left_parenthesis_position);
|
||||
}
|
||||
nesting_depth -= 1;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
lookahead_token_offset += 1;
|
||||
}
|
||||
// Recovery is left to normal expression parsing when the closing `)`
|
||||
// is missing.
|
||||
None
|
||||
.then_some(left_parenthesis_position)
|
||||
}
|
||||
|
||||
/// Parses a control-flow condition.
|
||||
|
||||
@ -196,7 +196,7 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
.widen_error_span_from(left_parenthesis_position)
|
||||
.extend_blame_to_next_token(self)
|
||||
.related_token("left_parenthesis", left_parenthesis_position)
|
||||
.sync_error_until(self, SyncLevel::CloseParenthesis)
|
||||
.sync_error_at_matching_delimiter(self, left_parenthesis_position)
|
||||
.fallback(self);
|
||||
};
|
||||
let inner_expression = self.parse_expression();
|
||||
@ -206,7 +206,7 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis,
|
||||
)
|
||||
.widen_error_span_from(left_parenthesis_position)
|
||||
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||
.sync_error_at_matching_delimiter(self, left_parenthesis_position)
|
||||
.extend_blame_start_to_covered_start()
|
||||
.related_token("left_parenthesis", left_parenthesis_position)
|
||||
.unwrap_or_fallback(self);
|
||||
|
||||
@ -277,7 +277,7 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
error = error.related("first_extra_argument", span);
|
||||
}
|
||||
error
|
||||
.sync_error_until(self, SyncLevel::CloseParenthesis)
|
||||
.sync_error_until_matching_delimiter(self, state.left_parenthesis_position)
|
||||
.report_error(self);
|
||||
}
|
||||
}
|
||||
@ -296,7 +296,7 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
.related_token("new_keyword", state.new_keyword_position)
|
||||
.related_token("left_parenthesis", state.left_parenthesis_position);
|
||||
let class_specifier_parse_action = if self.next_token_definitely_cannot_start_expression() {
|
||||
error = error.sync_error_at(self, SyncLevel::CloseParenthesis);
|
||||
error = error.sync_error_at_matching_delimiter(self, state.left_parenthesis_position);
|
||||
// Skipping the class specifier avoids cascading errors when
|
||||
// the next token cannot start an expression anyway.
|
||||
NewClassSpecifierParseAction::Skip
|
||||
|
||||
@ -138,6 +138,22 @@ impl SyncLevel {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn fallback_sync_level_for_delimiter_start(token: Option<Token>) -> SyncLevel {
|
||||
match token {
|
||||
Some(Token::LeftParenthesis) => SyncLevel::CloseParenthesis,
|
||||
Some(Token::LeftBracket) => SyncLevel::CloseBracket,
|
||||
Some(Token::LeftBrace) => SyncLevel::BlockBoundary,
|
||||
_ => SyncLevel::CloseParenthesis,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_opening_delimiter(token: Token) -> bool {
|
||||
matches!(
|
||||
token,
|
||||
Token::LeftParenthesis | Token::LeftBracket | Token::LeftBrace
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
@ -171,6 +187,105 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Skips tokens until a token with exactly `level` is found, then consumes
|
||||
/// that token.
|
||||
///
|
||||
/// This mirrors the behavior used by [`ResultRecoveryExt::sync_error_at`]:
|
||||
/// stronger sync tokens can stop [`Self::recover_until`], but they are not
|
||||
/// consumed unless they are exactly the requested level.
|
||||
fn recover_at_sync_level(&mut self, level: SyncLevel) {
|
||||
self.recover_until(level);
|
||||
|
||||
if self
|
||||
.peek_token()
|
||||
.and_then(SyncLevel::for_token)
|
||||
.is_some_and(|next_level| next_level == level)
|
||||
{
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
/// Recovers up to the lexer-produced matching delimiter for
|
||||
/// `delimiter_start`, if possible, but does not consume it.
|
||||
///
|
||||
/// If `delimiter_start` is not an opening delimiter, if no match is known,
|
||||
/// or if the parser has already moved past the matching delimiter, this
|
||||
/// falls back to ordinary sync-level recovery inferred from
|
||||
/// `delimiter_start`.
|
||||
pub(crate) fn recover_until_matching_delimiter_or_sync(
|
||||
&mut self,
|
||||
delimiter_start: TokenPosition,
|
||||
) {
|
||||
let start_token = self.file().token_at(delimiter_start).map(|data| data.token);
|
||||
let fallback_level = SyncLevel::fallback_sync_level_for_delimiter_start(start_token);
|
||||
|
||||
let Some(start_token) = start_token else {
|
||||
self.recover_until(fallback_level);
|
||||
return;
|
||||
};
|
||||
|
||||
if !SyncLevel::is_opening_delimiter(start_token) {
|
||||
self.recover_until(fallback_level);
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(target) = self.file().matching_delimiter(delimiter_start) else {
|
||||
self.recover_until(fallback_level);
|
||||
return;
|
||||
};
|
||||
|
||||
if self.peek_position_or_eof() > target {
|
||||
self.recover_until(fallback_level);
|
||||
return;
|
||||
}
|
||||
|
||||
while self.peek_position_or_eof() < target {
|
||||
self.advance();
|
||||
}
|
||||
}
|
||||
|
||||
/// Recovers by using the lexer-produced matching delimiter for
|
||||
/// `delimiter_start`, if possible.
|
||||
///
|
||||
/// If `delimiter_start` is not an opening delimiter, if no match is known,
|
||||
/// or if the parser has already moved past the matching delimiter, this
|
||||
/// falls back to ordinary sync-level recovery inferred from
|
||||
/// `delimiter_start`.
|
||||
pub(crate) fn recover_at_matching_delimiter_or_sync(&mut self, delimiter_start: TokenPosition) {
|
||||
let start_token = self.file().token_at(delimiter_start).map(|data| data.token);
|
||||
let fallback_level = SyncLevel::fallback_sync_level_for_delimiter_start(start_token);
|
||||
|
||||
let Some(start_token) = start_token else {
|
||||
self.recover_at_sync_level(fallback_level);
|
||||
return;
|
||||
};
|
||||
|
||||
if !SyncLevel::is_opening_delimiter(start_token) {
|
||||
self.recover_at_sync_level(fallback_level);
|
||||
return;
|
||||
}
|
||||
|
||||
let Some(target) = self.file().matching_delimiter(delimiter_start) else {
|
||||
self.recover_at_sync_level(fallback_level);
|
||||
return;
|
||||
};
|
||||
|
||||
if self.peek_position_or_eof() > target {
|
||||
self.recover_at_sync_level(fallback_level);
|
||||
return;
|
||||
}
|
||||
|
||||
while self.peek_position_or_eof() < target {
|
||||
self.advance();
|
||||
}
|
||||
|
||||
if self.peek_position_or_eof() == target {
|
||||
self.advance();
|
||||
} else {
|
||||
self.recover_at_sync_level(fallback_level);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports `error` and returns the recovery fallback for `T`.
|
||||
///
|
||||
/// This is the primitive used when parsing must keep going with a
|
||||
@ -234,6 +349,39 @@ pub trait ResultRecoveryExt<'src, 'arena, T>: Sized {
|
||||
#[must_use]
|
||||
fn sync_error_at(self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self;
|
||||
|
||||
/// Extends the right end of the error span up to, but not including, the
|
||||
/// known matching closing delimiter for `delimiter_start`, if that delimiter
|
||||
/// is known and has not already been passed.
|
||||
///
|
||||
/// If no usable delimiter match exists, falls back to ordinary
|
||||
/// [`SyncLevel`]-based recovery. The fallback level is inferred from the
|
||||
/// token at `delimiter_start`.
|
||||
#[must_use]
|
||||
fn sync_error_until_matching_delimiter(
|
||||
self,
|
||||
parser: &mut Parser<'src, 'arena>,
|
||||
delimiter_start: TokenPosition,
|
||||
) -> Self;
|
||||
|
||||
/// Extends the right end of the error span to include the known matching
|
||||
/// closing delimiter for `delimiter_start`, if that delimiter is known and
|
||||
/// has not already been passed.
|
||||
///
|
||||
/// If no usable delimiter match exists, falls back to ordinary
|
||||
/// [`SyncLevel`]-based recovery. The fallback level is inferred from the
|
||||
/// token at `delimiter_start`:
|
||||
///
|
||||
/// - `(` -> [`SyncLevel::CloseParenthesis`]
|
||||
/// - `[` -> [`SyncLevel::CloseBracket`]
|
||||
/// - `{` -> [`SyncLevel::BlockBoundary`]
|
||||
/// - anything else -> [`SyncLevel::CloseParenthesis`]
|
||||
#[must_use]
|
||||
fn sync_error_at_matching_delimiter(
|
||||
self,
|
||||
parser: &mut Parser<'src, 'arena>,
|
||||
delimiter_start: TokenPosition,
|
||||
) -> Self;
|
||||
|
||||
/// Either returns expected value or its best effort fallback.
|
||||
#[must_use]
|
||||
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
|
||||
@ -305,6 +453,36 @@ impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, '
|
||||
self
|
||||
}
|
||||
|
||||
fn sync_error_until_matching_delimiter(
|
||||
mut self,
|
||||
parser: &mut Parser<'src, 'arena>,
|
||||
delimiter_start: TokenPosition,
|
||||
) -> Self {
|
||||
if let Err(ref mut error) = self {
|
||||
parser.recover_until_matching_delimiter_or_sync(delimiter_start);
|
||||
error.covered_span.end = std::cmp::max(
|
||||
error.covered_span.end,
|
||||
parser.last_consumed_position_or_start(),
|
||||
);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn sync_error_at_matching_delimiter(
|
||||
mut self,
|
||||
parser: &mut Parser<'src, 'arena>,
|
||||
delimiter_start: TokenPosition,
|
||||
) -> Self {
|
||||
if let Err(ref mut error) = self {
|
||||
parser.recover_at_matching_delimiter_or_sync(delimiter_start);
|
||||
error.covered_span.end = std::cmp::max(
|
||||
error.covered_span.end,
|
||||
parser.last_consumed_position_or_start(),
|
||||
);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
|
||||
where
|
||||
T: RecoveryFallback<'src, 'arena>,
|
||||
@ -390,6 +568,32 @@ impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
|
||||
self
|
||||
}
|
||||
|
||||
fn sync_error_until_matching_delimiter(
|
||||
mut self,
|
||||
parser: &mut Parser<'src, 'arena>,
|
||||
delimiter_start: TokenPosition,
|
||||
) -> Self {
|
||||
parser.recover_until_matching_delimiter_or_sync(delimiter_start);
|
||||
self.covered_span.end = std::cmp::max(
|
||||
self.covered_span.end,
|
||||
parser.last_consumed_position_or_start(),
|
||||
);
|
||||
self
|
||||
}
|
||||
|
||||
fn sync_error_at_matching_delimiter(
|
||||
mut self,
|
||||
parser: &mut Parser<'src, 'arena>,
|
||||
delimiter_start: TokenPosition,
|
||||
) -> Self {
|
||||
parser.recover_at_matching_delimiter_or_sync(delimiter_start);
|
||||
self.covered_span.end = std::cmp::max(
|
||||
self.covered_span.end,
|
||||
parser.last_consumed_position_or_start(),
|
||||
);
|
||||
self
|
||||
}
|
||||
|
||||
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) {
|
||||
parser.report_error(self);
|
||||
}
|
||||
|
||||
350
rottlib/tests/lexer_diagnostics.rs
Normal file
350
rottlib/tests/lexer_diagnostics.rs
Normal file
@ -0,0 +1,350 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use rottlib::diagnostics::{Diagnostic, Severity};
|
||||
use rottlib::lexer::{TokenPosition, TokenSpan, TokenizedFile};
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ExpectedLabel {
|
||||
span: TokenSpan,
|
||||
message: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct ExpectedDiagnostic<'a> {
|
||||
headline: &'static str,
|
||||
severity: Severity,
|
||||
code: Option<&'static str>,
|
||||
primary_label: Option<ExpectedLabel>,
|
||||
secondary_labels: &'a [ExpectedLabel],
|
||||
help: Option<&'static str>,
|
||||
notes: &'a [&'static str],
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn assert_diagnostic(actual: &Diagnostic, expected: &ExpectedDiagnostic<'_>) {
|
||||
assert_eq!(actual.headline(), expected.headline);
|
||||
assert_eq!(actual.severity(), expected.severity);
|
||||
assert_eq!(actual.code(), expected.code);
|
||||
assert_eq!(actual.help(), expected.help);
|
||||
|
||||
match (actual.primary_label(), expected.primary_label.as_ref()) {
|
||||
(None, None) => {}
|
||||
(Some(actual), Some(expected)) => {
|
||||
assert_eq!(actual.span, expected.span);
|
||||
assert_eq!(actual.message, expected.message);
|
||||
}
|
||||
_ => panic!("primary label mismatch"),
|
||||
}
|
||||
|
||||
let actual_secondary = actual.secondary_labels();
|
||||
assert_eq!(actual_secondary.len(), expected.secondary_labels.len());
|
||||
|
||||
for (actual, expected) in actual_secondary
|
||||
.iter()
|
||||
.zip(expected.secondary_labels.iter())
|
||||
{
|
||||
assert_eq!(actual.span, expected.span);
|
||||
assert_eq!(actual.message, expected.message);
|
||||
}
|
||||
|
||||
let actual_notes = actual.notes();
|
||||
assert_eq!(actual_notes.len(), expected.notes.len());
|
||||
|
||||
for (actual, expected) in actual_notes.iter().zip(expected.notes.iter()) {
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
/// One named piece of source text fed to the lexer.
#[derive(Debug, Clone, Copy)]
struct Fixture {
    /// Name under which the fixture's diagnostics are looked up.
    label: &'static str,
    /// Source text that gets tokenized.
    source: &'static str,
}
|
||||
|
||||
type FixtureRun = Vec<Diagnostic>;
|
||||
|
||||
struct FixtureRuns {
|
||||
runs: HashMap<&'static str, FixtureRun>,
|
||||
}
|
||||
|
||||
impl FixtureRuns {
|
||||
#[track_caller]
|
||||
fn get(&self, label: &str) -> Option<Vec<Diagnostic>> {
|
||||
self.runs.get(label).cloned()
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn get_any(&self, label: &str) -> Diagnostic {
|
||||
self.runs
|
||||
.get(label)
|
||||
.map(|fixture_run| fixture_run[0].clone())
|
||||
.unwrap_or_else(|| panic!("no fixture run for `{label}`"))
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn get_by_code(&self, label: &str, code: &str) -> Diagnostic {
|
||||
self.runs
|
||||
.get(label)
|
||||
.unwrap_or_else(|| panic!("no fixture run for `{label}`"))
|
||||
.iter()
|
||||
.find(|diagnostic| diagnostic.code() == Some(code))
|
||||
.unwrap_or_else(|| panic!("no `{code}` diagnostic in fixture `{label}`"))
|
||||
.clone()
|
||||
}
|
||||
}
|
||||
|
||||
const fn span(position: usize) -> TokenSpan {
|
||||
TokenSpan {
|
||||
start: TokenPosition(position),
|
||||
end: TokenPosition(position),
|
||||
}
|
||||
}
|
||||
|
||||
const LEXER_FIXTURES: &[Fixture] = &[
|
||||
Fixture {
|
||||
label: "files/L0001_01.uc",
|
||||
source: "`",
|
||||
},
|
||||
Fixture {
|
||||
label: "files/L0002_01.uc",
|
||||
source: "]",
|
||||
},
|
||||
Fixture {
|
||||
label: "files/L0003_01.uc",
|
||||
source: "{\n foo(\n}\n",
|
||||
},
|
||||
Fixture {
|
||||
label: "files/L0004_01.uc",
|
||||
source: "(]",
|
||||
},
|
||||
Fixture {
|
||||
label: "files/L0005_01.uc",
|
||||
source: "foo(",
|
||||
},
|
||||
Fixture {
|
||||
label: "files/L_mixed_01.uc",
|
||||
source: "([)]",
|
||||
},
|
||||
];
|
||||
|
||||
fn run_fixture(fixture: &'static Fixture) -> FixtureRun {
|
||||
let file = TokenizedFile::tokenize(fixture.source);
|
||||
file.diagnostics().to_vec()
|
||||
}
|
||||
|
||||
fn run_fixtures(fixtures: &'static [Fixture]) -> FixtureRuns {
|
||||
let mut runs = HashMap::new();
|
||||
|
||||
for fixture in fixtures {
|
||||
runs.insert(fixture.label, run_fixture(fixture));
|
||||
}
|
||||
|
||||
FixtureRuns { runs }
|
||||
}
|
||||
|
||||
/// Checks how many diagnostics each lexer fixture produces.
#[test]
fn check_lexer_diagnostic_counts() {
    let runs = run_fixtures(LEXER_FIXTURES);
    let diagnostic_count = |label: &str| runs.get(label).unwrap().len();

    assert_eq!(diagnostic_count("files/L0001_01.uc"), 1);
    assert_eq!(diagnostic_count("files/L0002_01.uc"), 1);
    assert_eq!(diagnostic_count("files/L0003_01.uc"), 1);
    assert_eq!(diagnostic_count("files/L0004_01.uc"), 1);
    assert_eq!(diagnostic_count("files/L0005_01.uc"), 1);
    assert_eq!(diagnostic_count("files/L_mixed_01.uc"), 2);
}
|
||||
|
||||
/// Checks the `L0001` diagnostic reported for the backtick fixture.
#[test]
fn check_l0001_invalid_token() {
    let runs = run_fixtures(LEXER_FIXTURES);

    let actual = runs.get_any("files/L0001_01.uc");
    let expected = ExpectedDiagnostic {
        headline: "invalid token: backtick",
        severity: Severity::Error,
        code: Some("L0001"),
        primary_label: Some(ExpectedLabel {
            span: span(0),
            message: "invalid token: backtick",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    assert_diagnostic(&actual, &expected);
}
|
||||
|
||||
/// Checks the `L0002` diagnostic reported for a lone `]`.
#[test]
fn check_l0002_unexpected_closing_delimiter() {
    let runs = run_fixtures(LEXER_FIXTURES);

    let actual = runs.get_any("files/L0002_01.uc");
    let expected = ExpectedDiagnostic {
        headline: "unexpected closing delimiter: `]`",
        severity: Severity::Error,
        code: Some("L0002"),
        primary_label: Some(ExpectedLabel {
            span: span(0),
            message: "unexpected closing delimiter",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    assert_diagnostic(&actual, &expected);
}
|
||||
|
||||
/// Checks the `L0003` diagnostic reported when a `(` is still open at the
/// `}` that closes an earlier `{`.
#[test]
fn check_l0003_unclosed_delimiter_before_later_close() {
    let runs = run_fixtures(LEXER_FIXTURES);

    let actual = runs.get_any("files/L0003_01.uc");
    let expected = ExpectedDiagnostic {
        headline: "unclosed delimiter before `}`",
        severity: Severity::Error,
        code: Some("L0003"),
        primary_label: Some(ExpectedLabel {
            span: span(4),
            message: "this `(` is not closed before `}`",
        }),
        secondary_labels: &[
            ExpectedLabel {
                span: span(6),
                message: "this `}` is matched with the earlier `{`",
            },
            ExpectedLabel {
                span: span(0),
                message: "this `{` is likely the intended match",
            },
        ],
        help: None,
        notes: &[],
    };

    assert_diagnostic(&actual, &expected);
}
|
||||
|
||||
/// Checks the `L0004` diagnostic reported when `(` is closed by `]`.
#[test]
fn check_l0004_mismatched_closing_delimiter() {
    let runs = run_fixtures(LEXER_FIXTURES);

    let actual = runs.get_any("files/L0004_01.uc");
    let expected = ExpectedDiagnostic {
        headline: "mismatched closing delimiter: `]`",
        severity: Severity::Error,
        code: Some("L0004"),
        primary_label: Some(ExpectedLabel {
            span: span(1),
            message: "closing delimiter does not match `(`",
        }),
        secondary_labels: &[ExpectedLabel {
            span: span(0),
            message: "`(` opened here",
        }],
        help: None,
        notes: &[],
    };

    assert_diagnostic(&actual, &expected);
}
|
||||
|
||||
/// Checks the `L0005` diagnostic reported when `(` is still open at the end
/// of the input.
#[test]
fn check_l0005_unclosed_delimiter_at_eof() {
    let runs = run_fixtures(LEXER_FIXTURES);

    let actual = runs.get_any("files/L0005_01.uc");
    let expected = ExpectedDiagnostic {
        headline: "unclosed delimiter: `(`",
        severity: Severity::Error,
        code: Some("L0005"),
        primary_label: Some(ExpectedLabel {
            span: span(1),
            message: "this `(` was never closed",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };

    assert_diagnostic(&actual, &expected);
}
|
||||
|
||||
/// Checks both diagnostics reported for the interleaved `([)]` fixture: the
/// `L0003` for the unclosed `[` and the `L0002` for the stray `]`.
#[test]
fn check_mixed_recovery_diagnostics() {
    let runs = run_fixtures(LEXER_FIXTURES);

    let unclosed = runs.get_by_code("files/L_mixed_01.uc", "L0003");
    let expected_unclosed = ExpectedDiagnostic {
        headline: "unclosed delimiter before `)`",
        severity: Severity::Error,
        code: Some("L0003"),
        primary_label: Some(ExpectedLabel {
            span: span(1),
            message: "this `[` is not closed before `)`",
        }),
        secondary_labels: &[
            ExpectedLabel {
                span: span(2),
                message: "this `)` is matched with the earlier `(`",
            },
            ExpectedLabel {
                span: span(0),
                message: "this `(` is likely the intended match",
            },
        ],
        help: None,
        notes: &[],
    };
    assert_diagnostic(&unclosed, &expected_unclosed);

    let unexpected = runs.get_by_code("files/L_mixed_01.uc", "L0002");
    let expected_unexpected = ExpectedDiagnostic {
        headline: "unexpected closing delimiter: `]`",
        severity: Severity::Error,
        code: Some("L0002"),
        primary_label: Some(ExpectedLabel {
            span: span(3),
            message: "unexpected closing delimiter",
        }),
        secondary_labels: &[],
        help: None,
        notes: &[],
    };
    assert_diagnostic(&unexpected, &expected_unexpected);
}
|
||||
|
||||
/// Checks the stored delimiter matches for a `(` left open inside braces:
/// the braces are matched with each other in both directions and the `(`
/// has no match.
#[test]
fn check_recovered_delimiter_matches_are_stored() {
    let file = TokenizedFile::tokenize("{\n foo(\n}\n");
    let match_of = |position: usize| file.matching_delimiter(TokenPosition(position));

    assert_eq!(match_of(0), Some(TokenPosition(6)));
    assert_eq!(match_of(6), Some(TokenPosition(0)));
    assert_eq!(match_of(4), None);
}
|
||||
|
||||
/// Checks the stored delimiter matches for `([)]`: the parentheses are
/// matched with each other in both directions, while the `[` and the `]`
/// have no match.
#[test]
fn check_mixed_recovery_delimiter_matches_are_stored() {
    let file = TokenizedFile::tokenize("([)]");
    let match_of = |position: usize| file.matching_delimiter(TokenPosition(position));

    assert_eq!(match_of(0), Some(TokenPosition(2)));
    assert_eq!(match_of(2), Some(TokenPosition(0)));

    assert_eq!(match_of(1), None);
    assert_eq!(match_of(3), None);
}
|
||||
Loading…
Reference in New Issue
Block a user