From 150bd2f5cfcf7068a9e95ec1d481291018fa855d Mon Sep 17 00:00:00 2001 From: dkanus Date: Tue, 28 Apr 2026 21:28:34 +0700 Subject: [PATCH] Clean control flow code --- dev_tests/src/verify_expr.rs | 124 +-- rottlib/src/ast/expressions.rs | 6 +- .../control_flow_expressions.rs | 131 ++++ .../parse_error_diagnostics/mod.rs | 22 +- rottlib/src/diagnostics/render.rs | 3 + rottlib/src/parser/errors.rs | 8 +- .../parser/grammar/expression/control_flow.rs | 712 ------------------ .../expression/control_flow/for_loop.rs | 282 +++++++ .../grammar/expression/control_flow/mod.rs | 502 ++++++++++++ .../src/parser/grammar/expression/pratt.rs | 12 + .../parser/grammar/expression/primary/mod.rs | 2 +- .../parser/grammar/expression/primary/new.rs | 9 +- .../control_flow_expressions.rs | 332 +++++++- 13 files changed, 1307 insertions(+), 838 deletions(-) delete mode 100644 rottlib/src/parser/grammar/expression/control_flow.rs create mode 100644 rottlib/src/parser/grammar/expression/control_flow/for_loop.rs create mode 100644 rottlib/src/parser/grammar/expression/control_flow/mod.rs diff --git a/dev_tests/src/verify_expr.rs b/dev_tests/src/verify_expr.rs index 0b526dd..8736688 100644 --- a/dev_tests/src/verify_expr.rs +++ b/dev_tests/src/verify_expr.rs @@ -23,128 +23,50 @@ mod pretty; /// Add, remove, or edit entries here. /// Using `(&str, &str)` gives each case a human-readable label. 
const TEST_CASES: &[(&str, &str)] = &[ - // P0016: invalid initializer start after `for (` + // P0022: invalid return value start ( - "files/P0016_01.uc", - "for\n(] ; )", + "files/P0022_01.uc", + "return ] ;", ), ( - "files/P0016_02.uc", - "for (\n ]\n ;\n)\n Body();\n", + "files/P0022_02.uc", + "return\n ]\n;\n", ), ( - "files/P0016_03.uc", - "for (\n }\n ;\n)\n", - ), - ( - "files/P0016_06.uc", - "for (\n ]\n\n\n ; Step)\n", + "files/P0022_03.uc", + "return\n}\n", ), - // P0017: initializer parsed, but first `;` is missing + // P0023: invalid break value start ( - "files/P0017_01.uc", - "for (Init ] ; )", + "files/P0023_01.uc", + "break ] ;", ), ( - "files/P0017_02.uc", - "for (Init\n ]\n ;\n)\n", + "files/P0023_02.uc", + "break\n \n\n\n\n ]\n;\n", ), ( - "files/P0017_04.uc", - "for (Init {\n Body();\n}; )\n", - ), - ( - "files/P0017_05.uc", - "for (Init", + "files/P0023_03.uc", + "break\n}\n", ), - // P0018: invalid condition start after first `;` + // P0024: goto target is missing or not a label token ( - "files/P0018_01.uc", - "for \n\n (; ] ; )", + "files/P0024_01.uc", + "goto;", ), ( - "files/P0018_02.uc", - "for (;\n ]\n ;\n)\n Body();\n", + "files/P0024_02.uc", + "goto\n ;\n", ), ( - "files/P0018_03.uc", - "for (;\n }\n ;\n)\n", + "files/P0024_03.uc", + "goto\n ]\n;\n", ), ( - "files/P0018_06.uc", - "for (;", - ), - - // P0019: condition parsed, but second `;` is missing - ( - "files/P0019_01.uc", - "for (; bCondition )", - ), - ( - "files/P0019_02.uc", - "for (; bCondition\n)\n Body();\n", - ), - ( - "files/P0019_03.uc", - "for (; bCondition ] ; )", - ), - ( - "files/P0019_04.uc", - "for (; bCondition\n{\n Body();\n}\n;\n)\n", - ), - ( - "files/P0019_06.uc", - "for (; bCondition", - ), - ( - "files/P0019_07.uc", - "for (; bCondition Step)", - ), - - // P0020: invalid step start after second `;` - ( - "files/P0020_01.uc", - "for (;;;)", - ), - ( - "files/P0020_02.uc", - "for (;;\n ;\n)\n", - ), - ( - "files/P0020_03.uc", - "for (;; ])", - ), - ( - 
"files/P0020_04.uc", - "for (;;\n }\n)\n", - ), - ( - "files/P0020_08.uc", - "for (;;\n ]\n", - ), - - // P0021: missing `)` to close `for` header - ( - "files/P0021_01.uc", - "for (;;", - ), - ( - "files/P0021_02.uc", - "for (;; Step", - ), - ( - "files/P0021_03.uc", - "for (;; Step;\n Body();\n", - ), - ( - "files/P0021_05.uc", - "for (Init; bCondition; Step\n{\n Body();\n}\n", - ), - ( - "files/P0021_09.uc", - "for\n(Init;\n bCondition;\n Step\n]\n", + "files/P0024_04.uc", + "goto", ), ]; diff --git a/rottlib/src/ast/expressions.rs b/rottlib/src/ast/expressions.rs index 59cf27c..d79cdd7 100644 --- a/rottlib/src/ast/expressions.rs +++ b/rottlib/src/ast/expressions.rs @@ -105,7 +105,7 @@ pub enum Expression<'src, 'arena> { /// bodies, optional trailing semicolons, and recovery anchors. If { condition: ExpressionRef<'src, 'arena>, - body: BranchBody<'src, 'arena>, + then_body: BranchBody<'src, 'arena>, else_body: Option>, }, /// `while (condition) body` @@ -123,7 +123,7 @@ pub enum Expression<'src, 'arena> { /// The iteration source / iterator expression is stored as a normal /// expression node because the language permits nontrivial syntax there. ForEach { - iterated_expression: ExpressionRef<'src, 'arena>, + iterator_expression: ExpressionRef<'src, 'arena>, body: BranchBody<'src, 'arena>, }, /// Traditional three-part `for` loop. 
@@ -134,7 +134,7 @@ pub enum Expression<'src, 'arena> { /// - `for (;cond;)` /// - `for (;;step)` For { - initialization: Option>, + initializer: Option>, condition: Option>, step: Option>, body: BranchBody<'src, 'arena>, diff --git a/rottlib/src/diagnostics/parse_error_diagnostics/control_flow_expressions.rs b/rottlib/src/diagnostics/parse_error_diagnostics/control_flow_expressions.rs index 6e2b274..cb6fe57 100644 --- a/rottlib/src/diagnostics/parse_error_diagnostics/control_flow_expressions.rs +++ b/rottlib/src/diagnostics/parse_error_diagnostics/control_flow_expressions.rs @@ -677,3 +677,134 @@ pub(super) fn diagnostic_for_loop_header_missing_closing_parenthesis<'src>( .code("P0021") .build() } + +pub(super) fn diagnostic_return_value_invalid_start<'src>( + error: ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + let return_keyword_span = error + .related_spans + .get(diagnostic_labels::EXPRESSION_REQUIRED_BY) + .copied(); + + let found = found_at(file, error.blame_span.end); + + let (header_text, primary_text) = match found { + FoundAt::Token(token_text) => ( + format!( + "expected return value expression or `;` after `return`, found `{}`", + token_text + ), + format!("unexpected `{}`", token_text), + ), + FoundAt::EndOfFile => ( + "expected return value expression or `;` after `return`, found end of file" + .to_string(), + "reached end of file here".to_string(), + ), + FoundAt::Unknown => ( + "expected return value expression or `;` after `return`".to_string(), + "expected return value expression here".to_string(), + ), + }; + + let mut builder = DiagnosticBuilder::error(header_text); + + if let Some(span) = return_keyword_span + && !file.same_line(span.start, error.blame_span.end) + { + builder = builder.secondary_label( + span, + "after this `return`, a value expression or `;` was expected", + ); + } + + builder + .primary_label(error.blame_span, primary_text) + .code("P0022") + .build() +} + +pub(super) fn 
diagnostic_break_value_invalid_start<'src>( + error: ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + let break_keyword_span = error + .related_spans + .get(diagnostic_labels::EXPRESSION_REQUIRED_BY) + .copied(); + + let found = found_at(file, error.blame_span.end); + + let (header_text, primary_text) = match found { + FoundAt::Token(token_text) => ( + format!( + "expected break value expression or `;` after `break`, found `{}`", + token_text + ), + format!("unexpected `{}`", token_text), + ), + FoundAt::EndOfFile => ( + "expected break value expression or `;` after `break`, found end of file" + .to_string(), + "reached end of file here".to_string(), + ), + FoundAt::Unknown => ( + "expected break value expression or `;` after `break`".to_string(), + "expected break value expression here".to_string(), + ), + }; + + let mut builder = DiagnosticBuilder::error(header_text); + + if let Some(span) = break_keyword_span + && !file.same_line(span.start, error.blame_span.end) + { + builder = builder.secondary_label( + span, + "after this `break`, a value expression or `;` was expected", + ); + } + + builder + .primary_label(error.blame_span, primary_text) + .code("P0023") + .build() +} + +pub(super) fn diagnostic_goto_missing_label<'src>( + error: ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + let goto_keyword_span = error.related_spans.get("goto_keyword").copied(); + + let found = found_at(file, error.blame_span.end); + + let (header_text, primary_text) = match found { + FoundAt::Token(token_text) => ( + format!("expected label after `goto`, found `{}`", token_text), + format!("unexpected `{}`", token_text), + ), + FoundAt::EndOfFile => ( + "expected label after `goto`, found end of file".to_string(), + "reached end of file here".to_string(), + ), + FoundAt::Unknown => ( + "expected label after `goto`".to_string(), + "expected label here".to_string(), + ), + }; + + let mut builder = DiagnosticBuilder::error(header_text); + + if let Some(span) = 
goto_keyword_span + && !file.same_line(span.start, error.blame_span.end) + { + builder = builder.secondary_label(span, "after this `goto`, a label was expected"); + } + + builder + .primary_label(error.blame_span, primary_text) + .code("P0024") + .build() +} \ No newline at end of file diff --git a/rottlib/src/diagnostics/parse_error_diagnostics/mod.rs b/rottlib/src/diagnostics/parse_error_diagnostics/mod.rs index e659493..3fc5f19 100644 --- a/rottlib/src/diagnostics/parse_error_diagnostics/mod.rs +++ b/rottlib/src/diagnostics/parse_error_diagnostics/mod.rs @@ -13,8 +13,8 @@ use super::{Diagnostic, DiagnosticBuilder}; use crate::lexer::{TokenPosition, TokenSpan, TokenizedFile}; use crate::parser::{ParseError, ParseErrorKind}; -mod primary_expressions; mod control_flow_expressions; +mod primary_expressions; #[derive(Clone, Copy)] enum FoundAt<'src> { @@ -69,8 +69,8 @@ pub(crate) fn diagnostic_from_parse_error<'src>( error: ParseError, file: &TokenizedFile<'src>, ) -> Diagnostic { - use primary_expressions::*; use control_flow_expressions::*; + use primary_expressions::*; match error.kind { // primary_expressions.rs ParseErrorKind::ParenthesizedExpressionInvalidStart => { @@ -102,18 +102,11 @@ pub(crate) fn diagnostic_from_parse_error<'src>( diagnostic_new_argument_missing_comma(error, file) } // control_flow_expressions.rs - ParseErrorKind::ConditionExpected => { - diagnostic_condition_expected(error, file) - } + ParseErrorKind::ConditionExpected => diagnostic_condition_expected(error, file), ParseErrorKind::ControlFlowBodyExpected => { diagnostic_control_flow_body_expected(error, file) } - ParseErrorKind::DoMissingUntil => { - diagnostic_do_missing_until(error, file) - } - ParseErrorKind::ForEachIteratorExpressionExpected => { - diagnostic_for_each_iterator_expression_expected(error, file) - } + ParseErrorKind::DoMissingUntil => diagnostic_do_missing_until(error, file), ParseErrorKind::ForEachIteratorExpressionExpected => { 
diagnostic_for_each_iterator_expression_expected(error, file) } @@ -135,9 +128,14 @@ pub(crate) fn diagnostic_from_parse_error<'src>( ParseErrorKind::ForLoopHeaderMissingClosingParenthesis => { diagnostic_for_loop_header_missing_closing_parenthesis(error, file) } + ParseErrorKind::ReturnValueInvalidStart => { + diagnostic_return_value_invalid_start(error, file) + } + ParseErrorKind::BreakValueInvalidStart => diagnostic_break_value_invalid_start(error, file), + ParseErrorKind::GotoMissingLabel => diagnostic_goto_missing_label(error, file), _ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind)) .primary_label(error.covered_span, "happened here") .build(), } -} \ No newline at end of file +} diff --git a/rottlib/src/diagnostics/render.rs b/rottlib/src/diagnostics/render.rs index a33f603..795fab4 100644 --- a/rottlib/src/diagnostics/render.rs +++ b/rottlib/src/diagnostics/render.rs @@ -36,6 +36,9 @@ error: expected one of `,`, `:`, or `}`, found `token_to` 3. */ + +// TODO: check if blue guidelines are sometimes red or vice versa +// TODO: tabs need to be replaced with a 1-width character // These are abstract rendering events, not self-contained draw commands. // They are emitted in increasing order of "significant lines" (range starts/ends). // The actual source span for a label is recovered later from its LabelType. diff --git a/rottlib/src/parser/errors.rs b/rottlib/src/parser/errors.rs index ade3856..3931d3e 100644 --- a/rottlib/src/parser/errors.rs +++ b/rottlib/src/parser/errors.rs @@ -58,6 +58,12 @@ pub enum ParseErrorKind { ForLoopHeaderStepInvalidStart, /// P0021 ForLoopHeaderMissingClosingParenthesis, + /// P0022 + ReturnValueInvalidStart, + /// P0023 + BreakValueInvalidStart, + /// P0024 + GotoMissingLabel, // ================== Old errors to be thrown away! ================== /// Expression inside `(...)` could not be parsed and no closing `)` /// was found. 
@@ -93,8 +99,6 @@ pub enum ParseErrorKind { /// Found `case` arms after a `default` branch. SwitchCasesAfterDefault, SwitchMissingClosingBrace, - /// A `goto` was not followed by a label. - GotoMissingLabel, /// Unexpected end of input while parsing. UnexpectedEndOfFile, /// Token looked like a numeric literal but could not be parsed as one. diff --git a/rottlib/src/parser/grammar/expression/control_flow.rs b/rottlib/src/parser/grammar/expression/control_flow.rs deleted file mode 100644 index 12d9657..0000000 --- a/rottlib/src/parser/grammar/expression/control_flow.rs +++ /dev/null @@ -1,712 +0,0 @@ -//! Control expression parsing for Fermented `UnrealScript`. -//! -//! ## Condition boundary recovery and legacy compatibility -//! -//! Fermented `UnrealScript` allows omitting parentheses `(...)` around -//! condition expressions of `if`/`while`/`until` and similar constructs. -//! Conditions are therefore parsed as ordinary expressions by default. -//! -//! This means that a leading parenthesized expression may still be part of a -//! larger condition: -//! -//! ```unrealscript -//! if (2 + 2) * 2 < 7 { ... } -//! while (Index + 1) < Count DoWork(); -//! ``` -//! -//! For compatibility with older `UnrealScript` code, we apply one conservative -//! legacy cut-off rule: -//! -//! If the condition begins with a parenthesized expression, and the token after -//! the matching `)` is identifier-like, the parenthesized expression is treated -//! as the whole condition. The following identifier-like token is left for the -//! branch body. -//! -//! This prevents the parser from accidentally consuming the following -//! statement/body as part of the condition in older code such as: -//! -//! ```unrealscript -//! if ( AIController(Controller) != None ) Cross = vect(0,0,0); -//! ``` -//! -//! Without the legacy cut-off, a permissive expression parser could interpret -//! `Cross` as a continuation of the condition in dialects where identifier-like -//! 
tokens may participate in operator syntax. -//! -//! Operator tokens such as `*`, `+`, `<`, `==`, etc. do not trigger this -//! legacy cut-off. They allow the normal expression parser to continue the -//! condition. -//! -//! Trade-off: if an identifier-like token after the closing `)` was intended as -//! a custom/named operator, the parser prefers the legacy interpretation and -//! ends the condition at the closing `)`. Write the condition with additional -//! parentheses or use an unambiguous operator form. -//! -//! ## Disambiguation of `for` as loop vs expression -//! -//! Unlike other control-flow keywords, `for` is disambiguated from a functions -//! or variables with the same name. This is done syntactically in -//! [`Parser::is_for_loop_header_ahead`]: a `for` token followed by -//! a `(` whose contents contain a top-level `;` is unambiguously a loop header. -//! -//! This rule is lightweight, local, and robust, and mirrors the fixed grammar -//! `for (init; condition; step)` without requiring name resolution. -//! -//! ### Why this is not done for `if` / `while` / `do` -//! -//! No similarly reliable way to discriminate `if`, `while`, or related -//! keywords at this stage of parsing: their parenthesized forms are -//! indistinguishable from single argument function calls. -//! -//! Supporting these keywords as identifiers would complicate parsing -//! disproportionately and we always treat them as openers for conditional and -//! cycle expressions. This matches common `UnrealScript` usage and intentionally -//! drops support for moronic design choices where such names were reused -//! as variables or functions (like what author did by declaring -//! a `For` function in Acedia). -//! -//! ### But what about `switch`? -//! -//! `switch` is handled separately because, in existing `UnrealScript` code, -//! it may appear either as a keyword-led construct or as an identifier. -//! -//! 
Its disambiguation rule is simpler than for `for`: if the next token is -//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains -//! available as an identifier. -//! -//! This rule is local and purely syntactic, matching the behavior expected by -//! the existing codebase we support. The actual parsing of `switch` expressions -//! lives in a separate module because the construct itself is more involved -//! than the control-flow forms handled here. - -use crate::ast::{BranchBody, Expression, ExpressionRef, OptionalExpression}; -use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; -use crate::parser::{ - ParseErrorKind, ParseExpressionResult, ParseResult, Parser, ResultRecoveryExt, SyncLevel, - diagnostic_labels, -}; - -struct ParsedForHeader<'src, 'arena> { - initialization: OptionalExpression<'src, 'arena>, - condition: OptionalExpression<'src, 'arena>, - step: OptionalExpression<'src, 'arena>, - right_parenthesis_position: Option, -} - -impl<'src, 'arena> Parser<'src, 'arena> { - /// Returns whether a leading parenthesized condition should be cut off - /// at its closing `)` for legacy compatibility. - /// - /// This checks for the shape: - /// - /// ```text - /// ( ... ) identifier-like-token - /// ``` - /// - /// When this shape is found, the parser stops the condition at the matching - /// `)` and leaves the following identifier-like token for the branch body. - /// - /// This preserves old single-line forms such as: - /// - /// ```unrealscript - /// if (Condition) Cross = 7; - /// ``` - /// - /// while still allowing ordinary operator continuations such as: - /// - /// ```unrealscript - /// if (2 + 2) * 2 < 7 { ... 
} - /// ``` - fn should_apply_legacy_parenthesized_condition_cutoff(&mut self) -> bool { - if self.peek_token() != Some(Token::LeftParenthesis) { - return false; - } - return true; - let mut nesting_depth: usize = 1; - let mut lookahead_token_offset: usize = 1; - while let Some(next_token) = self.peek_token_at(lookahead_token_offset) { - match next_token { - Token::LeftParenthesis => nesting_depth += 1, - Token::RightParenthesis => { - if nesting_depth <= 1 { - return self - .peek_token_at(lookahead_token_offset + 1) - .map(|token| token.is_valid_identifier_name()) - .unwrap_or_default(); - } - nesting_depth -= 1; - } - _ => (), - } - lookahead_token_offset += 1; - } - // End-of-file is reached before finding matching `)` - a clear error; - // doesn't matter if we parse it like legacy condition. - false - } - - // TODO: note how weird returned result here is - /// Parses a control-flow condition. - /// - /// Conditions are parsed as ordinary expressions by default. - /// - /// For legacy compatibility, if the condition starts with a parenthesized - /// expression followed by an identifier-like token, the parenthesized - /// expression is treated as the complete condition and returned as - /// [`Expression::Parentheses`]. The following identifier-like token is left - /// for the branch body. - /// - /// This preserves old forms like: - /// - /// ```unrealscript - /// if (Condition) Cross -= 3; - /// ``` - /// - /// while still allowing common operator continuations like: - /// - /// ```unrealscript - /// if (2 + 2) * 2 < 7 { ... 
} - /// ``` - fn parse_condition( - &mut self, - error_kind: ParseErrorKind, - ) -> ParseExpressionResult<'src, 'arena> { - if self.next_token_definitely_cannot_start_expression() { - let keyword_position = self.last_consumed_position_or_start(); - let error_position = self.peek_position_or_eof(); - return Err(self - .make_error_at(error_kind, error_position) - .blame_token(error_position) - .related_token(diagnostic_labels::EXPRESSION_REQUIRED_BY, keyword_position)); - } - if self.should_apply_legacy_parenthesized_condition_cutoff() - && let Some(left_parenthesis_position) = self.eat_with_position(Token::LeftParenthesis) - { - Ok(self.parse_parenthesized_expression_tail(left_parenthesis_position)) - } else { - Ok(self.parse_expression()) - } - } - - /// Parses a branch body for a control-flow construct. - /// - /// Normalizes the following source forms into a [`BranchBody`]: - /// - /// - empty body with semicolon: `if (cond);` - /// - empty body before a closing `}`: `if (cond) }` - /// - non-empty block body: `if (cond) { ... }` - /// - non-empty single-expression body: `if (cond) expr;` - /// - /// For non-block bodies, this method consumes a trailing `;` when present - /// and records its position in the returned [`BranchBody`]. 
- fn parse_branch_body( - &mut self, - control_keyword_position: TokenPosition, - ) -> BranchBody<'src, 'arena> { - let Some((first_token, first_token_position)) = self.peek_token_and_position() else { - let error = self - .make_error_at_last_consumed(ParseErrorKind::ControlFlowBodyExpected) - .blame_token(self.file.eof()) - .related_token( - diagnostic_labels::EXPRESSION_REQUIRED_BY, - control_keyword_position, - ) - .related_token( - diagnostic_labels::EXPRESSION_EXPECTED_AFTER, - self.last_consumed_position_or_start(), - ); - let end_anchor_token_position = error.covered_span.end; - self.report_error(error); - return BranchBody { - expression: None, - semicolon_position: None, - end_anchor_token_position, - }; - }; - // `if (is_condition);` - if first_token == Token::Semicolon { - self.advance(); // ';' - return BranchBody { - expression: None, - semicolon_position: Some(first_token_position), - end_anchor_token_position: first_token_position, - }; - } - // `{ ... if (is_condition) }` - if first_token == Token::RightBrace { - return BranchBody { - expression: None, - semicolon_position: None, - end_anchor_token_position: self.last_consumed_position_or_start(), - }; - } - let branch_expression = self - .parse_expression_with_start_error( - ParseErrorKind::ControlFlowBodyExpected, - control_keyword_position, - self.last_consumed_position_or_start(), - ) - .unwrap_or_fallback(self); - let end_anchor_token_position = branch_expression.span().end; - // A block body in `if {...}` or `if {...};` owns its own terminator; - // a following `;` does not belong to the branch body. 
- if let Expression::Block(_) = *branch_expression { - return BranchBody { - expression: Some(branch_expression), - semicolon_position: None, - end_anchor_token_position, - }; - } - // For single-expression bodies, consume a trailing semicolon if present - let trailing_semicolon_position = if self.eat(Token::Semicolon) { - self.last_consumed_position() - } else { - None - }; - BranchBody { - expression: Some(branch_expression), - semicolon_position: trailing_semicolon_position, - end_anchor_token_position: trailing_semicolon_position - .unwrap_or(end_anchor_token_position), - } - } - - fn parse_condition_and_branch_body( - &mut self, - condition_context: TokenPosition, - error_kind: ParseErrorKind, - ) -> ParseResult<'src, 'arena, (ExpressionRef<'src, 'arena>, BranchBody<'src, 'arena>)> { - let first_position = self.peek_position_or_eof(); - let condition = self.parse_condition(error_kind)?; - if let Expression::Block(..) = *condition - && self.next_token_definitely_cannot_start_expression() - { - return Err(self - .make_error_at(error_kind, first_position) - .blame_token(first_position) - .related_token(diagnostic_labels::EXPRESSION_REQUIRED_BY, condition_context) - .related("branch_body", *condition.span())); - } - let body = self.parse_branch_body(condition_context); - Ok((condition, body)) - } - - /// Parses an `if` expression after the `if` keyword. - /// - /// The resulting [`Expression::If`] spans from `if_keyword_position` to the - /// end of the `if` body, or to the end of the `else` body if one is - /// present. 
- #[must_use] - pub(crate) fn parse_if_tail( - &mut self, - if_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - let (condition, body) = match self - .parse_condition_and_branch_body(if_keyword_position, ParseErrorKind::ConditionExpected) - { - Ok(good_result) => good_result, - Err(error) => return error.fallback(self), - }; - - let (else_body, if_end_position) = if self.peek_keyword() == Some(Keyword::Else) { - self.advance(); // 'else' - let else_body = self.parse_branch_body(self.last_consumed_position_or_start()); - let else_body_end = else_body.end_anchor_token_position; - (Some(else_body), else_body_end) - } else { - (None, body.end_anchor_token_position) - }; - - let span = TokenSpan::range(if_keyword_position, if_end_position); - self.arena.alloc_node( - Expression::If { - condition, - body, - else_body, - }, - span, - ) - } - - /// Parses a `while` expression after the `while` keyword. - /// - /// The resulting [`Expression::While`] spans from `while_keyword_position` - /// to the end of its body. - #[must_use] - pub(crate) fn parse_while_tail( - &mut self, - while_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - let (condition, body) = match self.parse_condition_and_branch_body( - while_keyword_position, - ParseErrorKind::ConditionExpected, - ) { - Ok(good_result) => good_result, - Err(error) => return error.fallback(self), - }; - let span = TokenSpan::range(while_keyword_position, body.end_anchor_token_position); - self.arena - .alloc_node(Expression::While { condition, body }, span) - } - - /// Parses a `do ... until ...` expression after the `do` keyword. - /// - /// The resulting [`Expression::DoUntil`] spans from `do_keyword_position` - /// to the end of the condition. 
- #[must_use] - pub(crate) fn parse_do_until_tail( - &mut self, - do_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - let body = self.parse_branch_body(do_keyword_position); - - let condition = if self - .expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil) - .widen_error_span_from(do_keyword_position) - .related_token("do_keyword", do_keyword_position) - .report_error(self) - { - crate::arena::ArenaNode::new_in( - Expression::Error, - TokenSpan::new(body.end_anchor_token_position), - self.arena, - ) - } else { - self.parse_condition(ParseErrorKind::ConditionExpected) - .related_token("do_keyword", do_keyword_position) - .unwrap_or_fallback(self) - }; - let span = TokenSpan::range(do_keyword_position, condition.span().end); - self.arena - .alloc_node(Expression::DoUntil { condition, body }, span) - } - - /// Parses a `foreach` expression after the `foreach` keyword. - /// - /// The iterator part is consumed as a regular expression, followed by a - /// branch body. - /// - /// The resulting [`Expression::ForEach`] spans from - /// `foreach_keyword_position` to the end of the body. 
- #[must_use] - pub(crate) fn parse_foreach_tail( - &mut self, - foreach_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - if self - .peek_token() - .map(|error| !error.is_valid_identifier_name()) - .unwrap_or_default() - { - let error_position = self.peek_position_or_eof(); - return self - .make_error_at( - ParseErrorKind::ForEachIteratorExpressionExpected, - error_position, - ) - .blame_token(error_position) - .related_token( - diagnostic_labels::EXPRESSION_REQUIRED_BY, - foreach_keyword_position, - ) - .fallback(self); - } - let iterated_expression = - match self.parse_condition(ParseErrorKind::ForEachIteratorExpressionExpected) { - Ok(good_result) => good_result, - Err(error) => return error.fallback(self), - }; - let body = self.parse_branch_body(foreach_keyword_position); - let span = TokenSpan::range(foreach_keyword_position, body.end_anchor_token_position); - self.arena.alloc_node( - Expression::ForEach { - iterated_expression, - body, - }, - span, - ) - } - - /// Returns whether the upcoming tokens have the syntactic shape of a - /// `for (...)` header. - /// - /// More precisely, this returns `true` iff the next token is `(` and a - /// top-level `;` appears before the matching `)` is closed or input ends. - /// - /// This is used only for loop-vs-identifier disambiguation. - pub(super) fn is_for_loop_header_ahead(&mut self) -> Option { - let Some((Token::LeftParenthesis, left_parenthesis_position)) = - self.peek_token_and_position() - else { - return None; - }; - let mut nesting_depth: usize = 1; - let mut lookahead_token_offset: usize = 1; - while let Some(next_token) = self.peek_token_at(lookahead_token_offset) { - match next_token { - Token::LeftParenthesis => nesting_depth += 1, - Token::RightParenthesis => { - if nesting_depth <= 1 { - // End of the immediate `for (...)` group without a - // top-level `;`: not a loop header. 
- return None; - } - nesting_depth -= 1; - } - Token::Semicolon if nesting_depth == 1 => return Some(left_parenthesis_position), - _ => (), - } - lookahead_token_offset += 1; - } - // EOF before closing the immediate `for (...)` group. Treat this as an - // incomplete `for` loop header, not as a function call, so recovery can - // produce `P0017` / `P0021`-style diagnostics. - Some(left_parenthesis_position) - //None - } - - /// Parses a `for` expression after the `for` keyword. - /// - /// This method expects the standard header shape - /// `for (initialization; condition; step)` and then parses a branch body. - /// - /// Each header component may be omitted. The resulting [`Expression::For`] - /// spans from `for_keyword_position` to the end of the body. - #[must_use] - pub(crate) fn parse_for_tail( - &mut self, - for_keyword_position: TokenPosition, - left_parenthesis_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - let header = self.parse_for_header(for_keyword_position, left_parenthesis_position); - if header.right_parenthesis_position.is_none() { - return self.arena.alloc_node( - Expression::Error, - TokenSpan::range(for_keyword_position, self.last_consumed_position_or_start()), - ); - } - let body = self.parse_branch_body(for_keyword_position); - let span = TokenSpan::range(for_keyword_position, body.end_anchor_token_position); - self.arena.alloc_node( - Expression::For { - initialization: header.initialization, - condition: header.condition, - step: header.step, - body, - }, - span, - ) - } - - fn parse_for_optional_expression( - &mut self, - bad_start_error_kind: crate::parser::ParseErrorKind, - stop_token: Token, - for_keyword_position: crate::lexer::TokenPosition, - left_parenthesis_position: crate::lexer::TokenPosition, - ) -> OptionalExpression<'src, 'arena> { - if let Some(next_token) = self.peek_token() - && next_token != stop_token - { - Some( - self.parse_expression_with_start_error( - bad_start_error_kind, - for_keyword_position, - 
left_parenthesis_position, - ) - .sync_error_until(self, SyncLevel::CloseParenthesis) - .unwrap_or_fallback(self), - ) - } else { - None - } - } - - fn parse_for_header( - &mut self, - for_keyword_position: TokenPosition, - left_parenthesis_position: TokenPosition, - ) -> ParsedForHeader<'src, 'arena> { - let mut header = ParsedForHeader { - initialization: None, - condition: None, - step: None, - right_parenthesis_position: None, - }; - header.initialization = self.parse_for_optional_expression( - ParseErrorKind::ForLoopHeaderInitializerInvalidStart, - Token::Semicolon, - for_keyword_position, - left_parenthesis_position, - ); - let error_token = match self.peek_token_and_position() { - Some((Token::Semicolon, _)) => { - self.advance(); - None - } - Some((Token::RightParenthesis, right_parenthesis_position)) => { - header.right_parenthesis_position = Some(right_parenthesis_position); - self.advance(); - Some(right_parenthesis_position) - } - Some((_, next_token_position)) => Some(next_token_position), - None => Some(self.peek_position_or_eof()), - }; - if let Some(error_token) = error_token { - if let Some(ref a) = header.initialization { - if matches!(**a, Expression::Error) { - return header; - } - } - let mut error = self - .make_error_at( - ParseErrorKind::ForLoopHeaderMissingSemicolonAfterInitializer, - error_token, - ) - .widen_error_span_from(for_keyword_position) - .blame_token(error_token); - if let Some(ref a) = header.initialization { - error = error.related("for_header_initializer", *a.span()) - }; - error.report_error(self); - return header; - } - let first_semicolon_position = self.last_consumed_position_or_start(); - header.condition = self.parse_for_optional_expression( - ParseErrorKind::ForLoopHeaderConditionInvalidStart, - Token::Semicolon, - for_keyword_position, - first_semicolon_position, - ); - let error_token = match self.peek_token_and_position() { - Some((Token::Semicolon, _)) => { - self.advance(); - None - } - 
Some((Token::RightParenthesis, right_parenthesis_position)) => { - header.right_parenthesis_position = Some(right_parenthesis_position); - self.advance(); - Some(right_parenthesis_position) - } - Some((_, next_token_position)) => Some(next_token_position), - None => Some(self.peek_position_or_eof()), - }; - if let Some(error_token) = error_token { - if let Some(ref a) = header.condition { - if matches!(**a, Expression::Error) { - return header; - } - } - let mut error = self - .make_error_at( - ParseErrorKind::ForLoopHeaderMissingSemicolonAfterCondition, - error_token, - ) - .widen_error_span_from(for_keyword_position) - .blame_token(error_token); - if let Some(ref a) = header.condition { - error = error.related("for_header_condition", *a.span()) - }; - error.report_error(self); - return header; - } - let second_semicolon_position = self.last_consumed_position_or_start(); - header.step = self.parse_for_optional_expression( - ParseErrorKind::ForLoopHeaderStepInvalidStart, - Token::RightParenthesis, - for_keyword_position, - second_semicolon_position, - ); - // ////////////////////////////////// - if let Some(ref a) = header.step - && matches!(**a, Expression::Error) - { - if let Some((Token::RightParenthesis, right_parenthesis_position)) = - self.peek_token_and_position() - { - header.right_parenthesis_position = Some(right_parenthesis_position); - self.advance(); - } - return header; - } - - header.right_parenthesis_position = self - .expect( - Token::RightParenthesis, - ParseErrorKind::ForLoopHeaderMissingClosingParenthesis, - ) - .widen_error_span_from(for_keyword_position) - .related_token("for_header_start", left_parenthesis_position) - .sync_error_at(self, SyncLevel::CloseParenthesis) - .ok_or_report(self); - // ////////////////////////////////// - header - } - - /// Parses the continuation of a `return` expression after its keyword. - /// - /// If the next token is not `;`, consumes a return value expression. - /// The terminating `;` is not consumed here. 
- #[must_use] - pub(crate) fn parse_return_tail( - &mut self, - return_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - let (value, span) = if self.peek_token() == Some(Token::Semicolon) { - (None, TokenSpan::new(return_keyword_position)) - } else { - let returned_value = self.parse_expression(); - let span = TokenSpan::range(return_keyword_position, returned_value.span().end); - (Some(returned_value), span) - }; - self.arena.alloc_node(Expression::Return(value), span) - } - - /// Parses the continuation of a `break` expression after its keyword. - /// - /// If the next token is not `;`, consumes a break value expression. - /// The terminating `;` is not consumed here. - #[must_use] - pub(crate) fn parse_break_tail( - &mut self, - break_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - let (value, span) = if self.peek_token() == Some(Token::Semicolon) { - (None, TokenSpan::new(break_keyword_position)) - } else { - let returned_value = self.parse_expression(); - let span = TokenSpan::range(break_keyword_position, returned_value.span().end); - (Some(returned_value), span) - }; - self.arena.alloc_node(Expression::Break(value), span) - } - - /// Parses the continuation of a `goto` expression after its keyword. - /// - /// Accepts either a name literal or an identifier as the target label. 
- #[must_use] - pub(crate) fn parse_goto_tail( - &mut self, - goto_keyword_position: TokenPosition, - ) -> ExpressionRef<'src, 'arena> { - if let Some((label_token, label_position)) = self.peek_token_and_position() - && (label_token == Token::NameLiteral || label_token == Token::Identifier) - { - self.advance(); - return self.arena.alloc_node_between( - Expression::Goto(label_position), - goto_keyword_position, - label_position, - ); - } - self.make_error_at_last_consumed(ParseErrorKind::GotoMissingLabel) - .widen_error_span_from(goto_keyword_position) - .sync_error_until(self, SyncLevel::Statement) - .report_error(self); - crate::arena::ArenaNode::new_in( - Expression::Error, - TokenSpan::new(goto_keyword_position), - self.arena, - ) - } -} diff --git a/rottlib/src/parser/grammar/expression/control_flow/for_loop.rs b/rottlib/src/parser/grammar/expression/control_flow/for_loop.rs new file mode 100644 index 0000000..7e3cfd9 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/control_flow/for_loop.rs @@ -0,0 +1,282 @@ +//! Parser for `for` loop expressions in Fermented UnrealScript. + +use crate::ast::{Expression, ExpressionRef, OptionalExpression}; +use crate::lexer::{self, Token, TokenPosition}; +use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; + +#[derive(Debug)] +struct ParsedForHeader<'src, 'arena> { + initializer: OptionalExpression<'src, 'arena>, + condition: OptionalExpression<'src, 'arena>, + step: OptionalExpression<'src, 'arena>, + right_parenthesis_position: Option, +} + +impl<'src, 'arena> ParsedForHeader<'src, 'arena> { + #[must_use] + fn new() -> Self { + Self { + initializer: None, + condition: None, + step: None, + right_parenthesis_position: None, + } + } +} + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Returns the position of the next `(` when it can start a `for (...)` + /// header. + /// + /// Recognizes a header by a top-level `;` before the matching `)`. 
+ /// Incomplete headers are accepted so later parsing can produce + /// `for`-specific diagnostics. + /// + /// Performs a lookahead-only check used for loop-vs-identifier + /// disambiguation. + pub(in super::super) fn peek_for_loop_header_left_parenthesis_position( + &mut self, + ) -> Option { + let Some((Token::LeftParenthesis, left_parenthesis_position)) = + self.peek_token_and_position() + else { + return None; + }; + let mut nesting_depth: usize = 1; + let mut lookahead_offset: usize = 1; + while let Some(next_token) = self.peek_token_at(lookahead_offset) { + match next_token { + Token::LeftParenthesis => nesting_depth += 1, + Token::RightParenthesis => { + if nesting_depth <= 1 { + // A closed immediate group without a top-level `;` + // is not a loop header. + return None; + } + nesting_depth -= 1; + } + Token::Semicolon if nesting_depth == 1 => return Some(left_parenthesis_position), + _ => (), + } + lookahead_offset += 1; + } + // Prefer treating incomplete `for (` as a loop header so recovery + // reports header diagnostics instead of call-like diagnostics. + Some(left_parenthesis_position) + } + + /// Parses a `for` expression after `for` and the opening `(` have been + /// consumed. + /// + /// Header components may be omitted. Returns [`Expression::Error`] if the + /// header cannot be closed; otherwise returns [`Expression::For`] spanning + /// from `for_keyword_position` through the parsed body. 
+ #[must_use] + pub(in super::super) fn parse_for_tail( + &mut self, + for_keyword_position: TokenPosition, + left_parenthesis_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let header = self.parse_for_header(for_keyword_position, left_parenthesis_position); + if header.right_parenthesis_position.is_none() { + return self.arena.alloc_node( + Expression::Error, + lexer::TokenSpan::range( + for_keyword_position, + self.last_consumed_position_or_start(), + ), + ); + } + let body = self.parse_branch_body(for_keyword_position); + let span = lexer::TokenSpan::range(for_keyword_position, body.end_anchor_token_position); + self.arena.alloc_node( + Expression::For { + initializer: header.initializer, + condition: header.condition, + step: header.step, + body, + }, + span, + ) + } + + /// Parses one optional `for` header component before `terminator_token`. + /// + /// Uses `component_start_anchor_position` to anchor invalid-start + /// diagnostics at the position where an expression was expected. + fn parse_optional_for_header_expression( + &mut self, + invalid_start_error_kind: ParseErrorKind, + terminator_token: Token, + for_keyword_position: TokenPosition, + component_start_anchor_position: TokenPosition, + ) -> OptionalExpression<'src, 'arena> { + if let Some(next_token) = self.peek_token() + && next_token != terminator_token + { + Some( + self.parse_expression_with_start_error( + invalid_start_error_kind, + for_keyword_position, + component_start_anchor_position, + ) + // Header recovery must not consume the next `;`; + // it belongs to the surrounding `for` header. + .sync_error_until(self, SyncLevel::CloseParenthesis) + .unwrap_or_fallback(self), + ) + } else { + None + } + } + + fn optional_expression_is_error(expression: &OptionalExpression<'src, 'arena>) -> bool { + expression + .as_ref() + .is_some_and(|expression| matches!(**expression, Expression::Error)) + } + + /// Consumes the next header semicolon or recovers at `)`. 
+ /// + /// Suppresses the missing-semicolon diagnostic when the component + /// expression has already failed. + fn consume_for_header_semicolon_or_recover( + &mut self, + for_keyword_position: TokenPosition, + right_parenthesis_position: &mut Option, + component_expression: &OptionalExpression<'src, 'arena>, + missing_semicolon_error_kind: ParseErrorKind, + component_diagnostic_label: &'static str, + ) -> bool { + let diagnostic_position = match self.peek_token_and_position() { + Some((Token::Semicolon, _)) => { + self.advance(); + return true; + } + Some((Token::RightParenthesis, position)) => { + *right_parenthesis_position = Some(position); + self.advance(); + position + } + Some((_, position)) => position, + None => self.peek_position_or_eof(), + }; + // Do not add a missing-semicolon error on top of a component + // parse error. + if Self::optional_expression_is_error(component_expression) { + return false; + } + let mut error = self + .make_error_at(missing_semicolon_error_kind, diagnostic_position) + .widen_error_span_from(for_keyword_position) + .blame_token(diagnostic_position); + if let Some(expression) = component_expression { + error = error.related(component_diagnostic_label, *expression.span()); + } + error.report_error(self); + false + } + + /// Finishes recovery after an invalid step expression. + /// + /// Consumes a following `)` when present and suppresses the missing-`)` + /// check to avoid a cascading header diagnostic. Returns `true` when header + /// parsing should stop. 
+ fn recover_after_invalid_step_expression( + &mut self, + header: &mut ParsedForHeader<'src, 'arena>, + ) -> bool { + if !Self::optional_expression_is_error(&header.step) { + return false; + } + if let Some((Token::RightParenthesis, right_parenthesis_position)) = + self.peek_token_and_position() + { + header.right_parenthesis_position = Some(right_parenthesis_position); + self.advance(); + } + true + } + + /// Consumes the closing `)` of a `for` header or reports + /// a header-level error. + /// + /// Links the diagnostic back to the opening `(` so incomplete headers point + /// to the whole header region. + fn consume_for_header_closing_parenthesis( + &mut self, + for_keyword_position: TokenPosition, + left_parenthesis_position: TokenPosition, + header: &mut ParsedForHeader<'src, 'arena>, + ) { + header.right_parenthesis_position = self + .expect( + Token::RightParenthesis, + ParseErrorKind::ForLoopHeaderMissingClosingParenthesis, + ) + .widen_error_span_from(for_keyword_position) + .related_token("for_header_start", left_parenthesis_position) + .sync_error_at(self, SyncLevel::CloseParenthesis) + .ok_or_report(self); + } + + /// Parses the initializer, condition, and step expressions of a `for` + /// header. + /// + /// Stops after the first unrecovered separator error so later recovery + /// does not produce duplicate header diagnostics. 
+ fn parse_for_header( + &mut self, + for_keyword_position: TokenPosition, + left_parenthesis_position: TokenPosition, + ) -> ParsedForHeader<'src, 'arena> { + let mut header = ParsedForHeader::new(); + header.initializer = self.parse_optional_for_header_expression( + ParseErrorKind::ForLoopHeaderInitializerInvalidStart, + Token::Semicolon, + for_keyword_position, + left_parenthesis_position, + ); + if !self.consume_for_header_semicolon_or_recover( + for_keyword_position, + &mut header.right_parenthesis_position, + &header.initializer, + ParseErrorKind::ForLoopHeaderMissingSemicolonAfterInitializer, + "for_header_initializer", + ) { + return header; + } + let initializer_semicolon_position = self.last_consumed_position_or_start(); + header.condition = self.parse_optional_for_header_expression( + ParseErrorKind::ForLoopHeaderConditionInvalidStart, + Token::Semicolon, + for_keyword_position, + initializer_semicolon_position, + ); + if !self.consume_for_header_semicolon_or_recover( + for_keyword_position, + &mut header.right_parenthesis_position, + &header.condition, + ParseErrorKind::ForLoopHeaderMissingSemicolonAfterCondition, + "for_header_condition", + ) { + return header; + } + let condition_semicolon_position = self.last_consumed_position_or_start(); + header.step = self.parse_optional_for_header_expression( + ParseErrorKind::ForLoopHeaderStepInvalidStart, + Token::RightParenthesis, + for_keyword_position, + condition_semicolon_position, + ); + if self.recover_after_invalid_step_expression(&mut header) { + return header; + } + self.consume_for_header_closing_parenthesis( + for_keyword_position, + left_parenthesis_position, + &mut header, + ); + header + } +} diff --git a/rottlib/src/parser/grammar/expression/control_flow/mod.rs b/rottlib/src/parser/grammar/expression/control_flow/mod.rs new file mode 100644 index 0000000..3712e40 --- /dev/null +++ b/rottlib/src/parser/grammar/expression/control_flow/mod.rs @@ -0,0 +1,502 @@ +//! 
Control expression parsing for Fermented `UnrealScript`. +//! +//! ## Condition boundary recovery and legacy compatibility +//! +//! Fermented `UnrealScript` allows omitting parentheses `(...)` around +//! condition expressions of `if`/`while`/`until` and similar constructs. +//! Conditions are therefore parsed as ordinary expressions by default. +//! +//! This means that a leading parenthesized expression may still be part of a +//! larger condition: +//! +//! ```unrealscript +//! if (2 + 2) * 2 < 7 { ... } +//! while (Index + 1) < Count DoWork(); +//! ``` +//! +//! For compatibility with older `UnrealScript` code, we apply one conservative +//! legacy cut-off rule: +//! +//! If the condition begins with a parenthesized expression, and the token after +//! the matching `)` is identifier-like, the parenthesized expression is treated +//! as the whole condition. The following identifier-like token is left for the +//! branch body. +//! +//! This prevents the parser from accidentally consuming the following +//! statement/body as part of the condition in older code such as: +//! +//! ```unrealscript +//! if ( AIController(Controller) != None ) Cross = vect(0,0,0); +//! ``` +//! +//! Without the legacy cut-off, a permissive expression parser could interpret +//! `Cross` as a continuation of the condition in dialects where identifier-like +//! tokens may participate in operator syntax. +//! +//! Operator tokens such as `*`, `+`, `<`, `==`, etc. do not trigger this +//! legacy cut-off. They allow the normal expression parser to continue the +//! condition. +//! +//! Trade-off: if an identifier-like token after the closing `)` was intended as +//! a custom/named operator, the parser prefers the legacy interpretation and +//! ends the condition at the closing `)`. Write the condition with additional +//! parentheses or use an unambiguous operator form. +//! +//! ## Disambiguation of `for` as loop vs expression +//! +//! 
Unlike other control-flow keywords, `for` is disambiguated from functions +//! and variables with the same name. This is done syntactically in +//! [`Parser::peek_for_loop_header_left_parenthesis_position`]: a `for` token followed by +//! a `(` whose contents contain a top-level `;` is unambiguously a loop header. +//! +//! This rule is lightweight, local, and robust, and mirrors the fixed grammar +//! `for (init; condition; step)` without requiring name resolution. +//! +//! ### Why this is not done for `if` / `while` / `do` +//! +//! There is no similarly reliable way to discriminate `if`, `while`, or related +//! keywords at this stage of parsing: their parenthesized forms are +//! indistinguishable from single-argument function calls. +//! +//! Supporting these keywords as identifiers would complicate parsing +//! disproportionately, so we always treat them as openers for conditional and +//! loop expressions. This matches common `UnrealScript` usage and +//! intentionally drops support for moronic design choices where such names were +//! reused as variables or functions (as the author did by declaring +//! a `For` function in Acedia). +//! +//! ### But what about `switch`? +//! +//! `switch` is handled separately because, in existing `UnrealScript` code, +//! it may appear either as a keyword-led construct or as an identifier. +//! +//! Its disambiguation rule is simpler than the one for `for`: if the next token is +//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains +//! available as an identifier. +//! +//! This rule is local and purely syntactic, matching the behavior expected by +//! the existing codebase we support. The actual parsing of `switch` expressions +//! lives in a separate module because the construct itself is more involved +//! than the control-flow forms handled here. 
+ +use crate::ast::{BranchBody, Expression, ExpressionRef}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; +use crate::parser::{ + ParseErrorKind, ParseExpressionResult, ParseResult, Parser, ResultRecoveryExt, + diagnostic_labels, +}; + +pub(super) mod for_loop; + +impl<'src, 'arena> Parser<'src, 'arena> { + /// Returns the opening `(` of a legacy parenthesized condition cut-off + /// when it applies. + fn find_legacy_parenthesized_condition_opening_position(&mut self) -> Option { + let Some((Token::LeftParenthesis, left_parenthesis_position)) = + self.peek_token_and_position() + else { + return None; + }; + let mut nesting_depth: usize = 1; + let mut lookahead_token_offset: usize = 1; + while let Some(lookahead_token) = self.peek_token_at(lookahead_token_offset) { + match lookahead_token { + Token::LeftParenthesis => nesting_depth += 1, + Token::RightParenthesis => { + if nesting_depth == 1 { + return self + .peek_token_at(lookahead_token_offset + 1) + .is_some_and(|token| token.is_valid_identifier_name()) + .then_some(left_parenthesis_position); + } + nesting_depth -= 1; + } + _ => (), + } + lookahead_token_offset += 1; + } + // Recovery is left to normal expression parsing when the closing `)` + // is missing. + None + } + + /// Parses a control-flow condition. + /// + /// Conditions are parsed as ordinary expressions unless the legacy + /// parenthesized-condition cut-off applies. 
+ fn parse_condition( + &mut self, + invalid_start_error_kind: ParseErrorKind, + ) -> ParseExpressionResult<'src, 'arena> { + if self.next_token_definitely_cannot_start_expression() { + let keyword_position = self.last_consumed_position_or_start(); + let error_position = self.peek_position_or_eof(); + return Err(self + .make_error_at(invalid_start_error_kind, error_position) + .blame_token(error_position) + .related_token(diagnostic_labels::EXPRESSION_REQUIRED_BY, keyword_position)); + } + if let Some(left_parenthesis_position) = + self.find_legacy_parenthesized_condition_opening_position() + { + self.advance(); + Ok(self.parse_parenthesized_expression_tail(left_parenthesis_position)) + } else { + Ok(self.parse_expression()) + } + } + + /// Parses a branch body for a control-flow construct. + /// + /// Normalizes the following source forms into a [`BranchBody`]: + /// + /// - empty body with semicolon: `if (cond);` + /// - empty body before a closing `}`: `if (cond) }` + /// - non-empty block body: `if (cond) { ... }` + /// - non-empty single-expression body: `if (cond) expr;` + /// + /// For non-block bodies, this method consumes a trailing `;` when present + /// and records its position in the returned [`BranchBody`]. + fn parse_branch_body( + &mut self, + branch_owner_keyword_position: TokenPosition, + ) -> BranchBody<'src, 'arena> { + let Some((first_token, first_token_position)) = self.peek_token_and_position() else { + return self.recover_missing_branch_body(branch_owner_keyword_position); + }; + // `if (is_condition);` + if first_token == Token::Semicolon { + return self.parse_empty_semicolon_branch_body(first_token_position); + } + // `{ ... 
if (is_condition) }` + if first_token == Token::RightBrace { + return self.make_empty_branch_body_before_closing_brace(); + } + self.parse_non_empty_branch_body(branch_owner_keyword_position) + } + + fn recover_missing_branch_body( + &mut self, + branch_owner_keyword_position: TokenPosition, + ) -> BranchBody<'src, 'arena> { + let error = self + .make_error_at_last_consumed(ParseErrorKind::ControlFlowBodyExpected) + .blame_token(self.file.eof()) + .related_token( + diagnostic_labels::EXPRESSION_REQUIRED_BY, + branch_owner_keyword_position, + ) + .related_token( + diagnostic_labels::EXPRESSION_EXPECTED_AFTER, + self.last_consumed_position_or_start(), + ); + let end_anchor_token_position = error.covered_span.end; + self.report_error(error); + BranchBody { + expression: None, + semicolon_position: None, + end_anchor_token_position, + } + } + + fn parse_empty_semicolon_branch_body( + &mut self, + semicolon_position: TokenPosition, + ) -> BranchBody<'src, 'arena> { + self.advance(); // ';' + BranchBody { + expression: None, + semicolon_position: Some(semicolon_position), + end_anchor_token_position: semicolon_position, + } + } + + fn make_empty_branch_body_before_closing_brace(&mut self) -> BranchBody<'src, 'arena> { + BranchBody { + expression: None, + semicolon_position: None, + end_anchor_token_position: self.last_consumed_position_or_start(), + } + } + + fn parse_non_empty_branch_body( + &mut self, + branch_owner_keyword_position: TokenPosition, + ) -> BranchBody<'src, 'arena> { + let branch_expression = self + .parse_expression_with_start_error( + ParseErrorKind::ControlFlowBodyExpected, + branch_owner_keyword_position, + self.last_consumed_position_or_start(), + ) + .unwrap_or_fallback(self); + let end_anchor_token_position = branch_expression.span().end; + // A block body in `if {...}` or `if {...};` owns its own terminator; + // a following `;` does not belong to the branch body. 
+ if let Expression::Block(_) = *branch_expression { + return BranchBody { + expression: Some(branch_expression), + semicolon_position: None, + end_anchor_token_position, + }; + } + // Single-expression bodies own their optional trailing `;`. + let trailing_semicolon_position = if self.eat(Token::Semicolon) { + self.last_consumed_position() + } else { + None + }; + BranchBody { + expression: Some(branch_expression), + semicolon_position: trailing_semicolon_position, + end_anchor_token_position: trailing_semicolon_position + .unwrap_or(end_anchor_token_position), + } + } + + fn parse_condition_and_branch_body( + &mut self, + branch_owner_keyword_position: TokenPosition, + condition_expected_error_kind: ParseErrorKind, + ) -> ParseResult<'src, 'arena, (ExpressionRef<'src, 'arena>, BranchBody<'src, 'arena>)> { + let condition_start_position = self.peek_position_or_eof(); + let condition = self.parse_condition(condition_expected_error_kind)?; + // Prefer diagnosing `if {...}` as a missing condition instead of + // treating the block as the condition and then reporting + // a missing body. + if let Expression::Block(..) = *condition + && self.next_token_definitely_cannot_start_expression() + { + return Err(self + .make_error_at(condition_expected_error_kind, condition_start_position) + .blame_token(condition_start_position) + .related_token( + diagnostic_labels::EXPRESSION_REQUIRED_BY, + branch_owner_keyword_position, + ) + .related("branch_body", *condition.span())); + } + let branch_body = self.parse_branch_body(branch_owner_keyword_position); + Ok((condition, branch_body)) + } + + /// Parses an `if` expression after the `if` keyword. + /// + /// The resulting [`Expression::If`] spans from `if_keyword_position` to the + /// end of the `if` body, or to the end of the `else` body if one is + /// present. 
+ #[must_use] + pub(super) fn parse_if_tail( + &mut self, + if_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (condition, then_body) = match self + .parse_condition_and_branch_body(if_keyword_position, ParseErrorKind::ConditionExpected) + { + Ok(condition_and_body) => condition_and_body, + Err(error) => return error.fallback(self), + }; + + let (else_body, if_expression_end_position) = if self.peek_keyword() == Some(Keyword::Else) + { + self.advance(); // 'else' + let else_body = self.parse_branch_body(self.last_consumed_position_or_start()); + let else_body_end_position = else_body.end_anchor_token_position; + (Some(else_body), else_body_end_position) + } else { + (None, then_body.end_anchor_token_position) + }; + + let span = TokenSpan::range(if_keyword_position, if_expression_end_position); + self.arena.alloc_node( + Expression::If { + condition, + then_body, + else_body, + }, + span, + ) + } + + /// Parses a `while` expression after the `while` keyword. + /// + /// The resulting [`Expression::While`] spans from `while_keyword_position` + /// to the end of its body. + #[must_use] + pub(super) fn parse_while_tail( + &mut self, + while_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (condition, body) = match self.parse_condition_and_branch_body( + while_keyword_position, + ParseErrorKind::ConditionExpected, + ) { + Ok(condition_and_body) => condition_and_body, + Err(error) => return error.fallback(self), + }; + let span = TokenSpan::range(while_keyword_position, body.end_anchor_token_position); + self.arena + .alloc_node(Expression::While { condition, body }, span) + } + + /// Parses a `do ... until ...` expression after the `do` keyword. + /// + /// The resulting [`Expression::DoUntil`] spans from `do_keyword_position` + /// to the end of the condition. 
+ #[must_use] + pub(super) fn parse_do_until_tail( + &mut self, + do_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let body = self.parse_branch_body(do_keyword_position); + + let until_keyword_was_missing = self + .expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil) + .widen_error_span_from(do_keyword_position) + .related_token("do_keyword", do_keyword_position) + .report_error(self); + let condition = if until_keyword_was_missing { + self.make_error_expression_at(body.end_anchor_token_position) + } else { + self.parse_condition(ParseErrorKind::ConditionExpected) + .related_token("do_keyword", do_keyword_position) + .unwrap_or_fallback(self) + }; + let span = TokenSpan::range(do_keyword_position, condition.span().end); + self.arena + .alloc_node(Expression::DoUntil { condition, body }, span) + } + + /// Parses a `foreach` expression after the `foreach` keyword. + /// + /// The iterator expression must start with an identifier-like token. Later + /// stages validate its full shape. + /// + /// The resulting [`Expression::ForEach`] spans from + /// `foreach_keyword_position` to the end of the body. + #[must_use] + pub(super) fn parse_foreach_tail( + &mut self, + foreach_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + // End-of-file is allowed through so the shared condition parser can + // produce the missing-iterator diagnostic. 
+ if self + .peek_token() + .is_some_and(|token| !token.is_valid_identifier_name()) + { + let error_position = self.peek_position_or_eof(); + return self + .make_error_at( + ParseErrorKind::ForEachIteratorExpressionExpected, + error_position, + ) + .blame_token(error_position) + .related_token( + diagnostic_labels::EXPRESSION_REQUIRED_BY, + foreach_keyword_position, + ) + .fallback(self); + } + let iterator_expression = + match self.parse_condition(ParseErrorKind::ForEachIteratorExpressionExpected) { + Ok(iterator_expression) => iterator_expression, + Err(error) => return error.fallback(self), + }; + let body = self.parse_branch_body(foreach_keyword_position); + let span = TokenSpan::range(foreach_keyword_position, body.end_anchor_token_position); + self.arena.alloc_node( + Expression::ForEach { + iterator_expression, + body, + }, + span, + ) + } + + /// Parses a `return` expression after the `return` keyword. + /// + /// Consumes an optional return value expression. The terminating `;` is + /// left for the surrounding expression parser. + #[must_use] + pub(super) fn parse_return_tail( + &mut self, + return_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (return_value, span) = match self.peek_token() { + // Non-`;` followers are parsed here so invalid return values are + // reported as return-specific diagnostics. + None | Some(Token::Semicolon) => (None, TokenSpan::new(return_keyword_position)), + _ => { + let return_value = self + .parse_expression_with_start_error( + ParseErrorKind::ReturnValueInvalidStart, + return_keyword_position, + return_keyword_position, + ) + .unwrap_or_fallback(self); + let span = TokenSpan::range(return_keyword_position, return_value.span().end); + (Some(return_value), span) + } + }; + self.arena + .alloc_node(Expression::Return(return_value), span) + } + + /// Parses a `break` expression after the `break` keyword. + /// + /// Consumes an optional break value expression. 
The terminating `;` is left + /// for the surrounding expression parser. + #[must_use] + pub(super) fn parse_break_tail( + &mut self, + break_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + let (break_value, span) = match self.peek_token() { + // Non-`;` followers are parsed here so invalid break values are + // reported as break-specific diagnostics. + None | Some(Token::Semicolon) => (None, TokenSpan::new(break_keyword_position)), + _ => { + let break_value = self + .parse_expression_with_start_error( + ParseErrorKind::BreakValueInvalidStart, + break_keyword_position, + break_keyword_position, + ) + .unwrap_or_fallback(self); + let span = TokenSpan::range(break_keyword_position, break_value.span().end); + (Some(break_value), span) + } + }; + self.arena.alloc_node(Expression::Break(break_value), span) + } + + /// Parses the continuation of a `goto` expression after its keyword. + /// + /// Accepts either a name literal or an identifier as the target label. + #[must_use] + pub(super) fn parse_goto_tail( + &mut self, + goto_keyword_position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + // Labels may be written either as UnrealScript name literals or + // as bare names. 
+ if let Some((label_token, label_position)) = self.peek_token_and_position() + && (label_token == Token::NameLiteral || label_token.is_valid_identifier_name()) + { + self.advance(); + return self.arena.alloc_node_between( + Expression::Goto(label_position), + goto_keyword_position, + label_position, + ); + } + let error_position = self.peek_position_or_eof(); + self.make_error_at_last_consumed(ParseErrorKind::GotoMissingLabel) + .widen_error_span_from(goto_keyword_position) + .blame_token(error_position) + .related_token("goto_keyword", goto_keyword_position) + .report_error(self); + self.make_error_expression_at(goto_keyword_position) + } +} diff --git a/rottlib/src/parser/grammar/expression/pratt.rs b/rottlib/src/parser/grammar/expression/pratt.rs index 9c590c0..60a8d26 100644 --- a/rottlib/src/parser/grammar/expression/pratt.rs +++ b/rottlib/src/parser/grammar/expression/pratt.rs @@ -33,6 +33,7 @@ //! - [`super::precedence`] - operator precedence definitions use crate::ast::{self, Expression, ExpressionRef}; +use crate::lexer::TokenPosition; use crate::parser::{self, ParseExpressionResult, Parser, ResultRecoveryExt, diagnostic_labels}; pub use super::precedence::PrecedenceRank; @@ -108,6 +109,17 @@ impl<'src, 'arena> Parser<'src, 'arena> { self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST) } + pub(super) fn make_error_expression_at( + &self, + position: TokenPosition, + ) -> ExpressionRef<'src, 'arena> { + crate::arena::ArenaNode::new_in( + Expression::Error, + crate::lexer::TokenSpan::new(position), + self.arena, + ) + } + /// Parses an expression, including only operators with binding power /// at least `min_precedence_rank` (as tight or tighter). 
fn parse_expression_with_min_precedence_rank( diff --git a/rottlib/src/parser/grammar/expression/primary/mod.rs b/rottlib/src/parser/grammar/expression/primary/mod.rs index 93bb5ff..ca8eff9 100644 --- a/rottlib/src/parser/grammar/expression/primary/mod.rs +++ b/rottlib/src/parser/grammar/expression/primary/mod.rs @@ -119,7 +119,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { // These keywords remain valid identifiers unless the following // tokens commit to the keyword-led form. Keyword::For - if let Some(left_parenthesis_position) = self.is_for_loop_header_ahead() => + if let Some(left_parenthesis_position) = self.peek_for_loop_header_left_parenthesis_position() => { self.advance(); // `(` self.parse_for_tail(token_position, left_parenthesis_position) diff --git a/rottlib/src/parser/grammar/expression/primary/new.rs b/rottlib/src/parser/grammar/expression/primary/new.rs index f2ef6ec..17cd00d 100644 --- a/rottlib/src/parser/grammar/expression/primary/new.rs +++ b/rottlib/src/parser/grammar/expression/primary/new.rs @@ -332,11 +332,10 @@ impl<'src, 'arena> Parser<'src, 'arena> { return self.report_error_with_fallback(error); } NewClassSpecifierParseAction::Parse => self.parse_expression(), - NewClassSpecifierParseAction::Skip => crate::arena::ArenaNode::new_in( - Expression::Error, - TokenSpan::new(self.peek_position_or_eof()), - self.arena, - ), + NewClassSpecifierParseAction::Skip => { + let error_position = self.peek_position_or_eof(); + self.make_error_expression_at(error_position) + } } } } diff --git a/rottlib/tests/parser_diagnostics/control_flow_expressions.rs b/rottlib/tests/parser_diagnostics/control_flow_expressions.rs index 47ba80d..5990f45 100644 --- a/rottlib/tests/parser_diagnostics/control_flow_expressions.rs +++ b/rottlib/tests/parser_diagnostics/control_flow_expressions.rs @@ -685,23 +685,38 @@ pub(super) const FOR_HEADER_FIXTURES: &[Fixture] = &[ ]; #[test] -fn check_for_header_fixture_counts() { +fn check_p0016_for_header_fixture_counts() { let 
runs = run_fixtures(FOR_HEADER_FIXTURES); assert_eq!(runs.get("files/P0016_01.uc").unwrap().len(), 3); assert_eq!(runs.get("files/P0016_02.uc").unwrap().len(), 2); assert_eq!(runs.get("files/P0016_03.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0016_06.uc").unwrap().len(), 3); +} + +#[test] +fn check_p0017_for_header_fixture_counts() { + let runs = run_fixtures(FOR_HEADER_FIXTURES); assert_eq!(runs.get("files/P0017_01.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0017_02.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0017_04.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0017_05.uc").unwrap().len(), 1); +} + +#[test] +fn check_p0018_for_header_fixture_counts() { + let runs = run_fixtures(FOR_HEADER_FIXTURES); assert_eq!(runs.get("files/P0018_01.uc").unwrap().len(), 2); assert_eq!(runs.get("files/P0018_02.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0018_03.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0018_06.uc").unwrap().len(), 1); +} + +#[test] +fn check_p0019_for_header_fixture_counts() { + let runs = run_fixtures(FOR_HEADER_FIXTURES); assert_eq!(runs.get("files/P0019_01.uc").unwrap().len(), 2); assert_eq!(runs.get("files/P0019_02.uc").unwrap().len(), 1); @@ -709,12 +724,22 @@ fn check_for_header_fixture_counts() { assert_eq!(runs.get("files/P0019_04.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0019_06.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0019_07.uc").unwrap().len(), 1); +} + +#[test] +fn check_p0020_for_header_fixture_counts() { + let runs = run_fixtures(FOR_HEADER_FIXTURES); assert_eq!(runs.get("files/P0020_01.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0020_02.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0020_03.uc").unwrap().len(), 2); assert_eq!(runs.get("files/P0020_04.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0020_08.uc").unwrap().len(), 1); +} + +#[test] +fn check_p0021_for_header_fixture_counts() { + let runs = run_fixtures(FOR_HEADER_FIXTURES); 
assert_eq!(runs.get("files/P0021_01.uc").unwrap().len(), 1); assert_eq!(runs.get("files/P0021_02.uc").unwrap().len(), 1); @@ -1493,7 +1518,7 @@ fn check_p0021_for_header_fixtures() { } #[test] -fn check_for_header_body_recovery_fixtures() { +fn check_p0013_for_header_body_recovery_fixtures() { let runs = run_fixtures(FOR_HEADER_FIXTURES); assert_diagnostic( @@ -1590,4 +1615,307 @@ fn check_for_header_body_recovery_fixtures() { notes: &[], }, ); +} + +pub(super) const P0022_FIXTURES: &[Fixture] = &[ + Fixture { + label: "files/P0022_01.uc", + source: "return ] ;", + }, + Fixture { + label: "files/P0022_02.uc", + source: "return\n ]\n;\n", + }, + Fixture { + label: "files/P0022_03.uc", + source: "return\n}\n", + }, +]; + +#[test] +fn check_p0022_fixtures() { + let runs = run_fixtures(P0022_FIXTURES); + + assert_eq!(runs.get("files/P0022_01.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0022_02.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0022_03.uc").unwrap().len(), 1); + + assert_diagnostic( + &runs.get_any("files/P0022_01.uc"), + &ExpectedDiagnostic { + headline: "expected return value expression or `;` after `return`, found `]`", + severity: Severity::Error, + code: Some("P0022"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(2), + end: TokenPosition(2), + }, + message: "unexpected `]`", + }), + secondary_labels: &[], + help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0022_02.uc"), + &ExpectedDiagnostic { + headline: "expected return value expression or `;` after `return`, found `]`", + severity: Severity::Error, + code: Some("P0022"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(3), + end: TokenPosition(3), + }, + message: "unexpected `]`", + }), + secondary_labels: &[ExpectedLabel { + span: TokenSpan { + start: TokenPosition(0), + end: TokenPosition(0), + }, + message: "after this `return`, a value expression or `;` was expected", + }], + 
help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0022_03.uc"), + &ExpectedDiagnostic { + headline: "expected return value expression or `;` after `return`, found `}`", + severity: Severity::Error, + code: Some("P0022"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(2), + end: TokenPosition(2), + }, + message: "unexpected `}`", + }), + secondary_labels: &[ExpectedLabel { + span: TokenSpan { + start: TokenPosition(0), + end: TokenPosition(0), + }, + message: "after this `return`, a value expression or `;` was expected", + }], + help: None, + notes: &[], + }, + ); +} + +pub(super) const P0023_FIXTURES: &[Fixture] = &[ + Fixture { + label: "files/P0023_01.uc", + source: "break ] ;", + }, + Fixture { + label: "files/P0023_02.uc", + source: "break\n \n\n\n\n ]\n;\n", + }, + Fixture { + label: "files/P0023_03.uc", + source: "break\n}\n", + }, +]; + +#[test] +fn check_p0023_fixtures() { + let runs = run_fixtures(P0023_FIXTURES); + + assert_eq!(runs.get("files/P0023_01.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0023_02.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0023_03.uc").unwrap().len(), 1); + + assert_diagnostic( + &runs.get_any("files/P0023_01.uc"), + &ExpectedDiagnostic { + headline: "expected break value expression or `;` after `break`, found `]`", + severity: Severity::Error, + code: Some("P0023"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(2), + end: TokenPosition(2), + }, + message: "unexpected `]`", + }), + secondary_labels: &[], + help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0023_02.uc"), + &ExpectedDiagnostic { + headline: "expected break value expression or `;` after `break`, found `]`", + severity: Severity::Error, + code: Some("P0023"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(8), + end: TokenPosition(8), + }, + message: "unexpected `]`", + }), + 
secondary_labels: &[ExpectedLabel { + span: TokenSpan { + start: TokenPosition(0), + end: TokenPosition(0), + }, + message: "after this `break`, a value expression or `;` was expected", + }], + help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0023_03.uc"), + &ExpectedDiagnostic { + headline: "expected break value expression or `;` after `break`, found `}`", + severity: Severity::Error, + code: Some("P0023"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(2), + end: TokenPosition(2), + }, + message: "unexpected `}`", + }), + secondary_labels: &[ExpectedLabel { + span: TokenSpan { + start: TokenPosition(0), + end: TokenPosition(0), + }, + message: "after this `break`, a value expression or `;` was expected", + }], + help: None, + notes: &[], + }, + ); +} + +pub(super) const P0024_FIXTURES: &[Fixture] = &[ + Fixture { + label: "files/P0024_01.uc", + source: "goto;", + }, + Fixture { + label: "files/P0024_02.uc", + source: "goto\n ;\n", + }, + Fixture { + label: "files/P0024_03.uc", + source: "goto\n ]\n;\n", + }, + Fixture { + label: "files/P0024_04.uc", + source: "goto", + }, +]; + +#[test] +fn check_p0024_fixtures() { + let runs = run_fixtures(P0024_FIXTURES); + + assert_eq!(runs.get("files/P0024_01.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0024_02.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0024_03.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0024_04.uc").unwrap().len(), 1); + + assert_diagnostic( + &runs.get_any("files/P0024_01.uc"), + &ExpectedDiagnostic { + headline: "expected label after `goto`, found `;`", + severity: Severity::Error, + code: Some("P0024"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(1), + end: TokenPosition(1), + }, + message: "unexpected `;`", + }), + secondary_labels: &[], + help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0024_02.uc"), + &ExpectedDiagnostic { + 
headline: "expected label after `goto`, found `;`", + severity: Severity::Error, + code: Some("P0024"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(3), + end: TokenPosition(3), + }, + message: "unexpected `;`", + }), + secondary_labels: &[ExpectedLabel { + span: TokenSpan { + start: TokenPosition(0), + end: TokenPosition(0), + }, + message: "after this `goto`, a label was expected", + }], + help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0024_03.uc"), + &ExpectedDiagnostic { + headline: "expected label after `goto`, found `]`", + severity: Severity::Error, + code: Some("P0024"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(3), + end: TokenPosition(3), + }, + message: "unexpected `]`", + }), + secondary_labels: &[ExpectedLabel { + span: TokenSpan { + start: TokenPosition(0), + end: TokenPosition(0), + }, + message: "after this `goto`, a label was expected", + }], + help: None, + notes: &[], + }, + ); + + assert_diagnostic( + &runs.get_any("files/P0024_04.uc"), + &ExpectedDiagnostic { + headline: "expected label after `goto`, found end of file", + severity: Severity::Error, + code: Some("P0024"), + primary_label: Some(ExpectedLabel { + span: TokenSpan { + start: TokenPosition(1), + end: TokenPosition(1), + }, + message: "reached end of file here", + }), + secondary_labels: &[], + help: None, + notes: &[], + }, + ); } \ No newline at end of file