Clean control flow code

This commit is contained in:
dkanus 2026-04-28 21:28:34 +07:00
parent 519d0cd3a7
commit 150bd2f5cf
13 changed files with 1307 additions and 838 deletions

View File

@ -23,128 +23,50 @@ mod pretty;
/// Add, remove, or edit entries here.
/// Using `(&str, &str)` gives each case a human-readable label.
const TEST_CASES: &[(&str, &str)] = &[
// P0016: invalid initializer start after `for (`
// P0022: invalid return value start
(
"files/P0016_01.uc",
"for\n(] ; )",
"files/P0022_01.uc",
"return ] ;",
),
(
"files/P0016_02.uc",
"for (\n ]\n ;\n)\n Body();\n",
"files/P0022_02.uc",
"return\n ]\n;\n",
),
(
"files/P0016_03.uc",
"for (\n }\n ;\n)\n",
),
(
"files/P0016_06.uc",
"for (\n ]\n\n\n ; Step)\n",
"files/P0022_03.uc",
"return\n}\n",
),
// P0017: initializer parsed, but first `;` is missing
// P0023: invalid break value start
(
"files/P0017_01.uc",
"for (Init ] ; )",
"files/P0023_01.uc",
"break ] ;",
),
(
"files/P0017_02.uc",
"for (Init\n ]\n ;\n)\n",
"files/P0023_02.uc",
"break\n \n\n\n\n ]\n;\n",
),
(
"files/P0017_04.uc",
"for (Init {\n Body();\n}; )\n",
),
(
"files/P0017_05.uc",
"for (Init",
"files/P0023_03.uc",
"break\n}\n",
),
// P0018: invalid condition start after first `;`
// P0024: goto target is missing or not a label token
(
"files/P0018_01.uc",
"for \n\n (; ] ; )",
"files/P0024_01.uc",
"goto;",
),
(
"files/P0018_02.uc",
"for (;\n ]\n ;\n)\n Body();\n",
"files/P0024_02.uc",
"goto\n ;\n",
),
(
"files/P0018_03.uc",
"for (;\n }\n ;\n)\n",
"files/P0024_03.uc",
"goto\n ]\n;\n",
),
(
"files/P0018_06.uc",
"for (;",
),
// P0019: condition parsed, but second `;` is missing
(
"files/P0019_01.uc",
"for (; bCondition )",
),
(
"files/P0019_02.uc",
"for (; bCondition\n)\n Body();\n",
),
(
"files/P0019_03.uc",
"for (; bCondition ] ; )",
),
(
"files/P0019_04.uc",
"for (; bCondition\n{\n Body();\n}\n;\n)\n",
),
(
"files/P0019_06.uc",
"for (; bCondition",
),
(
"files/P0019_07.uc",
"for (; bCondition Step)",
),
// P0020: invalid step start after second `;`
(
"files/P0020_01.uc",
"for (;;;)",
),
(
"files/P0020_02.uc",
"for (;;\n ;\n)\n",
),
(
"files/P0020_03.uc",
"for (;; ])",
),
(
"files/P0020_04.uc",
"for (;;\n }\n)\n",
),
(
"files/P0020_08.uc",
"for (;;\n ]\n",
),
// P0021: missing `)` to close `for` header
(
"files/P0021_01.uc",
"for (;;",
),
(
"files/P0021_02.uc",
"for (;; Step",
),
(
"files/P0021_03.uc",
"for (;; Step;\n Body();\n",
),
(
"files/P0021_05.uc",
"for (Init; bCondition; Step\n{\n Body();\n}\n",
),
(
"files/P0021_09.uc",
"for\n(Init;\n bCondition;\n Step\n]\n",
"files/P0024_04.uc",
"goto",
),
];

View File

@ -105,7 +105,7 @@ pub enum Expression<'src, 'arena> {
/// bodies, optional trailing semicolons, and recovery anchors.
If {
condition: ExpressionRef<'src, 'arena>,
body: BranchBody<'src, 'arena>,
then_body: BranchBody<'src, 'arena>,
else_body: Option<BranchBody<'src, 'arena>>,
},
/// `while (condition) body`
@ -123,7 +123,7 @@ pub enum Expression<'src, 'arena> {
/// The iteration source / iterator expression is stored as a normal
/// expression node because the language permits nontrivial syntax there.
ForEach {
iterated_expression: ExpressionRef<'src, 'arena>,
iterator_expression: ExpressionRef<'src, 'arena>,
body: BranchBody<'src, 'arena>,
},
/// Traditional three-part `for` loop.
@ -134,7 +134,7 @@ pub enum Expression<'src, 'arena> {
/// - `for (;cond;)`
/// - `for (;;step)`
For {
initialization: Option<ExpressionRef<'src, 'arena>>,
initializer: Option<ExpressionRef<'src, 'arena>>,
condition: Option<ExpressionRef<'src, 'arena>>,
step: Option<ExpressionRef<'src, 'arena>>,
body: BranchBody<'src, 'arena>,

View File

@ -677,3 +677,134 @@ pub(super) fn diagnostic_for_loop_header_missing_closing_parenthesis<'src>(
.code("P0021")
.build()
}
pub(super) fn diagnostic_return_value_invalid_start<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let return_keyword_span = error
.related_spans
.get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
.copied();
let found = found_at(file, error.blame_span.end);
let (header_text, primary_text) = match found {
FoundAt::Token(token_text) => (
format!(
"expected return value expression or `;` after `return`, found `{}`",
token_text
),
format!("unexpected `{}`", token_text),
),
FoundAt::EndOfFile => (
"expected return value expression or `;` after `return`, found end of file"
.to_string(),
"reached end of file here".to_string(),
),
FoundAt::Unknown => (
"expected return value expression or `;` after `return`".to_string(),
"expected return value expression here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(span) = return_keyword_span
&& !file.same_line(span.start, error.blame_span.end)
{
builder = builder.secondary_label(
span,
"after this `return`, a value expression or `;` was expected",
);
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0022")
.build()
}
pub(super) fn diagnostic_break_value_invalid_start<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let break_keyword_span = error
.related_spans
.get(diagnostic_labels::EXPRESSION_REQUIRED_BY)
.copied();
let found = found_at(file, error.blame_span.end);
let (header_text, primary_text) = match found {
FoundAt::Token(token_text) => (
format!(
"expected break value expression or `;` after `break`, found `{}`",
token_text
),
format!("unexpected `{}`", token_text),
),
FoundAt::EndOfFile => (
"expected break value expression or `;` after `break`, found end of file"
.to_string(),
"reached end of file here".to_string(),
),
FoundAt::Unknown => (
"expected break value expression or `;` after `break`".to_string(),
"expected break value expression here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(span) = break_keyword_span
&& !file.same_line(span.start, error.blame_span.end)
{
builder = builder.secondary_label(
span,
"after this `break`, a value expression or `;` was expected",
);
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0023")
.build()
}
pub(super) fn diagnostic_goto_missing_label<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
let goto_keyword_span = error.related_spans.get("goto_keyword").copied();
let found = found_at(file, error.blame_span.end);
let (header_text, primary_text) = match found {
FoundAt::Token(token_text) => (
format!("expected label after `goto`, found `{}`", token_text),
format!("unexpected `{}`", token_text),
),
FoundAt::EndOfFile => (
"expected label after `goto`, found end of file".to_string(),
"reached end of file here".to_string(),
),
FoundAt::Unknown => (
"expected label after `goto`".to_string(),
"expected label here".to_string(),
),
};
let mut builder = DiagnosticBuilder::error(header_text);
if let Some(span) = goto_keyword_span
&& !file.same_line(span.start, error.blame_span.end)
{
builder = builder.secondary_label(span, "after this `goto`, a label was expected");
}
builder
.primary_label(error.blame_span, primary_text)
.code("P0024")
.build()
}

View File

@ -13,8 +13,8 @@ use super::{Diagnostic, DiagnosticBuilder};
use crate::lexer::{TokenPosition, TokenSpan, TokenizedFile};
use crate::parser::{ParseError, ParseErrorKind};
mod primary_expressions;
mod control_flow_expressions;
mod primary_expressions;
#[derive(Clone, Copy)]
enum FoundAt<'src> {
@ -69,8 +69,8 @@ pub(crate) fn diagnostic_from_parse_error<'src>(
error: ParseError,
file: &TokenizedFile<'src>,
) -> Diagnostic {
use primary_expressions::*;
use control_flow_expressions::*;
use primary_expressions::*;
match error.kind {
// primary_expressions.rs
ParseErrorKind::ParenthesizedExpressionInvalidStart => {
@ -102,18 +102,11 @@ pub(crate) fn diagnostic_from_parse_error<'src>(
diagnostic_new_argument_missing_comma(error, file)
}
// control_flow_expressions.rs
ParseErrorKind::ConditionExpected => {
diagnostic_condition_expected(error, file)
}
ParseErrorKind::ConditionExpected => diagnostic_condition_expected(error, file),
ParseErrorKind::ControlFlowBodyExpected => {
diagnostic_control_flow_body_expected(error, file)
}
ParseErrorKind::DoMissingUntil => {
diagnostic_do_missing_until(error, file)
}
ParseErrorKind::ForEachIteratorExpressionExpected => {
diagnostic_for_each_iterator_expression_expected(error, file)
}
ParseErrorKind::DoMissingUntil => diagnostic_do_missing_until(error, file),
ParseErrorKind::ForEachIteratorExpressionExpected => {
diagnostic_for_each_iterator_expression_expected(error, file)
}
@ -135,9 +128,14 @@ pub(crate) fn diagnostic_from_parse_error<'src>(
ParseErrorKind::ForLoopHeaderMissingClosingParenthesis => {
diagnostic_for_loop_header_missing_closing_parenthesis(error, file)
}
ParseErrorKind::ReturnValueInvalidStart => {
diagnostic_return_value_invalid_start(error, file)
}
ParseErrorKind::BreakValueInvalidStart => diagnostic_break_value_invalid_start(error, file),
ParseErrorKind::GotoMissingLabel => diagnostic_goto_missing_label(error, file),
_ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind))
.primary_label(error.covered_span, "happened here")
.build(),
}
}
}

View File

@ -36,6 +36,9 @@ error: expected one of `,`, `:`, or `}`, found `token_to`
3.
*/
// TODO: check if blue guidelines are sometimes red or vice versa
// TODO: tabs need to be replaced with a 1-width character
// These are abstract rendering events, not self-contained draw commands.
// They are emitted in increasing order of "significant lines" (range starts/ends).
// The actual source span for a label is recovered later from its LabelType.

View File

@ -58,6 +58,12 @@ pub enum ParseErrorKind {
ForLoopHeaderStepInvalidStart,
/// P0021
ForLoopHeaderMissingClosingParenthesis,
/// P0022
ReturnValueInvalidStart,
/// P0023
BreakValueInvalidStart,
/// P0024
GotoMissingLabel,
// ================== Old errors to be thrown away! ==================
/// Expression inside `(...)` could not be parsed and no closing `)`
/// was found.
@ -93,8 +99,6 @@ pub enum ParseErrorKind {
/// Found `case` arms after a `default` branch.
SwitchCasesAfterDefault,
SwitchMissingClosingBrace,
/// A `goto` was not followed by a label.
GotoMissingLabel,
/// Unexpected end of input while parsing.
UnexpectedEndOfFile,
/// Token looked like a numeric literal but could not be parsed as one.

View File

@ -1,712 +0,0 @@
//! Control expression parsing for Fermented `UnrealScript`.
//!
//! ## Condition boundary recovery and legacy compatibility
//!
//! Fermented `UnrealScript` allows omitting parentheses `(...)` around
//! condition expressions of `if`/`while`/`until` and similar constructs.
//! Conditions are therefore parsed as ordinary expressions by default.
//!
//! This means that a leading parenthesized expression may still be part of a
//! larger condition:
//!
//! ```unrealscript
//! if (2 + 2) * 2 < 7 { ... }
//! while (Index + 1) < Count DoWork();
//! ```
//!
//! For compatibility with older `UnrealScript` code, we apply one conservative
//! legacy cut-off rule:
//!
//! If the condition begins with a parenthesized expression, and the token after
//! the matching `)` is identifier-like, the parenthesized expression is treated
//! as the whole condition. The following identifier-like token is left for the
//! branch body.
//!
//! This prevents the parser from accidentally consuming the following
//! statement/body as part of the condition in older code such as:
//!
//! ```unrealscript
//! if ( AIController(Controller) != None ) Cross = vect(0,0,0);
//! ```
//!
//! Without the legacy cut-off, a permissive expression parser could interpret
//! `Cross` as a continuation of the condition in dialects where identifier-like
//! tokens may participate in operator syntax.
//!
//! Operator tokens such as `*`, `+`, `<`, `==`, etc. do not trigger this
//! legacy cut-off. They allow the normal expression parser to continue the
//! condition.
//!
//! Trade-off: if an identifier-like token after the closing `)` was intended as
//! a custom/named operator, the parser prefers the legacy interpretation and
//! ends the condition at the closing `)`. Write the condition with additional
//! parentheses or use an unambiguous operator form.
//!
//! ## Disambiguation of `for` as loop vs expression
//!
//! Unlike other control-flow keywords, `for` is disambiguated from functions
//! or variables with the same name. This is done syntactically in
//! [`Parser::is_for_loop_header_ahead`]: a `for` token followed by
//! a `(` whose contents contain a top-level `;` is unambiguously a loop header.
//!
//! This rule is lightweight, local, and robust, and mirrors the fixed grammar
//! `for (init; condition; step)` without requiring name resolution.
//!
//! ### Why this is not done for `if` / `while` / `do`
//!
//! There is no similarly reliable way to discriminate `if`, `while`, or related
//! keywords at this stage of parsing: their parenthesized forms are
//! indistinguishable from single argument function calls.
//!
//! Supporting these keywords as identifiers would complicate parsing
//! disproportionately, so we always treat them as openers for conditional and
//! loop expressions. This matches common `UnrealScript` usage and intentionally
//! drops support for moronic design choices where such names were reused
//! as variables or functions (as the author did by declaring
//! a `For` function in Acedia).
//!
//! ### But what about `switch`?
//!
//! `switch` is handled separately because, in existing `UnrealScript` code,
//! it may appear either as a keyword-led construct or as an identifier.
//!
//! Its disambiguation rule is simpler than for `for`: if the next token is
//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains
//! available as an identifier.
//!
//! This rule is local and purely syntactic, matching the behavior expected by
//! the existing codebase we support. The actual parsing of `switch` expressions
//! lives in a separate module because the construct itself is more involved
//! than the control-flow forms handled here.
use crate::ast::{BranchBody, Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{
ParseErrorKind, ParseExpressionResult, ParseResult, Parser, ResultRecoveryExt, SyncLevel,
diagnostic_labels,
};
/// Intermediate result of parsing the `(...)` header of a `for` loop.
///
/// Every component is optional because each part of the header may be
/// omitted, and because header parsing may stop early after an error.
struct ParsedForHeader<'src, 'arena> {
/// Expression before the first `;`, if one was parsed.
initialization: OptionalExpression<'src, 'arena>,
/// Expression between the first and the second `;`, if one was parsed.
condition: OptionalExpression<'src, 'arena>,
/// Expression after the second `;`, if one was parsed.
step: OptionalExpression<'src, 'arena>,
/// Position of the consumed `)`; `None` when no `)` was consumed for
/// this header.
right_parenthesis_position: Option<TokenPosition>,
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Returns whether a leading parenthesized condition should be cut off
/// at its closing `)` for legacy compatibility.
///
/// This checks for the shape:
///
/// ```text
/// ( ... ) identifier-like-token
/// ```
///
/// When this shape is found, the parser stops the condition at the matching
/// `)` and leaves the following identifier-like token for the branch body.
///
/// This preserves old single-line forms such as:
///
/// ```unrealscript
/// if (Condition) Cross = 7;
/// ```
///
/// while still allowing ordinary operator continuations such as:
///
/// ```unrealscript
/// if (2 + 2) * 2 < 7 { ... }
/// ```
fn should_apply_legacy_parenthesized_condition_cutoff(&mut self) -> bool {
if self.peek_token() != Some(Token::LeftParenthesis) {
return false;
}
return true;
let mut nesting_depth: usize = 1;
let mut lookahead_token_offset: usize = 1;
while let Some(next_token) = self.peek_token_at(lookahead_token_offset) {
match next_token {
Token::LeftParenthesis => nesting_depth += 1,
Token::RightParenthesis => {
if nesting_depth <= 1 {
return self
.peek_token_at(lookahead_token_offset + 1)
.map(|token| token.is_valid_identifier_name())
.unwrap_or_default();
}
nesting_depth -= 1;
}
_ => (),
}
lookahead_token_offset += 1;
}
// End-of-file is reached before finding matching `)` - a clear error;
// doesn't matter if we parse it like legacy condition.
false
}
// TODO: note how weird returned result here is
/// Parses the condition of a control-flow construct.
///
/// By default the condition is an ordinary expression.
///
/// Legacy rule: when the condition opens with a parenthesized expression
/// that is directly followed by an identifier-like token, only the
/// parenthesized part is taken as the condition (it is parsed through
/// `parse_parenthesized_expression_tail`), and the identifier-like token
/// is left for the branch body. This keeps old code such as
///
/// ```unrealscript
/// if (Condition) Cross -= 3;
/// ```
///
/// working, while operator continuations are still accepted:
///
/// ```unrealscript
/// if (2 + 2) * 2 < 7 { ... }
/// ```
///
/// # Errors
///
/// Returns an error of kind `error_kind` when the next token definitely
/// cannot start an expression. The error blames the upcoming token (or
/// end of file) and relates the last consumed token under
/// [`diagnostic_labels::EXPRESSION_REQUIRED_BY`].
fn parse_condition(
    &mut self,
    error_kind: ParseErrorKind,
) -> ParseExpressionResult<'src, 'arena> {
    if self.next_token_definitely_cannot_start_expression() {
        let required_by_position = self.last_consumed_position_or_start();
        let blamed_position = self.peek_position_or_eof();
        let error = self
            .make_error_at(error_kind, blamed_position)
            .blame_token(blamed_position)
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                required_by_position,
            );
        return Err(error);
    }
    if !self.should_apply_legacy_parenthesized_condition_cutoff() {
        return Ok(self.parse_expression());
    }
    match self.eat_with_position(Token::LeftParenthesis) {
        Some(left_parenthesis_position) => {
            Ok(self.parse_parenthesized_expression_tail(left_parenthesis_position))
        }
        None => Ok(self.parse_expression()),
    }
}
/// Parses the body of a control-flow construct into a [`BranchBody`].
///
/// Recognized source forms:
///
/// - empty body terminated by a semicolon: `if (cond);`
/// - empty body right before a closing `}`: `if (cond) }`
/// - block body: `if (cond) { ... }`
/// - single-expression body: `if (cond) expr;`
///
/// A trailing `;` is consumed and recorded only for non-block bodies.
/// The closing `}` of the "empty before `}`" form is not consumed.
///
/// When the input ends before any body token, a
/// [`ParseErrorKind::ControlFlowBodyExpected`] error is reported and an
/// empty body anchored at the end of the error's covered span is returned.
fn parse_branch_body(
    &mut self,
    control_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    let Some((first_token, first_token_position)) = self.peek_token_and_position() else {
        // Nothing left to parse: report the missing body at end of file.
        let error = self
            .make_error_at_last_consumed(ParseErrorKind::ControlFlowBodyExpected)
            .blame_token(self.file.eof())
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                control_keyword_position,
            )
            .related_token(
                diagnostic_labels::EXPRESSION_EXPECTED_AFTER,
                self.last_consumed_position_or_start(),
            );
        let end_anchor_token_position = error.covered_span.end;
        self.report_error(error);
        return BranchBody {
            expression: None,
            semicolon_position: None,
            end_anchor_token_position,
        };
    };
    match first_token {
        // `if (is_condition);`
        Token::Semicolon => {
            self.advance(); // ';'
            return BranchBody {
                expression: None,
                semicolon_position: Some(first_token_position),
                end_anchor_token_position: first_token_position,
            };
        }
        // `{ ... if (is_condition) }`
        Token::RightBrace => {
            return BranchBody {
                expression: None,
                semicolon_position: None,
                end_anchor_token_position: self.last_consumed_position_or_start(),
            };
        }
        _ => {}
    }
    let expected_after_position = self.last_consumed_position_or_start();
    let branch_expression = self
        .parse_expression_with_start_error(
            ParseErrorKind::ControlFlowBodyExpected,
            control_keyword_position,
            expected_after_position,
        )
        .unwrap_or_fallback(self);
    let expression_end_position = branch_expression.span().end;
    // A block body in `if {...}` or `if {...};` owns its own terminator;
    // a following `;` does not belong to the branch body.
    if matches!(*branch_expression, Expression::Block(_)) {
        return BranchBody {
            expression: Some(branch_expression),
            semicolon_position: None,
            end_anchor_token_position: expression_end_position,
        };
    }
    // Single-expression bodies take an optional trailing semicolon with them.
    let semicolon_position = if self.eat(Token::Semicolon) {
        self.last_consumed_position()
    } else {
        None
    };
    BranchBody {
        expression: Some(branch_expression),
        semicolon_position,
        end_anchor_token_position: semicolon_position.unwrap_or(expression_end_position),
    }
}
/// Parses a condition followed by a branch body.
///
/// `condition_context` is the position used as the "required by" anchor
/// in diagnostics and is forwarded to [`Parser::parse_branch_body`].
///
/// # Errors
///
/// Propagates errors from [`Parser::parse_condition`]. Additionally fails
/// with `error_kind` when the parsed "condition" is a block and the next
/// token definitely cannot start an expression; the block's span is
/// attached to the error under the `"branch_body"` key.
fn parse_condition_and_branch_body(
    &mut self,
    condition_context: TokenPosition,
    error_kind: ParseErrorKind,
) -> ParseResult<'src, 'arena, (ExpressionRef<'src, 'arena>, BranchBody<'src, 'arena>)> {
    let condition_start_position = self.peek_position_or_eof();
    let condition = self.parse_condition(error_kind)?;
    let condition_is_block = matches!(*condition, Expression::Block(..));
    if condition_is_block && self.next_token_definitely_cannot_start_expression() {
        let error = self
            .make_error_at(error_kind, condition_start_position)
            .blame_token(condition_start_position)
            .related_token(diagnostic_labels::EXPRESSION_REQUIRED_BY, condition_context)
            .related("branch_body", *condition.span());
        return Err(error);
    }
    let body = self.parse_branch_body(condition_context);
    Ok((condition, body))
}
/// Parses the remainder of an `if` expression; the `if` keyword itself has
/// already been consumed.
///
/// The produced [`Expression::If`] starts at `if_keyword_position` and ends
/// at the end anchor of the `else` body when an `else` is present, or at
/// the end anchor of the `if` body otherwise.
///
/// If the condition/body pair cannot be parsed, the error's fallback
/// expression is returned instead.
#[must_use]
pub(crate) fn parse_if_tail(
    &mut self,
    if_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let parsed = self
        .parse_condition_and_branch_body(if_keyword_position, ParseErrorKind::ConditionExpected);
    let (condition, body) = match parsed {
        Ok(parts) => parts,
        Err(error) => return error.fallback(self),
    };
    let mut if_end_position = body.end_anchor_token_position;
    let else_body = if self.peek_keyword() == Some(Keyword::Else) {
        self.advance(); // 'else'
        let else_keyword_position = self.last_consumed_position_or_start();
        let parsed_else_body = self.parse_branch_body(else_keyword_position);
        if_end_position = parsed_else_body.end_anchor_token_position;
        Some(parsed_else_body)
    } else {
        None
    };
    let span = TokenSpan::range(if_keyword_position, if_end_position);
    let if_expression = Expression::If {
        condition,
        body,
        else_body,
    };
    self.arena.alloc_node(if_expression, span)
}
/// Parses the remainder of a `while` expression; the `while` keyword itself
/// has already been consumed.
///
/// The produced [`Expression::While`] starts at `while_keyword_position`
/// and ends at the end anchor of its body.
///
/// If the condition/body pair cannot be parsed, the error's fallback
/// expression is returned instead.
#[must_use]
pub(crate) fn parse_while_tail(
    &mut self,
    while_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    match self.parse_condition_and_branch_body(
        while_keyword_position,
        ParseErrorKind::ConditionExpected,
    ) {
        Ok((condition, body)) => {
            let span = TokenSpan::range(while_keyword_position, body.end_anchor_token_position);
            self.arena
                .alloc_node(Expression::While { condition, body }, span)
        }
        Err(error) => error.fallback(self),
    }
}
/// Parses a `do ... until ...` expression after the `do` keyword.
///
/// The resulting [`Expression::DoUntil`] spans from `do_keyword_position`
/// to the end of the condition.
#[must_use]
pub(crate) fn parse_do_until_tail(
&mut self,
do_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
let body = self.parse_branch_body(do_keyword_position);
let condition = if self
.expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil)
.widen_error_span_from(do_keyword_position)
.related_token("do_keyword", do_keyword_position)
.report_error(self)
{
crate::arena::ArenaNode::new_in(
Expression::Error,
TokenSpan::new(body.end_anchor_token_position),
self.arena,
)
} else {
self.parse_condition(ParseErrorKind::ConditionExpected)
.related_token("do_keyword", do_keyword_position)
.unwrap_or_fallback(self)
};
let span = TokenSpan::range(do_keyword_position, condition.span().end);
self.arena
.alloc_node(Expression::DoUntil { condition, body }, span)
}
/// Parses the remainder of a `foreach` expression; the `foreach` keyword
/// itself has already been consumed.
///
/// The iterator part must begin with an identifier-like token. If another
/// token comes next, a
/// [`ParseErrorKind::ForEachIteratorExpressionExpected`] error blaming that
/// token is produced and its fallback expression is returned. Otherwise
/// the iterator part is parsed like a condition and is followed by
/// a branch body.
///
/// The produced [`Expression::ForEach`] starts at
/// `foreach_keyword_position` and ends at the end anchor of the body.
#[must_use]
pub(crate) fn parse_foreach_tail(
    &mut self,
    foreach_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // An exhausted input is not rejected here; it is left to
    // `parse_condition` below.
    let next_token_is_not_identifier_like = self
        .peek_token()
        .is_some_and(|token| !token.is_valid_identifier_name());
    if next_token_is_not_identifier_like {
        let blamed_position = self.peek_position_or_eof();
        return self
            .make_error_at(
                ParseErrorKind::ForEachIteratorExpressionExpected,
                blamed_position,
            )
            .blame_token(blamed_position)
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                foreach_keyword_position,
            )
            .fallback(self);
    }
    let parsed_iterator =
        self.parse_condition(ParseErrorKind::ForEachIteratorExpressionExpected);
    let iterated_expression = match parsed_iterator {
        Ok(expression) => expression,
        Err(error) => return error.fallback(self),
    };
    let body = self.parse_branch_body(foreach_keyword_position);
    let span = TokenSpan::range(foreach_keyword_position, body.end_anchor_token_position);
    let foreach_expression = Expression::ForEach {
        iterated_expression,
        body,
    };
    self.arena.alloc_node(foreach_expression, span)
}
/// Checks whether the upcoming tokens have the syntactic shape of a
/// `for (...)` header and, if so, returns the position of its opening `(`.
///
/// Returns `Some(position_of_left_parenthesis)` when the next token is `(`
/// and either a top-level `;` appears before the matching `)`, or the input
/// ends before the matching `)` is found. Returns `None` when the next
/// token is not `(` or when the group closes without a top-level `;`.
///
/// This is a lookahead-only check used for loop-vs-identifier
/// disambiguation.
pub(super) fn is_for_loop_header_ahead(&mut self) -> Option<TokenPosition> {
    let left_parenthesis_position = match self.peek_token_and_position() {
        Some((Token::LeftParenthesis, position)) => position,
        _ => return None,
    };
    // Depth 1 corresponds to the header's own `(` at offset 0.
    let mut depth: usize = 1;
    let mut offset: usize = 1;
    while let Some(token) = self.peek_token_at(offset) {
        match token {
            Token::Semicolon if depth == 1 => return Some(left_parenthesis_position),
            Token::LeftParenthesis => depth += 1,
            // End of the immediate `for (...)` group without a top-level
            // `;`: not a loop header.
            Token::RightParenthesis if depth <= 1 => return None,
            Token::RightParenthesis => depth -= 1,
            _ => {}
        }
        offset += 1;
    }
    // EOF before closing the immediate `for (...)` group. Treat this as an
    // incomplete `for` loop header, not as a function call, so recovery can
    // produce `P0017` / `P0021`-style diagnostics.
    Some(left_parenthesis_position)
}
/// Parses a `for` expression after the `for` keyword.
///
/// This method expects the standard header shape
/// `for (initialization; condition; step)` and then parses a branch body.
///
/// Each header component may be omitted. The resulting [`Expression::For`]
/// spans from `for_keyword_position` to the end of the body.
#[must_use]
pub(crate) fn parse_for_tail(
&mut self,
for_keyword_position: TokenPosition,
left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
let header = self.parse_for_header(for_keyword_position, left_parenthesis_position);
if header.right_parenthesis_position.is_none() {
return self.arena.alloc_node(
Expression::Error,
TokenSpan::range(for_keyword_position, self.last_consumed_position_or_start()),
);
}
let body = self.parse_branch_body(for_keyword_position);
let span = TokenSpan::range(for_keyword_position, body.end_anchor_token_position);
self.arena.alloc_node(
Expression::For {
initialization: header.initialization,
condition: header.condition,
step: header.step,
body,
},
span,
)
}
/// Parses one optional component of a `for` header.
///
/// Returns `None` without consuming anything when the input is exhausted
/// or the next token equals `stop_token` (the component is omitted).
/// Otherwise parses an expression, using `bad_start_error_kind` for
/// an invalid expression start; on error, it synchronizes at
/// [`SyncLevel::CloseParenthesis`] and substitutes the fallback expression.
///
/// `for_keyword_position` and `left_parenthesis_position` are forwarded to
/// `parse_expression_with_start_error` as diagnostic anchors.
fn parse_for_optional_expression(
    &mut self,
    bad_start_error_kind: crate::parser::ParseErrorKind,
    stop_token: Token,
    for_keyword_position: crate::lexer::TokenPosition,
    left_parenthesis_position: crate::lexer::TokenPosition,
) -> OptionalExpression<'src, 'arena> {
    let component_is_present = self
        .peek_token()
        .is_some_and(|next_token| next_token != stop_token);
    if !component_is_present {
        return None;
    }
    let component = self
        .parse_expression_with_start_error(
            bad_start_error_kind,
            for_keyword_position,
            left_parenthesis_position,
        )
        .sync_error_until(self, SyncLevel::CloseParenthesis)
        .unwrap_or_fallback(self);
    Some(component)
}
/// Parses the contents of a `for (...)` header and the closing `)`.
///
/// The three components (initializer, condition, step) are parsed in order;
/// each may be omitted. Parsing stops at the first structural problem and
/// returns whatever was collected so far.
///
/// `right_parenthesis_position` of the result is `Some` only if a `)` was
/// consumed, which also happens on some error paths (a `)` found where
/// a `;` was expected is consumed and recorded).
///
/// A "missing `;`" error is not reported when the preceding component is
/// itself an [`Expression::Error`] node.
fn parse_for_header(
&mut self,
for_keyword_position: TokenPosition,
left_parenthesis_position: TokenPosition,
) -> ParsedForHeader<'src, 'arena> {
let mut header = ParsedForHeader {
initialization: None,
condition: None,
step: None,
right_parenthesis_position: None,
};
// --- Initializer: everything up to the first `;`.
header.initialization = self.parse_for_optional_expression(
ParseErrorKind::ForLoopHeaderInitializerInvalidStart,
Token::Semicolon,
for_keyword_position,
left_parenthesis_position,
);
// Expect the first `;`. `error_token` stays `None` only when it was
// found and consumed. A `)` here is consumed and recorded, but is still
// treated as the offending token.
let error_token = match self.peek_token_and_position() {
Some((Token::Semicolon, _)) => {
self.advance();
None
}
Some((Token::RightParenthesis, right_parenthesis_position)) => {
header.right_parenthesis_position = Some(right_parenthesis_position);
self.advance();
Some(right_parenthesis_position)
}
Some((_, next_token_position)) => Some(next_token_position),
None => Some(self.peek_position_or_eof()),
};
if let Some(error_token) = error_token {
// Skip the "missing `;`" error when the initializer is already
// an error node.
if let Some(ref a) = header.initialization {
if matches!(**a, Expression::Error) {
return header;
}
}
let mut error = self
.make_error_at(
ParseErrorKind::ForLoopHeaderMissingSemicolonAfterInitializer,
error_token,
)
.widen_error_span_from(for_keyword_position)
.blame_token(error_token);
if let Some(ref a) = header.initialization {
error = error.related("for_header_initializer", *a.span())
};
error.report_error(self);
return header;
}
// --- Condition: everything between the first and the second `;`.
let first_semicolon_position = self.last_consumed_position_or_start();
header.condition = self.parse_for_optional_expression(
ParseErrorKind::ForLoopHeaderConditionInvalidStart,
Token::Semicolon,
for_keyword_position,
first_semicolon_position,
);
// Expect the second `;`; same handling as for the first one.
let error_token = match self.peek_token_and_position() {
Some((Token::Semicolon, _)) => {
self.advance();
None
}
Some((Token::RightParenthesis, right_parenthesis_position)) => {
header.right_parenthesis_position = Some(right_parenthesis_position);
self.advance();
Some(right_parenthesis_position)
}
Some((_, next_token_position)) => Some(next_token_position),
None => Some(self.peek_position_or_eof()),
};
if let Some(error_token) = error_token {
// Skip the "missing `;`" error when the condition is already
// an error node.
if let Some(ref a) = header.condition {
if matches!(**a, Expression::Error) {
return header;
}
}
let mut error = self
.make_error_at(
ParseErrorKind::ForLoopHeaderMissingSemicolonAfterCondition,
error_token,
)
.widen_error_span_from(for_keyword_position)
.blame_token(error_token);
if let Some(ref a) = header.condition {
error = error.related("for_header_condition", *a.span())
};
error.report_error(self);
return header;
}
// --- Step: everything between the second `;` and the closing `)`.
let second_semicolon_position = self.last_consumed_position_or_start();
header.step = self.parse_for_optional_expression(
ParseErrorKind::ForLoopHeaderStepInvalidStart,
Token::RightParenthesis,
for_keyword_position,
second_semicolon_position,
);
// The step is already an error node: consume a directly following `)`
// if there is one, but do not report a missing-`)` error.
if let Some(ref a) = header.step
&& matches!(**a, Expression::Error)
{
if let Some((Token::RightParenthesis, right_parenthesis_position)) =
self.peek_token_and_position()
{
header.right_parenthesis_position = Some(right_parenthesis_position);
self.advance();
}
return header;
}
// Regular path: the header must be closed by `)`; otherwise report
// the error with the opening `(` as a related token.
header.right_parenthesis_position = self
.expect(
Token::RightParenthesis,
ParseErrorKind::ForLoopHeaderMissingClosingParenthesis,
)
.widen_error_span_from(for_keyword_position)
.related_token("for_header_start", left_parenthesis_position)
.sync_error_at(self, SyncLevel::CloseParenthesis)
.ok_or_report(self);
header
}
/// Parses the remainder of a `return` expression; the `return` keyword
/// itself has already been consumed.
///
/// When the next token is `;`, the result is a value-less
/// [`Expression::Return`] spanning only the keyword. Otherwise a value
/// expression is parsed and the span extends to its end.
///
/// The terminating `;` is never consumed here.
#[must_use]
pub(crate) fn parse_return_tail(
    &mut self,
    return_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    if self.peek_token() == Some(Token::Semicolon) {
        let keyword_only_span = TokenSpan::new(return_keyword_position);
        return self
            .arena
            .alloc_node(Expression::Return(None), keyword_only_span);
    }
    let returned_value = self.parse_expression();
    let span = TokenSpan::range(return_keyword_position, returned_value.span().end);
    self.arena
        .alloc_node(Expression::Return(Some(returned_value)), span)
}
/// Parses the remainder of a `break` expression; the `break` keyword itself
/// has already been consumed.
///
/// When the next token is `;`, the result is a value-less
/// [`Expression::Break`] spanning only the keyword. Otherwise a value
/// expression is parsed and the span extends to its end.
///
/// The terminating `;` is never consumed here.
#[must_use]
pub(crate) fn parse_break_tail(
    &mut self,
    break_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    if self.peek_token() == Some(Token::Semicolon) {
        let keyword_only_span = TokenSpan::new(break_keyword_position);
        return self
            .arena
            .alloc_node(Expression::Break(None), keyword_only_span);
    }
    let break_value = self.parse_expression();
    let span = TokenSpan::range(break_keyword_position, break_value.span().end);
    self.arena
        .alloc_node(Expression::Break(Some(break_value)), span)
}
/// Builds the `goto` expression once the `goto` keyword is consumed.
///
/// The target label must be a name literal or an identifier token. When it
/// is neither, a [`ParseErrorKind::GotoMissingLabel`] error is reported and
/// an [`Expression::Error`] node positioned at the keyword is returned.
#[must_use]
pub(crate) fn parse_goto_tail(
    &mut self,
    goto_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let label_target = self
        .peek_token_and_position()
        .and_then(|(token, position)| {
            (token == Token::NameLiteral || token == Token::Identifier).then_some(position)
        });
    let Some(label_position) = label_target else {
        self.make_error_at_last_consumed(ParseErrorKind::GotoMissingLabel)
            .widen_error_span_from(goto_keyword_position)
            .sync_error_until(self, SyncLevel::Statement)
            .report_error(self);
        return crate::arena::ArenaNode::new_in(
            Expression::Error,
            TokenSpan::new(goto_keyword_position),
            self.arena,
        );
    };
    self.advance(); // label token
    self.arena.alloc_node_between(
        Expression::Goto(label_position),
        goto_keyword_position,
        label_position,
    )
}
}

View File

@ -0,0 +1,282 @@
//! Parser for `for` loop expressions in Fermented UnrealScript.
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
use crate::lexer::{self, Token, TokenPosition};
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
/// Components of a `for (...)` header collected while parsing it.
///
/// Every field starts as `None`; the header parser fills in whatever it
/// manages to parse.
#[derive(Debug, Default)]
struct ParsedForHeader<'src, 'arena> {
    /// Expression before the first `;`, if one was written.
    initializer: OptionalExpression<'src, 'arena>,
    /// Expression between the two `;`, if one was written.
    condition: OptionalExpression<'src, 'arena>,
    /// Expression after the second `;`, if one was written.
    step: OptionalExpression<'src, 'arena>,
    /// Position of the consumed closing `)`; `None` when the header was
    /// never closed.
    right_parenthesis_position: Option<TokenPosition>,
}
impl<'src, 'arena> ParsedForHeader<'src, 'arena> {
    /// Creates a header with no parsed components and no closing `)`.
    #[must_use]
    fn new() -> Self {
        // All fields are `Option`s, so the derived default is the all-`None`
        // header.
        Self::default()
    }
}
impl<'src, 'arena> Parser<'src, 'arena> {
/// Looks ahead to decide whether the upcoming `(` opens a `for (...)`
/// header and, if so, returns that `(` position.
///
/// A header is recognized by a `;` at the top nesting level before the
/// matching `)`. A group that closes without such a `;` is rejected.
/// Input that ends before either is found is still accepted, so that later
/// parsing can produce `for`-specific diagnostics.
///
/// Only peeks; no tokens are consumed.
pub(in super::super) fn peek_for_loop_header_left_parenthesis_position(
    &mut self,
) -> Option<TokenPosition> {
    let left_parenthesis_position = match self.peek_token_and_position() {
        Some((Token::LeftParenthesis, position)) => position,
        _ => return None,
    };
    // Number of currently unclosed `(`, the header's own one included.
    let mut open_parentheses: usize = 1;
    let mut offset: usize = 1;
    loop {
        let Some(token) = self.peek_token_at(offset) else {
            // Prefer treating incomplete `for (` as a loop header so
            // recovery reports header diagnostics instead of call-like
            // diagnostics.
            return Some(left_parenthesis_position);
        };
        match token {
            Token::LeftParenthesis => open_parentheses += 1,
            // A closed immediate group without a top-level `;` is not
            // a loop header.
            Token::RightParenthesis if open_parentheses <= 1 => return None,
            Token::RightParenthesis => open_parentheses -= 1,
            Token::Semicolon if open_parentheses == 1 => {
                return Some(left_parenthesis_position);
            }
            _ => {}
        }
        offset += 1;
    }
}
/// Parses a `for` expression after `for` and the opening `(` have been
/// consumed.
///
/// Header components may be omitted. Returns [`Expression::Error`] if the
/// header cannot be closed; otherwise returns [`Expression::For`] spanning
/// from `for_keyword_position` through the parsed body.
#[must_use]
pub(in super::super) fn parse_for_tail(
&mut self,
for_keyword_position: TokenPosition,
left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
let header = self.parse_for_header(for_keyword_position, left_parenthesis_position);
if header.right_parenthesis_position.is_none() {
return self.arena.alloc_node(
Expression::Error,
lexer::TokenSpan::range(
for_keyword_position,
self.last_consumed_position_or_start(),
),
);
}
let body = self.parse_branch_body(for_keyword_position);
let span = lexer::TokenSpan::range(for_keyword_position, body.end_anchor_token_position);
self.arena.alloc_node(
Expression::For {
initializer: header.initializer,
condition: header.condition,
step: header.step,
body,
},
span,
)
}
/// Parses a single `for` header component that may be omitted.
///
/// Returns `None` when the input has ended or the next token is already
/// `terminator_token`. Otherwise parses an expression, passing
/// `component_start_anchor_position` along so an invalid-start diagnostic
/// is anchored where the expression was expected.
fn parse_optional_for_header_expression(
    &mut self,
    invalid_start_error_kind: ParseErrorKind,
    terminator_token: Token,
    for_keyword_position: TokenPosition,
    component_start_anchor_position: TokenPosition,
) -> OptionalExpression<'src, 'arena> {
    let component_is_present = self
        .peek_token()
        .is_some_and(|next_token| next_token != terminator_token);
    if !component_is_present {
        return None;
    }
    let component = self
        .parse_expression_with_start_error(
            invalid_start_error_kind,
            for_keyword_position,
            component_start_anchor_position,
        )
        // Header recovery must not consume the next `;`;
        // it belongs to the surrounding `for` header.
        .sync_error_until(self, SyncLevel::CloseParenthesis)
        .unwrap_or_fallback(self);
    Some(component)
}
fn optional_expression_is_error(expression: &OptionalExpression<'src, 'arena>) -> bool {
expression
.as_ref()
.is_some_and(|expression| matches!(**expression, Expression::Error))
}
/// Consumes the next header semicolon or recovers at `)`.
///
/// Suppresses the missing-semicolon diagnostic when the component
/// expression has already failed.
fn consume_for_header_semicolon_or_recover(
&mut self,
for_keyword_position: TokenPosition,
right_parenthesis_position: &mut Option<TokenPosition>,
component_expression: &OptionalExpression<'src, 'arena>,
missing_semicolon_error_kind: ParseErrorKind,
component_diagnostic_label: &'static str,
) -> bool {
let diagnostic_position = match self.peek_token_and_position() {
Some((Token::Semicolon, _)) => {
self.advance();
return true;
}
Some((Token::RightParenthesis, position)) => {
*right_parenthesis_position = Some(position);
self.advance();
position
}
Some((_, position)) => position,
None => self.peek_position_or_eof(),
};
// Do not add a missing-semicolon error on top of a component
// parse error.
if Self::optional_expression_is_error(component_expression) {
return false;
}
let mut error = self
.make_error_at(missing_semicolon_error_kind, diagnostic_position)
.widen_error_span_from(for_keyword_position)
.blame_token(diagnostic_position);
if let Some(expression) = component_expression {
error = error.related(component_diagnostic_label, *expression.span());
}
error.report_error(self);
false
}
/// Wraps up header parsing when the step expression failed to parse.
///
/// Returns `false` without touching anything when `header.step` is not an
/// error node. Otherwise consumes a directly following `)` (recording its
/// position in `header`) and returns `true`, telling the caller to stop
/// header parsing and skip the missing-`)` check so no cascading header
/// diagnostic is produced.
fn recover_after_invalid_step_expression(
    &mut self,
    header: &mut ParsedForHeader<'src, 'arena>,
) -> bool {
    let step_failed = Self::optional_expression_is_error(&header.step);
    if step_failed {
        match self.peek_token_and_position() {
            Some((Token::RightParenthesis, position)) => {
                header.right_parenthesis_position = Some(position);
                self.advance();
            }
            _ => {}
        }
    }
    step_failed
}
/// Expects the closing `)` of a `for` header and records the outcome in
/// `header.right_parenthesis_position`.
///
/// On failure a
/// [`ParseErrorKind::ForLoopHeaderMissingClosingParenthesis`] error is
/// reported. Its span is widened from the `for` keyword and it is related
/// to the opening `(`, so the diagnostic covers the whole header region.
fn consume_for_header_closing_parenthesis(
    &mut self,
    for_keyword_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
    header: &mut ParsedForHeader<'src, 'arena>,
) {
    let closing_parenthesis_position = self
        .expect(
            Token::RightParenthesis,
            ParseErrorKind::ForLoopHeaderMissingClosingParenthesis,
        )
        .widen_error_span_from(for_keyword_position)
        .related_token("for_header_start", left_parenthesis_position)
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .ok_or_report(self);
    header.right_parenthesis_position = closing_parenthesis_position;
}
/// Parses the three `;`-separated components of a `for` header and its
/// closing `)`.
///
/// Parsing stops early, returning what has been collected so far, as soon
/// as a separator `;` cannot be consumed or the step expression is an error
/// node. This keeps later recovery from producing duplicate header
/// diagnostics.
fn parse_for_header(
    &mut self,
    for_keyword_position: TokenPosition,
    left_parenthesis_position: TokenPosition,
) -> ParsedForHeader<'src, 'arena> {
    let mut header = ParsedForHeader::new();

    // Initializer, anchored at the opening `(`.
    header.initializer = self.parse_optional_for_header_expression(
        ParseErrorKind::ForLoopHeaderInitializerInvalidStart,
        Token::Semicolon,
        for_keyword_position,
        left_parenthesis_position,
    );
    let initializer_terminated = self.consume_for_header_semicolon_or_recover(
        for_keyword_position,
        &mut header.right_parenthesis_position,
        &header.initializer,
        ParseErrorKind::ForLoopHeaderMissingSemicolonAfterInitializer,
        "for_header_initializer",
    );
    if !initializer_terminated {
        return header;
    }

    // Condition, anchored at the `;` that was just consumed.
    let first_semicolon_position = self.last_consumed_position_or_start();
    header.condition = self.parse_optional_for_header_expression(
        ParseErrorKind::ForLoopHeaderConditionInvalidStart,
        Token::Semicolon,
        for_keyword_position,
        first_semicolon_position,
    );
    let condition_terminated = self.consume_for_header_semicolon_or_recover(
        for_keyword_position,
        &mut header.right_parenthesis_position,
        &header.condition,
        ParseErrorKind::ForLoopHeaderMissingSemicolonAfterCondition,
        "for_header_condition",
    );
    if !condition_terminated {
        return header;
    }

    // Step, anchored at the second `;` and terminated by `)`.
    let second_semicolon_position = self.last_consumed_position_or_start();
    header.step = self.parse_optional_for_header_expression(
        ParseErrorKind::ForLoopHeaderStepInvalidStart,
        Token::RightParenthesis,
        for_keyword_position,
        second_semicolon_position,
    );
    let stop_after_invalid_step = self.recover_after_invalid_step_expression(&mut header);
    if !stop_after_invalid_step {
        self.consume_for_header_closing_parenthesis(
            for_keyword_position,
            left_parenthesis_position,
            &mut header,
        );
    }
    header
}
}

View File

@ -0,0 +1,502 @@
//! Control expression parsing for Fermented `UnrealScript`.
//!
//! ## Condition boundary recovery and legacy compatibility
//!
//! Fermented `UnrealScript` allows omitting parentheses `(...)` around
//! condition expressions of `if`/`while`/`until` and similar constructs.
//! Conditions are therefore parsed as ordinary expressions by default.
//!
//! This means that a leading parenthesized expression may still be part of a
//! larger condition:
//!
//! ```unrealscript
//! if (2 + 2) * 2 < 7 { ... }
//! while (Index + 1) < Count DoWork();
//! ```
//!
//! For compatibility with older `UnrealScript` code, we apply one conservative
//! legacy cut-off rule:
//!
//! If the condition begins with a parenthesized expression, and the token after
//! the matching `)` is identifier-like, the parenthesized expression is treated
//! as the whole condition. The following identifier-like token is left for the
//! branch body.
//!
//! This prevents the parser from accidentally consuming the following
//! statement/body as part of the condition in older code such as:
//!
//! ```unrealscript
//! if ( AIController(Controller) != None ) Cross = vect(0,0,0);
//! ```
//!
//! Without the legacy cut-off, a permissive expression parser could interpret
//! `Cross` as a continuation of the condition in dialects where identifier-like
//! tokens may participate in operator syntax.
//!
//! Operator tokens such as `*`, `+`, `<`, `==`, etc. do not trigger this
//! legacy cut-off. They allow the normal expression parser to continue the
//! condition.
//!
//! Trade-off: if an identifier-like token after the closing `)` was intended as
//! a custom/named operator, the parser prefers the legacy interpretation and
//! ends the condition at the closing `)`. Write the condition with additional
//! parentheses or use an unambiguous operator form.
//!
//! ## Disambiguation of `for` as loop vs expression
//!
//! Unlike other control-flow keywords, `for` is disambiguated from functions
//! and variables with the same name. This is done syntactically in
//! [`Parser::peek_for_loop_header_left_parenthesis_position`]: a `for` token
//! followed by a `(` whose contents contain a top-level `;` is unambiguously
//! a loop header.
//!
//! This rule is lightweight, local, and robust, and mirrors the fixed grammar
//! `for (init; condition; step)` without requiring name resolution.
//!
//! ### Why this is not done for `if` / `while` / `do`
//!
//! There is no similarly reliable way to discriminate `if`, `while`, or related
//! keywords at this stage of parsing: their parenthesized forms are
//! indistinguishable from single argument function calls.
//!
//! Supporting these keywords as identifiers would complicate parsing
//! disproportionately, so we always treat them as openers for conditional and
//! loop expressions. This matches common `UnrealScript` usage and
//! intentionally drops support for code that reuses such names as variables
//! or functions (as Acedia did by declaring a `For` function).
//!
//! ### But what about `switch`?
//!
//! `switch` is handled separately because, in existing `UnrealScript` code,
//! it may appear either as a keyword-led construct or as an identifier.
//!
//! Its disambiguation rule is simpler than for `for`: if the next token is
//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains
//! available as an identifier.
//!
//! This rule is local and purely syntactic, matching the behavior expected by
//! the existing codebase we support. The actual parsing of `switch` expressions
//! lives in a separate module because the construct itself is more involved
//! than the control-flow forms handled here.
use crate::ast::{BranchBody, Expression, ExpressionRef};
use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan};
use crate::parser::{
ParseErrorKind, ParseExpressionResult, ParseResult, Parser, ResultRecoveryExt,
diagnostic_labels,
};
pub(super) mod for_loop;
impl<'src, 'arena> Parser<'src, 'arena> {
/// Detects the legacy parenthesized-condition cut-off by lookahead.
///
/// Returns the position of the upcoming `(` when its matching `)` is
/// directly followed by a token that is a valid identifier name. Returns
/// `None` when the next token is not `(`, when the follower is anything
/// else, or when the matching `)` is missing.
fn find_legacy_parenthesized_condition_opening_position(&mut self) -> Option<TokenPosition> {
    let left_parenthesis_position = match self.peek_token_and_position() {
        Some((Token::LeftParenthesis, position)) => position,
        _ => return None,
    };
    // Number of currently unclosed `(`, the leading one included.
    let mut depth: usize = 1;
    let mut offset: usize = 1;
    while let Some(token) = self.peek_token_at(offset) {
        match token {
            Token::LeftParenthesis => depth += 1,
            Token::RightParenthesis if depth == 1 => {
                // Found the matching `)`: only the token right after it
                // decides whether the cut-off applies.
                let follower_is_identifier_like = self
                    .peek_token_at(offset + 1)
                    .is_some_and(|follower| follower.is_valid_identifier_name());
                return if follower_is_identifier_like {
                    Some(left_parenthesis_position)
                } else {
                    None
                };
            }
            Token::RightParenthesis => depth -= 1,
            _ => {}
        }
        offset += 1;
    }
    // Recovery is left to normal expression parsing when the closing `)`
    // is missing.
    None
}
/// Parses the condition of a control-flow construct.
///
/// Fails with `invalid_start_error_kind` when the next token definitely
/// cannot start an expression. Otherwise the condition is parsed as an
/// ordinary expression, except when the legacy parenthesized-condition
/// cut-off applies: then only the leading parenthesized expression is
/// taken as the condition.
fn parse_condition(
    &mut self,
    invalid_start_error_kind: ParseErrorKind,
) -> ParseExpressionResult<'src, 'arena> {
    if self.next_token_definitely_cannot_start_expression() {
        // The last consumed token is reported as the one that required
        // the expression.
        let keyword_position = self.last_consumed_position_or_start();
        let error_position = self.peek_position_or_eof();
        let error = self
            .make_error_at(invalid_start_error_kind, error_position)
            .blame_token(error_position)
            .related_token(diagnostic_labels::EXPRESSION_REQUIRED_BY, keyword_position);
        return Err(error);
    }
    let condition = match self.find_legacy_parenthesized_condition_opening_position() {
        Some(left_parenthesis_position) => {
            self.advance(); // `(`
            self.parse_parenthesized_expression_tail(left_parenthesis_position)
        }
        None => self.parse_expression(),
    };
    Ok(condition)
}
/// Parses the body of a control-flow branch into a [`BranchBody`].
///
/// Dispatches on the next token:
///
/// - end of input: reports a missing body;
/// - `;`: empty body with semicolon, as in `if (cond);`;
/// - `}`: empty body before a closing brace, as in `if (cond) }`;
/// - anything else: a non-empty body, either a block `if (cond) { ... }`
///   or a single expression `if (cond) expr;`.
///
/// For non-block bodies a trailing `;` is consumed when present and its
/// position is recorded in the returned [`BranchBody`].
fn parse_branch_body(
    &mut self,
    branch_owner_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    match self.peek_token_and_position() {
        None => self.recover_missing_branch_body(branch_owner_keyword_position),
        // `if (is_condition);`
        Some((Token::Semicolon, semicolon_position)) => {
            self.parse_empty_semicolon_branch_body(semicolon_position)
        }
        // `{ ... if (is_condition) }`
        Some((Token::RightBrace, _)) => self.make_empty_branch_body_before_closing_brace(),
        Some(_) => self.parse_non_empty_branch_body(branch_owner_keyword_position),
    }
}
fn recover_missing_branch_body(
&mut self,
branch_owner_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
let error = self
.make_error_at_last_consumed(ParseErrorKind::ControlFlowBodyExpected)
.blame_token(self.file.eof())
.related_token(
diagnostic_labels::EXPRESSION_REQUIRED_BY,
branch_owner_keyword_position,
)
.related_token(
diagnostic_labels::EXPRESSION_EXPECTED_AFTER,
self.last_consumed_position_or_start(),
);
let end_anchor_token_position = error.covered_span.end;
self.report_error(error);
BranchBody {
expression: None,
semicolon_position: None,
end_anchor_token_position,
}
}
/// Consumes the `;` of an empty branch body such as `if (cond);`.
///
/// The returned [`BranchBody`] has no expression and is anchored at the
/// semicolon.
fn parse_empty_semicolon_branch_body(
    &mut self,
    semicolon_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    let empty_body = BranchBody {
        expression: None,
        semicolon_position: Some(semicolon_position),
        end_anchor_token_position: semicolon_position,
    };
    self.advance(); // `;`
    empty_body
}
/// Builds an empty [`BranchBody`] for a branch that is directly followed by
/// a closing `}`.
///
/// Consumes nothing: the `}` is left in the token stream. The body has no
/// expression and no semicolon, and is anchored at the position reported by
/// `last_consumed_position_or_start`.
///
/// Takes `&self` because it only reads parser state.
fn make_empty_branch_body_before_closing_brace(&self) -> BranchBody<'src, 'arena> {
    BranchBody {
        expression: None,
        semicolon_position: None,
        end_anchor_token_position: self.last_consumed_position_or_start(),
    }
}
/// Parses a branch body that starts with an actual expression.
///
/// A block body ends where the block ends; a `;` after it is not consumed
/// here. A single-expression body also consumes one optional trailing `;`,
/// records its position, and is anchored at that `;` when present.
fn parse_non_empty_branch_body(
    &mut self,
    branch_owner_keyword_position: TokenPosition,
) -> BranchBody<'src, 'arena> {
    let expected_after_position = self.last_consumed_position_or_start();
    let branch_expression = self
        .parse_expression_with_start_error(
            ParseErrorKind::ControlFlowBodyExpected,
            branch_owner_keyword_position,
            expected_after_position,
        )
        .unwrap_or_fallback(self);
    let expression_end_position = branch_expression.span().end;
    // A block body in `if {...}` or `if {...};` owns its own terminator;
    // a following `;` does not belong to the branch body.
    // Single-expression bodies own their optional trailing `;`.
    let is_block_body = matches!(*branch_expression, Expression::Block(_));
    let semicolon_position = if !is_block_body && self.eat(Token::Semicolon) {
        self.last_consumed_position()
    } else {
        None
    };
    BranchBody {
        expression: Some(branch_expression),
        semicolon_position,
        end_anchor_token_position: semicolon_position.unwrap_or(expression_end_position),
    }
}
/// Parses a condition followed by its branch body.
///
/// Fails with `condition_expected_error_kind` when the condition cannot be
/// parsed, or when the "condition" turned out to be a block and the next
/// token definitely cannot start an expression. The latter case is
/// diagnosed as a missing condition, with the block reported as the
/// branch body.
fn parse_condition_and_branch_body(
    &mut self,
    branch_owner_keyword_position: TokenPosition,
    condition_expected_error_kind: ParseErrorKind,
) -> ParseResult<'src, 'arena, (ExpressionRef<'src, 'arena>, BranchBody<'src, 'arena>)> {
    let condition_start_position = self.peek_position_or_eof();
    let condition = self.parse_condition(condition_expected_error_kind)?;
    // Prefer diagnosing `if {...}` as a missing condition instead of
    // treating the block as the condition and then reporting
    // a missing body.
    let block_took_condition_place = matches!(*condition, Expression::Block(..))
        && self.next_token_definitely_cannot_start_expression();
    if block_took_condition_place {
        let error = self
            .make_error_at(condition_expected_error_kind, condition_start_position)
            .blame_token(condition_start_position)
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                branch_owner_keyword_position,
            )
            .related("branch_body", *condition.span());
        return Err(error);
    }
    let body = self.parse_branch_body(branch_owner_keyword_position);
    Ok((condition, body))
}
/// Parses the remainder of an `if` expression once `if` has been consumed.
///
/// An `else` keyword after the `then` body introduces an `else` body. The
/// resulting [`Expression::If`] spans from `if_keyword_position` to the end
/// of the last body that was parsed. A failed condition yields the error's
/// fallback expression instead.
#[must_use]
pub(super) fn parse_if_tail(
    &mut self,
    if_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let parsed = self
        .parse_condition_and_branch_body(if_keyword_position, ParseErrorKind::ConditionExpected);
    let (condition, then_body) = match parsed {
        Ok(parts) => parts,
        Err(error) => return error.fallback(self),
    };
    let else_body = if self.peek_keyword() == Some(Keyword::Else) {
        self.advance(); // 'else'
        let else_keyword_position = self.last_consumed_position_or_start();
        Some(self.parse_branch_body(else_keyword_position))
    } else {
        None
    };
    let if_expression_end_position = match &else_body {
        Some(body) => body.end_anchor_token_position,
        None => then_body.end_anchor_token_position,
    };
    let span = TokenSpan::range(if_keyword_position, if_expression_end_position);
    let if_expression = Expression::If {
        condition,
        then_body,
        else_body,
    };
    self.arena.alloc_node(if_expression, span)
}
/// Parses the remainder of a `while` expression once `while` has been
/// consumed.
///
/// The resulting [`Expression::While`] spans from `while_keyword_position`
/// to the end of the body. A failed condition yields the error's fallback
/// expression instead.
#[must_use]
pub(super) fn parse_while_tail(
    &mut self,
    while_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let parsed = self.parse_condition_and_branch_body(
        while_keyword_position,
        ParseErrorKind::ConditionExpected,
    );
    let (condition, body) = match parsed {
        Ok(parts) => parts,
        Err(error) => return error.fallback(self),
    };
    let loop_span = TokenSpan::range(while_keyword_position, body.end_anchor_token_position);
    let while_expression = Expression::While { condition, body };
    self.arena.alloc_node(while_expression, loop_span)
}
/// Parses the remainder of a `do ... until ...` expression once `do` has
/// been consumed.
///
/// When the `until` keyword is missing, the error is reported and an error
/// expression anchored at the end of the body stands in for the condition.
/// The resulting [`Expression::DoUntil`] spans from `do_keyword_position`
/// to the end of the condition.
#[must_use]
pub(super) fn parse_do_until_tail(
    &mut self,
    do_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    let body = self.parse_branch_body(do_keyword_position);
    let until_keyword_result = self
        .expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil)
        .widen_error_span_from(do_keyword_position)
        .related_token("do_keyword", do_keyword_position);
    let until_keyword_was_missing = until_keyword_result.report_error(self);
    let condition = if until_keyword_was_missing {
        self.make_error_expression_at(body.end_anchor_token_position)
    } else {
        let parsed_condition = self
            .parse_condition(ParseErrorKind::ConditionExpected)
            .related_token("do_keyword", do_keyword_position);
        parsed_condition.unwrap_or_fallback(self)
    };
    let loop_span = TokenSpan::range(do_keyword_position, condition.span().end);
    let do_until_expression = Expression::DoUntil { condition, body };
    self.arena.alloc_node(do_until_expression, loop_span)
}
/// Parses the remainder of a `foreach` expression once `foreach` has been
/// consumed.
///
/// Only the first token of the iterator expression is checked here: it must
/// be a valid identifier name. Per the original documentation, later stages
/// validate the full shape of the iterator expression.
///
/// The resulting [`Expression::ForEach`] spans from
/// `foreach_keyword_position` to the end of the body.
#[must_use]
pub(super) fn parse_foreach_tail(
    &mut self,
    foreach_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // End-of-file is allowed through so the shared condition parser can
    // produce the missing-iterator diagnostic.
    let starts_with_non_identifier = self
        .peek_token()
        .is_some_and(|token| !token.is_valid_identifier_name());
    if starts_with_non_identifier {
        let error_position = self.peek_position_or_eof();
        let error = self
            .make_error_at(
                ParseErrorKind::ForEachIteratorExpressionExpected,
                error_position,
            )
            .blame_token(error_position)
            .related_token(
                diagnostic_labels::EXPRESSION_REQUIRED_BY,
                foreach_keyword_position,
            );
        return error.fallback(self);
    }
    let parsed_iterator = self.parse_condition(ParseErrorKind::ForEachIteratorExpressionExpected);
    let iterator_expression = match parsed_iterator {
        Ok(expression) => expression,
        Err(error) => return error.fallback(self),
    };
    let body = self.parse_branch_body(foreach_keyword_position);
    let loop_span = TokenSpan::range(foreach_keyword_position, body.end_anchor_token_position);
    let foreach_expression = Expression::ForEach {
        iterator_expression,
        body,
    };
    self.arena.alloc_node(foreach_expression, loop_span)
}
/// Parses the remainder of a `return` expression once `return` has been
/// consumed.
///
/// At end of input or directly before `;` there is no return value and the
/// node covers only the keyword. Any other follower is parsed as the return
/// value. The terminating `;` is never consumed here.
#[must_use]
pub(super) fn parse_return_tail(
    &mut self,
    return_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    if matches!(self.peek_token(), None | Some(Token::Semicolon)) {
        let keyword_span = TokenSpan::new(return_keyword_position);
        return self
            .arena
            .alloc_node(Expression::Return(None), keyword_span);
    }
    // Non-`;` followers are parsed here so invalid return values are
    // reported as return-specific diagnostics.
    let return_value = self
        .parse_expression_with_start_error(
            ParseErrorKind::ReturnValueInvalidStart,
            return_keyword_position,
            return_keyword_position,
        )
        .unwrap_or_fallback(self);
    let full_span = TokenSpan::range(return_keyword_position, return_value.span().end);
    self.arena
        .alloc_node(Expression::Return(Some(return_value)), full_span)
}
/// Parses the remainder of a `break` expression once `break` has been
/// consumed.
///
/// At end of input or directly before `;` there is no break value and the
/// node covers only the keyword. Any other follower is parsed as the break
/// value. The terminating `;` is never consumed here.
#[must_use]
pub(super) fn parse_break_tail(
    &mut self,
    break_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    if matches!(self.peek_token(), None | Some(Token::Semicolon)) {
        let keyword_span = TokenSpan::new(break_keyword_position);
        return self.arena.alloc_node(Expression::Break(None), keyword_span);
    }
    // Non-`;` followers are parsed here so invalid break values are
    // reported as break-specific diagnostics.
    let break_value = self
        .parse_expression_with_start_error(
            ParseErrorKind::BreakValueInvalidStart,
            break_keyword_position,
            break_keyword_position,
        )
        .unwrap_or_fallback(self);
    let full_span = TokenSpan::range(break_keyword_position, break_value.span().end);
    self.arena
        .alloc_node(Expression::Break(Some(break_value)), full_span)
}
/// Parses the remainder of a `goto` expression once `goto` has been
/// consumed.
///
/// The target label is either a name literal or any token that is a valid
/// identifier name. When no such token follows, a
/// [`ParseErrorKind::GotoMissingLabel`] error blaming the offending
/// position is reported and an error expression positioned at the keyword
/// is returned; no token is consumed in that case.
#[must_use]
pub(super) fn parse_goto_tail(
    &mut self,
    goto_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // Labels may be written either as UnrealScript name literals or
    // as bare names.
    let label_target = self
        .peek_token_and_position()
        .and_then(|(label_token, label_position)| {
            let is_label =
                label_token == Token::NameLiteral || label_token.is_valid_identifier_name();
            is_label.then_some(label_position)
        });
    let Some(label_position) = label_target else {
        let error_position = self.peek_position_or_eof();
        self.make_error_at_last_consumed(ParseErrorKind::GotoMissingLabel)
            .widen_error_span_from(goto_keyword_position)
            .blame_token(error_position)
            .related_token("goto_keyword", goto_keyword_position)
            .report_error(self);
        return self.make_error_expression_at(goto_keyword_position);
    };
    self.advance(); // label token
    self.arena.alloc_node_between(
        Expression::Goto(label_position),
        goto_keyword_position,
        label_position,
    )
}
}

View File

@ -33,6 +33,7 @@
//! - [`super::precedence`] - operator precedence definitions
use crate::ast::{self, Expression, ExpressionRef};
use crate::lexer::TokenPosition;
use crate::parser::{self, ParseExpressionResult, Parser, ResultRecoveryExt, diagnostic_labels};
pub use super::precedence::PrecedenceRank;
@ -108,6 +109,17 @@ impl<'src, 'arena> Parser<'src, 'arena> {
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
}
pub(super) fn make_error_expression_at(
&self,
position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
crate::arena::ArenaNode::new_in(
Expression::Error,
crate::lexer::TokenSpan::new(position),
self.arena,
)
}
/// Parses an expression, including only operators with binding power
/// at least `min_precedence_rank` (as tight or tighter).
fn parse_expression_with_min_precedence_rank(

View File

@ -119,7 +119,7 @@ impl<'src, 'arena> Parser<'src, 'arena> {
// These keywords remain valid identifiers unless the following
// tokens commit to the keyword-led form.
Keyword::For
if let Some(left_parenthesis_position) = self.is_for_loop_header_ahead() =>
if let Some(left_parenthesis_position) = self.peek_for_loop_header_left_parenthesis_position() =>
{
self.advance(); // `(`
self.parse_for_tail(token_position, left_parenthesis_position)

View File

@ -332,11 +332,10 @@ impl<'src, 'arena> Parser<'src, 'arena> {
return self.report_error_with_fallback(error);
}
NewClassSpecifierParseAction::Parse => self.parse_expression(),
NewClassSpecifierParseAction::Skip => crate::arena::ArenaNode::new_in(
Expression::Error,
TokenSpan::new(self.peek_position_or_eof()),
self.arena,
),
NewClassSpecifierParseAction::Skip => {
let error_position = self.peek_position_or_eof();
self.make_error_expression_at(error_position)
}
}
}
}

View File

@ -685,23 +685,38 @@ pub(super) const FOR_HEADER_FIXTURES: &[Fixture] = &[
];
#[test]
fn check_for_header_fixture_counts() {
fn check_p0016_for_header_fixture_counts() {
let runs = run_fixtures(FOR_HEADER_FIXTURES);
assert_eq!(runs.get("files/P0016_01.uc").unwrap().len(), 3);
assert_eq!(runs.get("files/P0016_02.uc").unwrap().len(), 2);
assert_eq!(runs.get("files/P0016_03.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0016_06.uc").unwrap().len(), 3);
}
/// Checks how many entries each P0017 fixture run contains.
#[test]
fn check_p0017_for_header_fixture_counts() {
    let runs = run_fixtures(FOR_HEADER_FIXTURES);
    let expected_counts: [(&str, usize); 4] = [
        ("files/P0017_01.uc", 1),
        ("files/P0017_02.uc", 1),
        ("files/P0017_04.uc", 1),
        ("files/P0017_05.uc", 1),
    ];
    for (label, expected_count) in expected_counts {
        assert_eq!(runs.get(label).unwrap().len(), expected_count);
    }
}
/// Checks how many entries each P0018 fixture run contains.
#[test]
fn check_p0018_for_header_fixture_counts() {
    let runs = run_fixtures(FOR_HEADER_FIXTURES);
    let expected_counts: [(&str, usize); 4] = [
        ("files/P0018_01.uc", 2),
        ("files/P0018_02.uc", 1),
        ("files/P0018_03.uc", 1),
        ("files/P0018_06.uc", 1),
    ];
    for (label, expected_count) in expected_counts {
        assert_eq!(runs.get(label).unwrap().len(), expected_count);
    }
}
#[test]
fn check_p0019_for_header_fixture_counts() {
let runs = run_fixtures(FOR_HEADER_FIXTURES);
assert_eq!(runs.get("files/P0019_01.uc").unwrap().len(), 2);
assert_eq!(runs.get("files/P0019_02.uc").unwrap().len(), 1);
@ -709,12 +724,22 @@ fn check_for_header_fixture_counts() {
assert_eq!(runs.get("files/P0019_04.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0019_06.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0019_07.uc").unwrap().len(), 1);
}
/// Checks how many entries each P0020 fixture run contains.
#[test]
fn check_p0020_for_header_fixture_counts() {
    let runs = run_fixtures(FOR_HEADER_FIXTURES);
    let expected_counts: [(&str, usize); 5] = [
        ("files/P0020_01.uc", 1),
        ("files/P0020_02.uc", 1),
        ("files/P0020_03.uc", 2),
        ("files/P0020_04.uc", 1),
        ("files/P0020_08.uc", 1),
    ];
    for (label, expected_count) in expected_counts {
        assert_eq!(runs.get(label).unwrap().len(), expected_count);
    }
}
#[test]
fn check_p0021_for_header_fixture_counts() {
let runs = run_fixtures(FOR_HEADER_FIXTURES);
assert_eq!(runs.get("files/P0021_01.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0021_02.uc").unwrap().len(), 1);
@ -1493,7 +1518,7 @@ fn check_p0021_for_header_fixtures() {
}
#[test]
fn check_for_header_body_recovery_fixtures() {
fn check_p0013_for_header_body_recovery_fixtures() {
let runs = run_fixtures(FOR_HEADER_FIXTURES);
assert_diagnostic(
@ -1590,4 +1615,307 @@ fn check_for_header_body_recovery_fixtures() {
notes: &[],
},
);
}
/// Fixtures for P0022: `return` followed by a token that is neither a value
/// expression start nor `;`.
pub(super) const P0022_FIXTURES: &[Fixture] = &[
// Offending `]` on the same line as `return`.
Fixture {
label: "files/P0022_01.uc",
source: "return ] ;",
},
// Offending `]` on the line after `return`.
Fixture {
label: "files/P0022_02.uc",
source: "return\n ]\n;\n",
},
// Closing `}` directly after `return`, with no `;`.
Fixture {
label: "files/P0022_03.uc",
source: "return\n}\n",
},
];
/// Checks the P0022 diagnostics produced for [`P0022_FIXTURES`].
///
/// Each fixture run is expected to contain exactly one entry, and that
/// entry is compared against the full expected diagnostic.
#[test]
fn check_p0022_fixtures() {
let runs = run_fixtures(P0022_FIXTURES);
assert_eq!(runs.get("files/P0022_01.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0022_02.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0022_03.uc").unwrap().len(), 1);
// Same-line `return ] ;`: only the primary label is expected.
assert_diagnostic(
&runs.get_any("files/P0022_01.uc"),
&ExpectedDiagnostic {
headline: "expected return value expression or `;` after `return`, found `]`",
severity: Severity::Error,
code: Some("P0022"),
primary_label: Some(ExpectedLabel {
span: TokenSpan {
start: TokenPosition(2),
end: TokenPosition(2),
},
message: "unexpected `]`",
}),
secondary_labels: &[],
help: None,
notes: &[],
},
);
// `]` on a later line: a secondary label at token 0 points back at
// `return`. The positions suggest whitespace and newlines are counted as
// tokens - TODO confirm against the lexer.
assert_diagnostic(
&runs.get_any("files/P0022_02.uc"),
&ExpectedDiagnostic {
headline: "expected return value expression or `;` after `return`, found `]`",
severity: Severity::Error,
code: Some("P0022"),
primary_label: Some(ExpectedLabel {
span: TokenSpan {
start: TokenPosition(3),
end: TokenPosition(3),
},
message: "unexpected `]`",
}),
secondary_labels: &[ExpectedLabel {
span: TokenSpan {
start: TokenPosition(0),
end: TokenPosition(0),
},
message: "after this `return`, a value expression or `;` was expected",
}],
help: None,
notes: &[],
},
);
// `}` on the next line: same shape as above, with `}` as the found token.
assert_diagnostic(
&runs.get_any("files/P0022_03.uc"),
&ExpectedDiagnostic {
headline: "expected return value expression or `;` after `return`, found `}`",
severity: Severity::Error,
code: Some("P0022"),
primary_label: Some(ExpectedLabel {
span: TokenSpan {
start: TokenPosition(2),
end: TokenPosition(2),
},
message: "unexpected `}`",
}),
secondary_labels: &[ExpectedLabel {
span: TokenSpan {
start: TokenPosition(0),
end: TokenPosition(0),
},
message: "after this `return`, a value expression or `;` was expected",
}],
help: None,
notes: &[],
},
);
}
/// Fixtures for P0023: `break` followed by a token that is neither a value
/// expression start nor `;`.
pub(super) const P0023_FIXTURES: &[Fixture] = &[
// Offending `]` on the same line as `break`.
Fixture {
label: "files/P0023_01.uc",
source: "break ] ;",
},
// Offending `]` several lines below `break`.
Fixture {
label: "files/P0023_02.uc",
source: "break\n \n\n\n\n ]\n;\n",
},
// Closing `}` directly after `break`, with no `;`.
Fixture {
label: "files/P0023_03.uc",
source: "break\n}\n",
},
];
/// Checks the P0023 diagnostics produced for [`P0023_FIXTURES`].
///
/// Each fixture run is expected to contain exactly one entry, and that
/// entry is compared against the full expected diagnostic.
#[test]
fn check_p0023_fixtures() {
let runs = run_fixtures(P0023_FIXTURES);
assert_eq!(runs.get("files/P0023_01.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0023_02.uc").unwrap().len(), 1);
assert_eq!(runs.get("files/P0023_03.uc").unwrap().len(), 1);
// Same-line `break ] ;`: only the primary label is expected.
assert_diagnostic(
&runs.get_any("files/P0023_01.uc"),
&ExpectedDiagnostic {
headline: "expected break value expression or `;` after `break`, found `]`",
severity: Severity::Error,
code: Some("P0023"),
primary_label: Some(ExpectedLabel {
span: TokenSpan {
start: TokenPosition(2),
end: TokenPosition(2),
},
message: "unexpected `]`",
}),
secondary_labels: &[],
help: None,
notes: &[],
},
);
// `]` several lines below: a secondary label at token 0 points back at
// `break`. The positions suggest whitespace and newlines are counted as
// tokens - TODO confirm against the lexer.
assert_diagnostic(
&runs.get_any("files/P0023_02.uc"),
&ExpectedDiagnostic {
headline: "expected break value expression or `;` after `break`, found `]`",
severity: Severity::Error,
code: Some("P0023"),
primary_label: Some(ExpectedLabel {
span: TokenSpan {
start: TokenPosition(8),
end: TokenPosition(8),
},
message: "unexpected `]`",
}),
secondary_labels: &[ExpectedLabel {
span: TokenSpan {
start: TokenPosition(0),
end: TokenPosition(0),
},
message: "after this `break`, a value expression or `;` was expected",
}],
help: None,
notes: &[],
},
);
// `}` on the next line: same shape as above, with `}` as the found token.
assert_diagnostic(
&runs.get_any("files/P0023_03.uc"),
&ExpectedDiagnostic {
headline: "expected break value expression or `;` after `break`, found `}`",
severity: Severity::Error,
code: Some("P0023"),
primary_label: Some(ExpectedLabel {
span: TokenSpan {
start: TokenPosition(2),
end: TokenPosition(2),
},
message: "unexpected `}`",
}),
secondary_labels: &[ExpectedLabel {
span: TokenSpan {
start: TokenPosition(0),
end: TokenPosition(0),
},
message: "after this `break`, a value expression or `;` was expected",
}],
help: None,
notes: &[],
},
);
}
/// Fixture inputs for diagnostic code P0024: a `goto` that is not followed by
/// a label (`;`, `]`, or end of input appears instead).
///
/// Each entry pairs a label with the source text to run; the labels are the
/// keys that `check_p0024_fixtures` uses to look up the results.
pub(super) const P0024_FIXTURES: &[Fixture] = &[
// `;` directly after `goto`, with no whitespace in between.
Fixture {
label: "files/P0024_01.uc",
source: "goto;",
},
// `;` on the line after `goto`.
Fixture {
label: "files/P0024_02.uc",
source: "goto\n ;\n",
},
// `]` on the line after `goto`.
Fixture {
label: "files/P0024_03.uc",
source: "goto\n ]\n;\n",
},
// Input ends immediately after `goto`.
Fixture {
label: "files/P0024_04.uc",
source: "goto",
},
];
/// Runs the P0024 fixtures and checks that each one produces exactly one
/// diagnostic with the expected headline, severity, code, and labels.
///
/// Fixtures `01` and `04` expect no secondary label; fixtures `02` and `03`
/// expect an extra label on token 0 whose message points back at the `goto`.
#[test]
fn check_p0024_fixtures() {
    // Token 0 is the span the secondary label refers to as "this `goto`".
    const GOTO_TOKEN: TokenSpan = TokenSpan {
        start: TokenPosition(0),
        end: TokenPosition(0),
    };
    const AFTER_GOTO_NOTE: &str = "after this `goto`, a label was expected";
    const HEADLINE_SEMICOLON: &str = "expected label after `goto`, found `;`";
    const HEADLINE_BRACKET: &str = "expected label after `goto`, found `]`";
    const HEADLINE_EOF: &str = "expected label after `goto`, found end of file";

    let runs = run_fixtures(P0024_FIXTURES);

    // One entry per fixture: its label and the diagnostic it must report.
    let cases = [
        (
            "files/P0024_01.uc",
            ExpectedDiagnostic {
                headline: HEADLINE_SEMICOLON,
                severity: Severity::Error,
                code: Some("P0024"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(1),
                        end: TokenPosition(1),
                    },
                    message: "unexpected `;`",
                }),
                secondary_labels: &[],
                help: None,
                notes: &[],
            },
        ),
        (
            "files/P0024_02.uc",
            ExpectedDiagnostic {
                headline: HEADLINE_SEMICOLON,
                severity: Severity::Error,
                code: Some("P0024"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(3),
                        end: TokenPosition(3),
                    },
                    message: "unexpected `;`",
                }),
                secondary_labels: &[ExpectedLabel {
                    span: GOTO_TOKEN,
                    message: AFTER_GOTO_NOTE,
                }],
                help: None,
                notes: &[],
            },
        ),
        (
            "files/P0024_03.uc",
            ExpectedDiagnostic {
                headline: HEADLINE_BRACKET,
                severity: Severity::Error,
                code: Some("P0024"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(3),
                        end: TokenPosition(3),
                    },
                    message: "unexpected `]`",
                }),
                secondary_labels: &[ExpectedLabel {
                    span: GOTO_TOKEN,
                    message: AFTER_GOTO_NOTE,
                }],
                help: None,
                notes: &[],
            },
        ),
        (
            "files/P0024_04.uc",
            ExpectedDiagnostic {
                headline: HEADLINE_EOF,
                severity: Severity::Error,
                code: Some("P0024"),
                primary_label: Some(ExpectedLabel {
                    span: TokenSpan {
                        start: TokenPosition(1),
                        end: TokenPosition(1),
                    },
                    message: "reached end of file here",
                }),
                secondary_labels: &[],
                help: None,
                notes: &[],
            },
        ),
    ];

    // First pass: every fixture must report exactly one diagnostic.
    for &(label, _) in &cases {
        assert_eq!(runs.get(label).unwrap().len(), 1);
    }

    // Second pass: compare the reported diagnostic against the expectation.
    for (label, expected) in cases {
        assert_diagnostic(&runs.get_any(label), &expected);
    }
}