rott/rottlib/src/ast/expressions.rs
dkanus 588790b9b4 Refactor everything
Huge dump of refactored code. Still in the middle of the changes that
are to be squashed later into one huge monster commit, because there is
no value in anything atomic here.
2026-04-05 20:32:11 +07:00

291 lines
11 KiB
Rust

//! Expression AST nodes.
//!
//! This module defines ordinary expressions together with expression-shaped
//! control-flow and block forms parsed by the language.
use super::{
AstSpan, IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator,
QualifiedIdentifierRef, StatementRef,
};
use crate::arena::ArenaVec;
use super::super::lexer::TokenPosition;
use crate::arena::{Arena, ArenaNode, ArenaString};
/// A single expression node of the AST.
///
/// The enum is deliberately wide. Besides literals and operator applications
/// it also covers blocks and control-flow constructs, because the language
/// parses those forms in expression positions.
// NOTE(review): the lint below is silenced without a stated reason; presumably
// the size difference between variants is accepted on purpose - confirm.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, PartialEq)]
pub enum Expression<'src, 'arena> {
    /// A bare identifier used as an expression.
    Identifier(IdentifierToken),
    /// A string literal.
    ///
    /// Holds the transformed (unescaped) form of the raw source string,
    /// stored in arena memory.
    String(ArenaString<'arena>),
    /// An integer literal.
    Integer(u128),
    /// A floating-point literal.
    Float(f64),
    /// A boolean literal.
    Bool(bool),
    /// The `None` literal, the language's null-like value.
    None,
    /// A subexpression explicitly wrapped in parentheses: `(expr)`.
    ///
    /// The grouping is kept as its own node rather than dropped, so later
    /// stages still have it available for diagnostics, formatting, or
    /// source-faithful reconstruction.
    Parentheses(ExpressionRef<'src, 'arena>),
    /// A class-type reference, parsed as a qualified identifier path.
    ///
    /// Covers class-like type mentions that are not written as a tagged name
    /// literal.
    ClassType(QualifiedIdentifierRef<'arena>),
    /// A quoted name literal, with or without a leading tag.
    ///
    /// Examples:
    /// - `class'Foo'`
    /// - `Texture'Pkg.Group.Name'`
    /// - `'Pkg.Group.Name'` if the grammar permits an untagged form
    ///
    /// `tag` is the leading identifier token, when one is present. `name` is
    /// the raw text between the quotes, kept exactly as written.
    NameLiteral {
        tag: Option<IdentifierToken>,
        name: &'src str,
    },
    /// An indexing operation: `target[index]`.
    ///
    /// Produced after postfix parsing; binds tighter than any infix operator.
    Index {
        target: ExpressionRef<'src, 'arena>,
        index: ExpressionRef<'src, 'arena>,
    },
    /// A member access: `target.name`.
    ///
    /// The member name is kept as a token reference instead of an owned
    /// string, so later stages can recover the exact spelling and source
    /// location from the lexer/token stream.
    Member {
        target: ExpressionRef<'src, 'arena>,
        name: IdentifierToken,
    },
    /// A call expression: `callee(arg1, arg2, ...)`.
    ///
    /// Each argument is an `Option<ExpressionRef>` so that omitted arguments
    /// are preserved in syntaxes that allow empty slots.
    Call {
        callee: ExpressionRef<'src, 'arena>,
        arguments: ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>>,
    },
    /// A prefix unary operator applied to an operand: `op rhs`.
    PrefixUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
    /// A postfix unary operator applied to an operand: `lhs op`.
    PostfixUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
    /// A binary operator applied to two operands: `lhs op rhs`.
    Binary(
        ExpressionRef<'src, 'arena>,
        InfixOperator,
        ExpressionRef<'src, 'arena>,
    ),
    /// A block expression / statement block: `{ ... }`.
    ///
    /// The statements are kept in source order.
    Block(StatementList<'src, 'arena>),
    /// A conditional expression / statement.
    ///
    /// Both arms are `BranchBody` values, which lets the parser keep legacy
    /// one-line bodies, optional trailing semicolons, and recovery anchors.
    If {
        condition: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
        else_body: Option<BranchBody<'src, 'arena>>,
    },
    /// `while (condition) body`
    While {
        condition: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
    },
    /// `do body until (condition)`
    DoUntil {
        condition: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
    },
    /// `foreach iterator body`
    ///
    /// The iteration source / iterator expression is an ordinary expression
    /// node, because the language permits nontrivial syntax in that position.
    ForEach {
        iterated_expression: ExpressionRef<'src, 'arena>,
        body: BranchBody<'src, 'arena>,
    },
    /// A traditional three-part `for` loop.
    ///
    /// Every header component is optional, which supports forms such as:
    /// - `for (;;)`
    /// - `for (init;;)`
    /// - `for (;cond;)`
    /// - `for (;;step)`
    For {
        initialization: Option<ExpressionRef<'src, 'arena>>,
        condition: Option<ExpressionRef<'src, 'arena>>,
        step: Option<ExpressionRef<'src, 'arena>>,
        body: BranchBody<'src, 'arena>,
    },
    /// A `switch` construct.
    ///
    /// `cases` lists every explicit case arm in source order.
    /// `default_arm` holds the statements of the default branch, if there is
    /// one.
    Switch {
        selector: ExpressionRef<'src, 'arena>,
        cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
        default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
    },
    /// A `goto` statement.
    ///
    /// Holds the position of the target token instead of a copy of its text.
    /// After a successful parse that token is either an identifier or a name
    /// literal.
    Goto(TokenPosition),
    /// A `continue` statement.
    Continue,
    /// A `break` statement, with an attached expression when the language
    /// form allows one.
    Break(Option<ExpressionRef<'src, 'arena>>),
    /// A `return` statement, with the returned expression when one is given.
    Return(Option<ExpressionRef<'src, 'arena>>),
    /// An object construction / allocation written with the language's `new`
    /// syntax.
    ///
    /// The first three fields are optional positional control arguments.
    /// `class_specifier` is the mandatory class expression naming what is to
    /// be constructed.
    New {
        outer_argument: Option<ExpressionRef<'src, 'arena>>,
        name_argument: Option<ExpressionRef<'src, 'arena>>,
        flags_argument: Option<ExpressionRef<'src, 'arena>>,
        class_specifier: ExpressionRef<'src, 'arena>,
    },
    /// A recovery placeholder used where an expression failed to parse.
    ///
    /// With it the parser can keep building the surrounding AST and report
    /// more than one error in a single pass.
    Error,
}
/// Statements contained in a `{ ... }` block.
///
/// Alias for an `ArenaVec` of `StatementRef`s.
pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'arena>>;
/// The statements of a `{ ... }` block together with a span.
#[derive(Debug, PartialEq)]
pub struct BlockBody<'src, 'arena> {
    /// Statements that make up the block.
    pub statements: StatementList<'src, 'arena>,
    /// Span recorded for the block.
    pub span: AstSpan,
}
/// Stable arena reference to an expression node.
///
/// Alias for an `ArenaNode` holding an `Expression`.
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
/// Optional expression payload for grammar positions where an expression may
/// be omitted entirely.
///
/// `None` stands for the omitted expression; `Some` carries the parsed node.
pub type OptionalExpression<'src, 'arena> = Option<ExpressionRef<'src, 'arena>>;
/// The body attached to one branch of a control-flow construct.
///
/// Keeping branch bodies in their own type lets constructs such as `if`,
/// `while`, and `for` retain the parsed body along with source details that
/// are specific to the branch.
#[derive(Debug, PartialEq)]
pub struct BranchBody<'src, 'arena> {
    /// The parsed payload of the branch.
    ///
    /// `None` means the body is absent or could not be parsed in a
    /// recoverable way.
    pub expression: Option<ExpressionRef<'src, 'arena>>,
    /// Position of the optional semicolon that directly follows a non-block
    /// branch body in legacy constructs such as `if`, `for`, `while`, etc.
    ///
    /// It is kept on purpose instead of being normalized away, so later
    /// stages can diagnose or reproduce the source structure more precisely.
    pub semicolon_position: Option<TokenPosition>,
    /// Token position usable as a fallback end anchor for spans and
    /// diagnostics when the body itself is missing.
    ///
    /// For malformed constructs this may be the only reliable location
    /// attached to the branch.
    pub end_anchor_token_position: TokenPosition,
}
/// A single `case` arm of a `switch`.
///
/// In UnrealScript-style syntax an arm may carry several labels and its body
/// is a statement list; execution can fall through to the next arm unless
/// control flow is terminated explicitly.
#[derive(Debug, PartialEq)]
pub struct SwitchCase<'src, 'arena> {
    /// The labels attached to this arm.
    ///
    /// They are expressions because the language accepts expression-valued
    /// labels, not just simple constants.
    pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>,
    /// The statements forming the body of the arm.
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
}
/// Stable arena reference to a `switch` case arm.
///
/// Alias for an `ArenaNode` holding a `SwitchCase`.
pub type SwitchCaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
impl<'arena> Expression<'_, 'arena> {
    /// Allocates a `Binary` node in `arena`.
    ///
    /// The node's span is the merge of the spans of `left_hand_side` and
    /// `right_hand_side`.
    #[must_use]
    pub fn new_binary(
        arena: &'arena Arena,
        left_hand_side: ArenaNode<'arena, Self>,
        op: InfixOperator,
        right_hand_side: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        // Read both operand spans first: the operands are moved into the new
        // node right after.
        let covered = AstSpan::merge(left_hand_side.span(), right_hand_side.span());
        let node = Self::Binary(left_hand_side, op, right_hand_side);
        ArenaNode::new_in(node, covered, arena)
    }

    /// Allocates a `PrefixUnary` node in `arena`.
    ///
    /// The node's span runs from the operator token at `operation_position`
    /// to the last token of `right_hand_side`.
    #[must_use]
    pub fn new_prefix(
        arena: &'arena Arena,
        operation_position: TokenPosition,
        operation: PrefixOperator,
        right_hand_side: ArenaNode<'arena, Self>,
    ) -> ArenaNode<'arena, Self> {
        // Capture the operand's end before the operand is moved into the node.
        let operand_end = right_hand_side.span().token_to;
        let covered = AstSpan::range(operation_position, operand_end);
        let node = Self::PrefixUnary(operation, right_hand_side);
        ArenaNode::new_in(node, covered, arena)
    }

    /// Allocates a `PostfixUnary` node in `arena`.
    ///
    /// The node's span runs from the first token of `left_hand_side` to the
    /// operator token at `operation_position`.
    #[must_use]
    pub fn new_postfix(
        arena: &'arena Arena,
        left_hand_side: ArenaNode<'arena, Self>,
        operation: PostfixOperator,
        operation_position: TokenPosition,
    ) -> ArenaNode<'arena, Self> {
        // Capture the operand's start before the operand is moved into the node.
        let operand_start = left_hand_side.span().token_from;
        let covered = AstSpan::range(operand_start, operation_position);
        let node = Self::PostfixUnary(left_hand_side, operation);
        ArenaNode::new_in(node, covered, arena)
    }
}