// NOTE: Huge dump of refactored code. This is still in the middle of changes that will
// later be squashed into one large commit, because nothing here is worth keeping atomic.
// File listing metadata: 291 lines, 11 KiB, Rust.
//! Expression AST nodes.
//!
//! This module defines ordinary expressions together with expression-shaped
//! control-flow and block forms parsed by the language.
use super::super::lexer::TokenPosition;
use super::{
    AstSpan, IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator,
    QualifiedIdentifierRef, StatementRef,
};
use crate::arena::{Arena, ArenaNode, ArenaString, ArenaVec};

/// Expression node used for both ordinary expressions and expression-shaped
|
|
/// statement/control-flow forms.
|
|
///
|
|
/// This AST is intentionally broad: besides operators and literals, it also
|
|
/// includes blocks and control-flow constructs that syntactically occupy
|
|
/// expression parsing positions in the language.
|
|
#[allow(clippy::large_enum_variant)]
|
|
#[derive(Debug, PartialEq)]
|
|
pub enum Expression<'src, 'arena> {
|
|
/// Plain identifier expression.
|
|
Identifier(IdentifierToken),
|
|
/// String literal.
|
|
///
|
|
/// The contents stored in arena memory are transformed (unescaped) version
|
|
/// of raw strings from the source.
|
|
String(ArenaString<'arena>),
|
|
/// Integer literal.
|
|
Integer(u128),
|
|
/// Floating-point literal.
|
|
Float(f64),
|
|
/// Boolean literal.
|
|
Bool(bool),
|
|
/// `None` literal / null-like language value.
|
|
None,
|
|
/// Explicit parenthesized subexpression: `(expr)`.
|
|
///
|
|
/// Parentheses are preserved as a node instead of being discarded so later
|
|
/// stages can retain grouping information for diagnostics, formatting, or
|
|
/// source-faithful reconstruction.
|
|
Parentheses(ExpressionRef<'src, 'arena>),
|
|
/// Class-type reference parsed as a qualified identifier path.
|
|
///
|
|
/// This is used for class-like type mentions that are not represented as a
|
|
/// tagged name literal.
|
|
ClassType(QualifiedIdentifierRef<'arena>),
|
|
/// Tagged or untagged quoted name literal.
|
|
///
|
|
/// Examples:
|
|
/// - `class'Foo'`
|
|
/// - `Texture'Pkg.Group.Name'`
|
|
/// - `'Pkg.Group.Name'` if the grammar permits an untagged form
|
|
///
|
|
/// `tag` stores the leading identifier token when present. `name` is the
|
|
/// raw content between quotes and is preserved exactly as written.
|
|
NameLiteral {
|
|
tag: Option<IdentifierToken>,
|
|
name: &'src str,
|
|
},
|
|
/// Indexing operation: `target[index]`.
|
|
///
|
|
/// This is produced after postfix parsing and binds tighter than any infix
|
|
/// operator.
|
|
Index {
|
|
target: ExpressionRef<'src, 'arena>,
|
|
index: ExpressionRef<'src, 'arena>,
|
|
},
|
|
/// Member access: `target.name`.
|
|
///
|
|
/// The member name is stored as a token reference rather than an owned
|
|
/// string so later stages can resolve exact spelling and source location
|
|
/// from the lexer/token stream.
|
|
Member {
|
|
target: ExpressionRef<'src, 'arena>,
|
|
name: IdentifierToken,
|
|
},
|
|
/// Call expression: `callee(arg1, arg2, ...)`.
|
|
///
|
|
/// Arguments are stored as `Option<ExpressionRef>` to preserve omitted
|
|
/// arguments in syntaxes that allow empty slots.
|
|
Call {
|
|
callee: ExpressionRef<'src, 'arena>,
|
|
arguments: ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>>,
|
|
},
|
|
/// Prefix unary operator application: `op rhs`.
|
|
PrefixUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
|
|
/// Postfix unary operator application: `lhs op`.
|
|
PostfixUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
|
|
/// Binary operator application: `lhs op rhs`.
|
|
Binary(
|
|
ExpressionRef<'src, 'arena>,
|
|
InfixOperator,
|
|
ExpressionRef<'src, 'arena>,
|
|
),
|
|
/// Block expression / statement block: `{ ... }`.
|
|
///
|
|
/// The contained statements are preserved in source order.
|
|
Block(StatementList<'src, 'arena>),
|
|
/// Conditional expression / statement.
|
|
///
|
|
/// Both arms use `BranchBody` so the parser can preserve legacy one-line
|
|
/// bodies, optional trailing semicolons, and recovery anchors.
|
|
If {
|
|
condition: ExpressionRef<'src, 'arena>,
|
|
body: BranchBody<'src, 'arena>,
|
|
else_body: Option<BranchBody<'src, 'arena>>,
|
|
},
|
|
/// `while (condition) body`
|
|
While {
|
|
condition: ExpressionRef<'src, 'arena>,
|
|
body: BranchBody<'src, 'arena>,
|
|
},
|
|
/// `do body until (condition)`
|
|
DoUntil {
|
|
condition: ExpressionRef<'src, 'arena>,
|
|
body: BranchBody<'src, 'arena>,
|
|
},
|
|
/// `foreach iterator body`
|
|
///
|
|
/// The iteration source / iterator expression is stored as a normal
|
|
/// expression node because the language permits nontrivial syntax there.
|
|
ForEach {
|
|
iterated_expression: ExpressionRef<'src, 'arena>,
|
|
body: BranchBody<'src, 'arena>,
|
|
},
|
|
/// Traditional three-part `for` loop.
|
|
///
|
|
/// Each header component is optional to support forms such as:
|
|
/// - `for (;;)`
|
|
/// - `for (init;;)`
|
|
/// - `for (;cond;)`
|
|
/// - `for (;;step)`
|
|
For {
|
|
initialization: Option<ExpressionRef<'src, 'arena>>,
|
|
condition: Option<ExpressionRef<'src, 'arena>>,
|
|
step: Option<ExpressionRef<'src, 'arena>>,
|
|
body: BranchBody<'src, 'arena>,
|
|
},
|
|
/// `switch` construct.
|
|
///
|
|
/// `cases` contains all explicit case arms in source order.
|
|
/// `default_arm` stores the statements of the default branch, if present.
|
|
Switch {
|
|
selector: ExpressionRef<'src, 'arena>,
|
|
cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
|
|
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
|
|
},
|
|
/// `goto` statement.
|
|
///
|
|
/// Stores the token position of the target token rather than duplicating
|
|
/// its textual representation in the AST. On successful parsing refers to
|
|
/// either identifier or name literal.
|
|
Goto(TokenPosition),
|
|
/// `continue` statement.
|
|
Continue,
|
|
/// `break` statement, optionally with an attached expression if the
|
|
/// language form allows one.
|
|
Break(Option<ExpressionRef<'src, 'arena>>),
|
|
/// `return` statement, optionally carrying a returned expression.
|
|
Return(Option<ExpressionRef<'src, 'arena>>),
|
|
/// Object construction / allocation form using the language's `new` syntax.
|
|
///
|
|
/// The first three arguments are optional positional control arguments.
|
|
/// `class_specifier` is the required class expression that identifies what
|
|
/// should be constructed.
|
|
New {
|
|
outer_argument: Option<ExpressionRef<'src, 'arena>>,
|
|
name_argument: Option<ExpressionRef<'src, 'arena>>,
|
|
flags_argument: Option<ExpressionRef<'src, 'arena>>,
|
|
class_specifier: ExpressionRef<'src, 'arena>,
|
|
},
|
|
/// Recovery placeholder inserted when an expression could not be parsed.
|
|
///
|
|
/// This allows the parser to continue building a larger AST and report more
|
|
/// than one error in a single pass.
|
|
Error,
|
|
}
|
|
|
|
/// Statements contained in a `{ ... }` block.
|
|
pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'arena>>;
|
|
|
|
/// Statements contained in a `{ ... }` block with a span.
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct BlockBody<'src, 'arena> {
|
|
pub statements: StatementList<'src, 'arena>,
|
|
pub span: AstSpan,
|
|
}
|
|
|
|
/// Stable arena reference to an expression node.
|
|
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
|
|
|
|
/// Optional expression payload used in grammar positions where an expression
|
|
/// may be omitted entirely.
|
|
pub type OptionalExpression<'src, 'arena> = Option<ExpressionRef<'src, 'arena>>;
|
|
|
|
/// Body of a control-flow branch.
|
|
///
|
|
/// Branch bodies are stored separately so constructs such as `if`, `while`,
|
|
/// and `for` can preserve both the parsed body and branch-specific source
|
|
/// details.
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct BranchBody<'src, 'arena> {
|
|
/// Parsed branch payload.
|
|
///
|
|
/// This is `None` when the body is absent or could not be parsed in a
|
|
/// recoverable way.
|
|
pub expression: Option<ExpressionRef<'src, 'arena>>,
|
|
|
|
/// Optional semicolon that appears immediately after a non-block branch
|
|
/// body in legacy constructs such as `if`, `for`, `while`, etc.
|
|
///
|
|
/// This is intentionally preserved rather than normalized away so later
|
|
/// stages can diagnose or reproduce source structure more precisely.
|
|
pub semicolon_position: Option<TokenPosition>,
|
|
|
|
/// Token position that can be used as a fallback end anchor for spans and
|
|
/// diagnostics when the body itself is missing.
|
|
///
|
|
/// In malformed constructs this may be the only reliable location attached
|
|
/// to the branch.
|
|
pub end_anchor_token_position: TokenPosition,
|
|
}
|
|
|
|
/// One `case` arm inside a `switch`.
|
|
///
|
|
/// UnrealScript-style syntax allows each arm to have multiple labels and uses
|
|
/// statement lists as bodies, with fallthrough being possible unless control
|
|
/// flow terminates explicitly.
|
|
#[derive(Debug, PartialEq)]
|
|
pub struct SwitchCase<'src, 'arena> {
|
|
/// Case labels associated with this arm.
|
|
///
|
|
/// Labels are stored as expressions because the language allows
|
|
/// expression-valued labels rather than only simple constants.
|
|
pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>,
|
|
|
|
/// Statements belonging to the arm body.
|
|
pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
|
}
|
|
|
|
/// Stable arena reference to a `switch` case arm.
|
|
pub type SwitchCaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
|
|
|
|
impl<'arena> Expression<'_, 'arena> {
|
|
/// Construct a binary expression and assign it a span from `left_hand_side`
|
|
/// through `right_hand_side`.
|
|
#[must_use]
|
|
pub fn new_binary(
|
|
arena: &'arena Arena,
|
|
left_hand_side: ArenaNode<'arena, Self>,
|
|
op: InfixOperator,
|
|
right_hand_side: ArenaNode<'arena, Self>,
|
|
) -> ArenaNode<'arena, Self> {
|
|
let span = AstSpan::merge(left_hand_side.span(), right_hand_side.span());
|
|
ArenaNode::new_in(
|
|
Self::Binary(left_hand_side, op, right_hand_side),
|
|
span,
|
|
arena,
|
|
)
|
|
}
|
|
|
|
/// Construct a prefix unary expression and assign it a span from the
|
|
/// operator token through the end of `right_hand_side`.
|
|
#[must_use]
|
|
pub fn new_prefix(
|
|
arena: &'arena Arena,
|
|
operation_position: TokenPosition,
|
|
operation: PrefixOperator,
|
|
right_hand_side: ArenaNode<'arena, Self>,
|
|
) -> ArenaNode<'arena, Self> {
|
|
let span = AstSpan::range(operation_position, right_hand_side.span().token_to);
|
|
ArenaNode::new_in(Self::PrefixUnary(operation, right_hand_side), span, arena)
|
|
}
|
|
|
|
/// Construct a postfix unary expression and assign it a span from the start
|
|
/// of `left_hand_side` through the operator token.
|
|
#[must_use]
|
|
pub fn new_postfix(
|
|
arena: &'arena Arena,
|
|
left_hand_side: ArenaNode<'arena, Self>,
|
|
operation: PostfixOperator,
|
|
operation_position: TokenPosition,
|
|
) -> ArenaNode<'arena, Self> {
|
|
let span = AstSpan::range(left_hand_side.span().token_from, operation_position);
|
|
ArenaNode::new_in(Self::PostfixUnary(left_hand_side, operation), span, arena)
|
|
}
|
|
}
|