//! Literal decoding for Fermented `UnrealScript`. //! //! This module defines the semantic rules for interpreting literal tokens //! produced by the lexer. It is responsible only for *decoding* the textual //! representation of literals into their internal values. //! //! The rules implemented here intentionally mirror the quirks of //! Unreal Engine 2’s `UnrealScript`. use crate::parser::{ParseErrorKind, ParseResult}; impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { /// Decodes an integer literal string into [`u128`]. /// /// Syntax: /// - Optional base prefix: `0b` | `0o` | `0x` (case-insensitive). /// No prefix -> decimal. /// - Digits must match the base (`0-1`/`0-7`/`0-9A-F`). /// - Underscores are allowed and ignored (e.g., `1_000`, `0xDE_AD`). /// - No leading sign; parsed as a non-negative magnitude. /// - Must fit within [`u128`]. /// /// Examples: `42`, `0b1010_0011`, `0o755`, `0xDEAD_BEEF`. /// /// On failure, returns [`ParseErrorKind::InvalidNumericLiteral`] at /// the parser's current cursor position. pub(crate) fn decode_integer_literal(&self, literal: &str) -> ParseResult<'src, 'arena, u128> { let (base, content) = match literal.split_at_checked(2) { Some(("0b" | "0B", stripped)) => (2, stripped), Some(("0o" | "0O", stripped)) => (8, stripped), Some(("0x" | "0X", stripped)) => (16, stripped), _ => (10, literal), }; let digits_without_underscores = content.replace('_', ""); u128::from_str_radix(&digits_without_underscores, base) .map_err(|_| self.make_error_at_last_consumed(ParseErrorKind::InvalidNumericLiteral)) } /// Decodes a float literal as `f64`, following the permissive and only /// partially documented behavior of `UnrealScript`. /// /// Unreal Engine 2 does not define a precise and consistent set of rules /// for float literals and the original compiler contains several quirks. /// Because of this, we default to normalizing the text using a small set of /// UnrealScript-specific rules and then parse the result using rust's /// `f64` parser. /// /// Rules implemented here: /// - Only decimal floats and special literals (e.g. `NaN`, `inf`) /// are supported (no hex or binary formats). /// - A single trailing `f` or `F`, if present, is removed before parsing. /// - The literal text is scanned for periods (`.`). If a second period /// is found, everything from that second `.` onward is discarded. /// /// Examples: /// * `1.2.3e4` becomes `1.2` /// * `1.2e3.4` becomes `1.2e3` /// /// - After this truncation step, the remaining text is interpreted as a /// normal rust `f64` literal. This means it may contain digits, at /// most one decimal point, and an optional exponent part (for example /// `e3` or `E-2`), but it must otherwise follow rust's `f64` syntax. /// Underscores, spaces, and other unsupported characters cause a /// parse error. /// /// On failure, this function returns /// [`ParseErrorKind::InvalidNumericLiteral`] at the current parser /// position. pub(crate) fn decode_float_literal(&self, literal: &str) -> ParseResult<'src, 'arena, f64> { let content = literal .strip_suffix('f') .or_else(|| literal.strip_suffix('F')) .unwrap_or(literal); // Truncate after the second '.', matching UnrealScript behavior let content = content .match_indices('.') .nth(1) .and_then(|(period_index, _)| content.get(..period_index)) .unwrap_or(content); content .parse::() .map_err(|_| self.make_error_at_last_consumed(ParseErrorKind::InvalidNumericLiteral)) } /// Unescapes a tokenized string literal into an arena string. /// /// Supported escapes: `\n`, `\t`, `\"`, `\\`. /// Unknown escapes drop the backslash and emit the character unchanged /// (`UnrealScript` behavior). /// If `raw_string` ends with a trailing `\` (which should not happen for /// well-formed tokens), that backslash is simply ignored. /// /// This function assumes `raw_string` is the token text without surrounding /// quotes. pub(crate) fn unescape_string_literal( &self, raw_string: &str, ) -> crate::arena::ArenaString<'arena> { let mut buffer = String::with_capacity(raw_string.len()); let mut characters = raw_string.chars(); while let Some(next_character) = characters.next() { if next_character == '\\' { // Under the lexer contract, string tokens do not end with a lone // backslash, so there is always a following character. If this // invariant is broken, the final '\' is simply ignored here. if let Some(escaped_character) = characters.next() { match escaped_character { 'n' => buffer.push('\n'), 't' => buffer.push('\t'), '"' => buffer.push('"'), '\\' => buffer.push('\\'), // Simply leaving the escaped character matches // UnrealScript behavior. unrecognized_escape_char => buffer.push(unrecognized_escape_char), } } } else { buffer.push(next_character); } } self.arena.string(&buffer) } }