1312 lines
43 KiB
1312 lines
43 KiB
/**
 *      Implements a simple `Parser` with built-in functions to parse simple
 *  UnrealScript's types and support for saving / restoring parser states.
 *      Copyright 2020 Anton Tarasenko
 *------------------------------------------------------------------------------
 * This file is part of Acedia.
 *
 * Acedia is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License, or
 * (at your option) any later version.
 *
 * Acedia is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Acedia.  If not, see <https://www.gnu.org/licenses/>.
 */
//  `Text` and `UnicodeData` declare the structs/enums used in this class
//  (`Text.Character`, `Text.StringType`, `UnicodeData.CodePointMapping`),
//  so they are listed as compile-time dependencies.
class Parser extends AcediaObject
    dependson(Text)
    dependson(UnicodeData);

//  Numeric constants used by parsing methods below. They are not assigned
//  anywhere in this part of the file
//  (presumably set in `defaultproperties` - TODO confirm).
//  Largest code point `MByte()` accepts as fitting into a `byte`.
var public int BYTE_MAX;
//  Escaped character that makes `MEscapedSequence()` read
//  a 2-digit hexadecimal code point.
var public int CODEPOINT_USMALL;
//  Escaped character that makes `MEscapedSequence()` read
//  a 4-digit hexadecimal code point.
var public int CODEPOINT_ULARGE;
// The sequence of Unicode code points that this `Parser` is supposed to parse.
// Replaced as a whole by `InitializeRaw()`.
var private array<Text.Character> content;
// Incremented each time `Parser` is reinitialized with new `content`.
// Can be used to make `Parser` object completely independent from
// its past, necessary since garbage collection is extra expensive in UE2
// and we want to reuse created objects as much as possible.
// Every `ParserState` records the version it was taken at and
// `IsStateValid()` rejects states whose version differs from this value.
var private int version;
//      Snapshot of a `Parser`'s progress. A copy of this struct can later be
//  handed back to the `Parser` (see `RestoreState()`) to rewind it to
//  the moment the snapshot was taken.
struct ParserState
{
    //      Which object produced this snapshot and at what `version`.
    //  `IsStateValid()` compares both against the `Parser` it is applied to,
    //  so a snapshot is only accepted by the same `Parser`
    //  that has not been reinitialized since.
    var private AcediaObject    ownerObject;
    var private int             ownerVersion;
    //  `true` if `Parser` was in a failed state when snapshot was taken.
    var private bool            failed;
    //  Index (inside `Parser`'s content) of the next symbol to be parsed.
    var private int             pointer;
};
// Live state of this `Parser`: current position in `content` and
// the failure flag. All parsing methods read and modify this variable.
var private ParserState currentState;
// For convenience `Parser` will store one internal state that designates
// a state that's safe to revert to when some parsing attempt goes wrong.
// Written by `InitializeRaw()` and `Confirm()`, read by `R()`.
// @see `Confirm()`, `R()`
var private ParserState confirmedState;
// Describes rules for translating escaped sequences ("\r", "\n", "\t")
// into appropriate code points.
// `MEscapedSequence()` looks up the escaped character in the `from` field
// and substitutes the `to` field of the first matching record.
var private const array<UnicodeData.CodePointMapping> escapeCharactersMap;
//  Used to store a result of a `MSign()` function.
//  NOTE(review): order of values is reconstructed; `SIGN_Missing` is put first
//  so that a zero-initialized variable reads as "no sign" - confirm.
enum ParsedSign
{
    //  No sign was found (only reported when missing sign is allowed)
    SIGN_Missing,
    //  "+" was parsed
    SIGN_Plus,
    //  "-" was parsed
    SIGN_Minus
};
/**
 *  Points caller `Parser` at a new sequence of Unicode code points and
 *  discards all parsing progress. Never fails.
 *
 *  Increments internal `version`, so any `ParserState` obtained before
 *  this call is no longer valid for the caller `Parser`.
 *
 *  @param  source  Sequence of Unicode code points that represents
 *      a string `Parser` will need to parse.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser InitializeRaw(array<Text.Character> source)
{
    local ParserState freshState;
    content = source;
    version += 1;
    //  Assemble a pristine state once, then use it both as the current state
    //  and as the initial "confirmed" checkpoint.
    freshState.ownerObject  = self;
    freshState.ownerVersion = version;
    freshState.failed       = false;
    freshState.pointer      = 0;
    currentState    = freshState;
    confirmedState  = freshState;
    return self;
}
/**
 *  `string` flavour of `InitializeRaw()`: converts `source` into a sequence
 *  of code points and starts parsing it from the beginning. Never fails.
 *  Any data from before this call is lost, any checkpoints are invalidated.
 *
 *  @param  source      String `Parser` will need to parse.
 *  @param  sourceType  Passed as-is to `TextAPI.StringToRaw()` together
 *      with `source`.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Initialize(
    string                      source,
    optional Text.StringType    sourceType)
{
    local array<Text.Character> rawSource;
    rawSource = _().text.StringToRaw(source, sourceType);
    //  `InitializeRaw()` returns caller `Parser` itself
    return InitializeRaw(rawSource);
}
/**
 *  Initializes `Parser` with new data from a `Text`.
 *
 *  Does nothing if passed `none` as a parameter: in that case `Parser`
 *  keeps its previous content and state.
 *  Otherwise any data from before this call is lost and any checkpoints
 *  are invalidated (see `InitializeRaw()`).
 *
 *  @param  source  `Text` object `Parser` will need to parse.
 *      If `none` is passed - parser won't be (re)initialized.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser InitializeT(Text source)
{
    if (source == none) return self;

    //  Actually load the contents of `source`; without this call
    //  the method would leave `Parser` untouched for any input.
    InitializeRaw(source.ToRaw());
    return self;
}
/**
 *  Reports whether caller `Parser` can still be used for parsing.
 *
 *  `Parser` enters a failed state whenever a parsing call returns without
 *  completing its job. While in a failed state, parsing methods do nothing
 *  until the state is reset (e.g. by `R()`).
 *
 *  @return `false` if `Parser` is in a failed state and `true` otherwise.
 */
public final function bool Ok()
{
    if (currentState.failed) {
        return false;
    }
    return true;
}
/**
 *  Takes a snapshot of the caller `Parser`'s current state.
 *
 *  As long as caller `Parser` is not reinitialized, returned structure can
 *  be passed to `RestoreState()` to bring `Parser` back to this exact state.
 *
 *  @see `RestoreState()`
 *  @return Copy of the current state of the caller `Parser`.
 */
public final function ParserState GetCurrentState()
{
    local ParserState snapshot;
    snapshot = currentState;
    return snapshot;
}
/**
 *  Takes a snapshot of the state that is currently recorded as confirmed
 *  (the one `R()` would revert to).
 *
 *  As long as caller `Parser` is not reinitialized, returned structure can
 *  be passed to `RestoreState()`.
 *
 *  @see `RestoreState()`, `Confirm()`, `R()`
 *  @return Copy of (currently) last confirmed state of this parser.
 */
public final function ParserState GetConfirmedState()
{
    local ParserState snapshot;
    snapshot = confirmedState;
    return snapshot;
}
/**
 *  Checks whether `stateToCheck` may be applied to the caller `Parser`.
 *  A state is valid when both hold:
 *      1. it was produced by the caller `Parser` itself
 *          (recorded owner is `self`);
 *      2. caller `Parser` was not reinitialized since then
 *          (recorded version equals current `version`).
 *
 *  @param  stateToCheck    `ParserState` to check for validity for
 *      caller `Parser`.
 *  @return `true` if given `stateToCheck` is valid and `false` otherwise.
 */
public final function bool IsStateValid(ParserState stateToCheck)
{
    return (    stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version);
}
/**
 *  Checks whether `RestoreState()` with the given state would leave caller
 *  `Parser` in an "Ok" (not failed) state.
 *
 *  @param  stateToCheck    `ParserState` to check.
 *  @return `true` only if `stateToCheck` is valid for the caller `Parser`
 *      (see `IsStateValid()`) AND was taken while `Parser` was not failed;
 *      `false` otherwise.
 */
public final function bool IsStateOk(ParserState stateToCheck)
{
    return (IsStateValid(stateToCheck) && !stateToCheck.failed);
}
/**
 *  Rewinds caller `Parser` to the state described by `stateToRestore`.
 *
 *  If `stateToRestore` does not pass `IsStateValid()` (it comes from
 *  a different `Parser` or caller was reinitialized since), the state is
 *  not applied and caller `Parser` is put into a failed state instead.
 *
 *  Ending up in a failed state after this call does not necessarily mean
 *  the state was rejected: `stateToRestore` itself may describe
 *  a failed `Parser`.
 *
 *  @param  stateToRestore  `ParserState` that this method will attempt
 *      to set for the caller `Parser`.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore))
    {
        currentState = stateToRestore;
    }
    else
    {
        //  Foreign or outdated state: refuse it and signal the problem
        currentState.failed = true;
    }
    return self;
}
/**
 *  Stores current state of `Parser` in its internal checkpoint, so that
 *  a later `R()` call can return to it.
 *
 *  Only non-failed states are stored: if caller `Parser` is in a failed
 *  state, checkpoint is left unchanged.
 *
 *  `Confirm()` and `R()` are convenience wrappers around
 *  `GetCurrentState()` / `RestoreState()` plus a storage variable.
 *
 *  @return `true` if current state was recorded as confirmed and
 *      `false` otherwise.
 */
public final function bool Confirm()
{
    if (Ok())
    {
        confirmedState = currentState;
        return true;
    }
    return false;
}
/**
 *  Reverts `Parser` to the internally stored confirmed state.
 *
 *  That state is written by the last successful `Confirm()` call or,
 *  if there was none, by initialization; both only ever store
 *  a non-failed state, so this call also clears the failed flag.
 *
 *  `Confirm()` and `R()` are convenience wrappers around
 *  `GetCurrentState()` / `RestoreState()` plus a storage variable.
 *
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser R()
{
    //  Plain copy, no validity check: checkpoint is our own variable
    currentState = confirmedState;
    return self;
}
/**
 *  Moves parsing pointer forward, never past the end of the content.
 *
 *  Pointer can only move forward through this method; to go back use
 *  `GetCurrentState()` / `RestoreState()`.
 *
 *  @param  shift   How many positions to move the pointer.
 *      Zero (default) and negative values are replaced with `1`.
 *  @return Returns the calling object, to allow for function chaining.
 */
protected final function Parser ShiftPointer(optional int shift)
{
    local int newPointer;
    if (shift < 1) {
        shift = 1;
    }
    newPointer = currentState.pointer + shift;
    //  Clamp to "one past the last code point"
    if (newPointer > content.length) {
        newPointer = content.length;
    }
    currentState.pointer = newPointer;
    return self;
}
/**
 *  Peeks at a character of `Parser`'s content without consuming it.
 *  Position is counted from the next character caller `Parser` must handle.
 *
 *  @param  shift   `0` (default) or any negative value asks for the next
 *      character to be handled; `1` asks for the one after it, and so on.
 *  @return Character at the requested position. If that position is outside
 *      of `Parser`'s content (content is empty, fully consumed or `shift`
 *      is too large), returned character has `codePoint` set to `-1`.
 */
protected final function Text.Character GetCharacter(optional int shift)
{
    local int               index;
    local Text.Character    noCharacter;
    index = currentState.pointer;
    if (shift > 0) {
        index += shift;
    }
    if (index >= 0 && index < content.length) {
        return content[index];
    }
    //  Out of bounds: report it through an impossible code point
    noCharacter.codePoint = -1;
    return noCharacter;
}
/**
 *  Puts caller `Parser` into a failed state. Pointer is left untouched.
 *
 *  @return Returns the calling object, so that parsing methods can exit
 *      with a single `return Fail();`.
 */
protected final function Parser Fail()
{
    //  Only the flag changes
    currentState.failed = true;
    return self;
}
/**
 *  Returns amount of code points that have already been parsed,
 *  i.e. current value of the parsing pointer (never negative).
 *
 *  @return How many Unicode code points have already been parsed if
 *      caller `Parser` is in a correct state;
 *      otherwise return value is undefined.
 */
public final function int GetParsedLength()
{
    if (currentState.pointer < 0) {
        return 0;
    }
    return currentState.pointer;
}
/**
 *  Returns amount of code points between the parsing pointer and the end
 *  of the content (never negative).
 *
 *  @return How many Unicode code points are still unparsed if
 *      caller `Parser` is in a correct state;
 *      otherwise return value is undefined.
 */
public final function int GetRemainingLength()
{
    local int remaining;
    remaining = content.length - currentState.pointer;
    if (remaining < 0) {
        return 0;
    }
    return remaining;
}
/**
 *  Checks if caller `Parser` has nothing left to parse.
 *  A `Parser` without content (e.g. uninitialized one) counts as finished.
 *  Agrees with `GetRemainingLength() == 0`.
 *
 *  @return `true` if caller `Parser` has no more data to parse.
 */
public final function bool HasFinished()
{
    return (content.length <= currentState.pointer);
}
/**
 *  Copies the still unparsed part of caller `Parser`'s content.
 *  Does not move the parsing pointer.
 *
 *  @return Unparsed part of caller `Parser`'s source as an array of
 *      Unicode code points.
 */
public final function array<Text.Character> GetRemainderRaw()
{
    local int                   i, remainingLength;
    local array<Text.Character> remainder;
    //  Nothing below changes the pointer, so the length can be read once
    remainingLength = GetRemainingLength();
    for (i = 0; i < remainingLength; i += 1) {
        remainder[i] = GetCharacter(i);
    }
    return remainder;
}
/**
 *  `string` flavour of `GetRemainderRaw()`.
 *  Does not move the parsing pointer.
 *
 *  @return Unparsed part of caller `Parser`'s source, converted by
 *      `TextAPI.RawToString()` with `STRING_Plain` type.
 */
public final function string GetRemainder()
{
    return _().text.RawToString(GetRemainderRaw(), STRING_Plain);
}
/**
 *  `Text` flavour of `GetRemainderRaw()`.
 *  Does not move the parsing pointer.
 *
 *  @return Unparsed part of caller `Parser`'s source as `Text`
 *      (produced by `TextAPI.FromRaw()`).
 */
public final function Text GetRemainderT()
{
    return _().text.FromRaw(GetRemainderRaw());
}
/**
 *  Matches (and consumes) any sequence of whitespace symbols,
 *  without returning it.
 *  Starts from where previous parsing function finished.
 *  Can never cause parser to enter failed state; does nothing if caller
 *  `Parser` is already in a failed state.
 *
 *  What symbols exactly are considered whitespace is decided by
 *  `TextAPI.IsWhitespace()`.
 *
 *  @param  whitespacesAmount   Returns how many whitespace symbols
 *      were skipped. Any given value is discarded
 *      (unless `Parser` was in a failed state: then it is left untouched).
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Skip(optional out int whitespacesAmount)
{
    local TextAPI api;
    if (!Ok()) return self;

    api = _().text;
    whitespacesAmount = 0;
    //  Cycle will end once we either reach a non-whitespace symbol or
    //  there's no more code points to get
    //  (`GetCharacter()` then returns a character with `-1` code point,
    //  which is expected not to count as whitespace)
    while (api.IsWhitespace(GetCharacter(whitespacesAmount))) {
        whitespacesAmount += 1;
    }
    //  Actually consume what we have counted.
    //  `ShiftPointer()` treats `0` as `1`, so it must not be called when
    //  there was no whitespace at all.
    if (whitespacesAmount > 0) {
        ShiftPointer(whitespacesAmount);
    }
    return self;
}
/**
 *  Tries to match given data against `Parser`'s content, starting from
 *  where previous parsing function finished.
 *
 *  On success matched code points are consumed.
 *  On mismatch (or if there is less content left than `data` contains)
 *  `Parser` enters a failed state and pointer is not moved.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  data            Data that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *      Empty array always matches and consumes nothing.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MatchRaw(
    array<Text.Character>   data,
    optional bool           caseInsensitive)
{
    local int       i;
    local TextAPI   api;
    if (!Ok())                              return self;
    if (data.length > GetRemainingLength()) return Fail();

    api = _().text;
    for (i = 0; i < data.length; i += 1)
    {
        if (!api.AreEqual(data[i], GetCharacter(i), caseInsensitive)) {
            return Fail();
        }
    }
    //  Everything matched: consume it.
    //  `ShiftPointer()` treats `0` as `1`, hence the guard for empty `data`.
    if (data.length > 0) {
        ShiftPointer(data.length);
    }
    return self;
}
/**
 *  `string` flavour of `MatchRaw()`: tries to match `word`, starting from
 *  where previous parsing function finished.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  word            String that must be matched to the `Parser`'s
 *      contents. It is converted with `TextAPI.StringToRaw()`
 *      (with default string type) before matching.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Match(string word, optional bool caseInsensitive)
{
    local array<Text.Character> rawWord;
    rawWord = _().text.StringToRaw(word);
    return MatchRaw(rawWord, caseInsensitive);
}
/**
 *  `Text` flavour of `MatchRaw()`: tries to match `word`, starting from
 *  where previous parsing function finished.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  word            `Text` that must be matched to the `Parser`'s
 *      contents. Passing `none` puts `Parser` into a failed state.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MatchT(Text word, optional bool caseInsensitive)
{
    local array<Text.Character> rawWord;
    if (!Ok()) {
        return self;
    }
    if (word == none) {
        return Fail();
    }
    rawWord = word.ToRaw();
    return MatchRaw(rawWord, caseInsensitive);
}
/**
 *  Internal function for parsing unsigned integers in any base from 2 to 36.
 *
 *  Consumes every code point it recognizes as a digit.
 *  This parsing can fail, putting `Parser` into a failed state.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result              If parsing is successful, this value will
 *      contain parsed integer, otherwise value is undefined.
 *      Any passed value is discarded. Overflow is not checked.
 *  @param  base                Base, in which integer in question is
 *      recorded. `0` (default) means decimal; any other value outside of
 *      `[2; 36]` range makes parsing fail.
 *  @param  numberLength        If this parameter is less or equal to zero,
 *      function will stop parsing the moment it can't recognize a character
 *      as belonging to a number in a given base and will only fail if
 *      it couldn't parse a single character.
 *      If this parameter is positive (`> 0`), function will attempt to use
 *      exactly `numberLength` characters for parsing and will fail if
 *      they would not constitute a valid number.
 *  @param  consumedCodePoints  Amount of code points used (consumed) to
 *      parse this number; undefined, if parsing is unsuccessful.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUnsignedInteger(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints)
{
    local bool      parsingFixedLength;
    local int       nextPosition;
    local TextAPI   api;
    //  Like any other parsing method - refuse to work in a failed state
    if (!Ok()) return self;

    numberLength = Max(0, numberLength);
    parsingFixedLength = (numberLength != 0);
    if (base == 0)
    {
        base = 10;
    }
    else if (base < 2 || base > 36)
    {
        return Fail();
    }

    api = _().text;
    result = 0;
    consumedCodePoints = 0;
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) break;
        //  Negative value is treated as "not a digit in this base"
        nextPosition = api.CharacterToInt(GetCharacter(), base);
        if (nextPosition < 0) break;

        result = result * base + nextPosition;
        consumedCodePoints += 1;
        //  Move on to the next code point; without this the loop would
        //  keep re-reading the same digit forever.
        ShiftPointer();
    }
    //  At least one digit is always required...
    if (consumedCodePoints < 1) {
        return Fail();
    }
    //  ...and exactly `numberLength` digits for fixed-length numbers
    if (parsingFixedLength && consumedCodePoints != numberLength) {
        return Fail();
    }
    return self;
}
/**
 *  Parses escaped sequence of the type that is usually used in
 *  string literals: backslash "\", followed by any character
 *  (called escaped character later) or, in special cases, several characters.
 *
 *  For most characters escaped sequence resolves into
 *  the escaped character itself.
 *  Escaped characters listed in `escapeCharactersMap`
 *  (e.g. \n, \r, \t, \b, \f, \v) are translated into a different code point,
 *  corresponding to the control symbol normally denoted by the sequence.
 *  A Unicode code point can also be directly entered with either of the two
 *  commands (escaped character is compared against `CODEPOINT_ULARGE` /
 *  `CODEPOINT_USMALL`):
 *      \U0056  - followed by exactly 4 hexadecimal digits;
 *      \u56    - followed by exactly 2 hexadecimal digits.
 *  If required amount of hexadecimal digits is not there, parsing fails.
 *
 *  Consumes the whole sequence on success.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  denotedCharacter    If parsing is successful, parameter will
 *      contain appropriate character, denoted by a parsed escaped sequence;
 *      if parsing is unsuccessful, value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MEscapedSequence(
    out Text.Character denotedCharacter)
{
    local int i;
    if (!Ok())                                              return self;
    //  Need at least two characters to parse escaped sequence
    if (GetRemainingLength() < 2)                           return Fail();
    if (GetCharacter().codePoint != CODEPOINT_BACKSLASH)    return Fail();

    denotedCharacter = GetCharacter(1);
    //  Consume backslash and escaped character, so that the caller
    //  (and hexadecimal parsing below) continue right after them.
    ShiftPointer(2);
    //  Escaped character denotes some special code point
    for (i = 0; i < escapeCharactersMap.length; i += 1)
    {
        if (escapeCharactersMap[i].from == denotedCharacter.codePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[i].to;
            return self;
        }
    }
    //  Escaped character denotes declaration of arbitrary Unicode code point;
    //  failure to read the digits leaves `Parser` in a failed state.
    if (denotedCharacter.codePoint == CODEPOINT_ULARGE)
    {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 4);
    }
    else if (denotedCharacter.codePoint == CODEPOINT_USMALL)
    {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 2);
    }
    return self;
}
/**
 *  Attempts to parse a string literal: a string enclosed in quotation marks
 *  (what counts as one is decided by `TextAPI.IsQuotationMark()`).
 *  Literal must be closed by the same quotation mark it was opened with.
 *
 *  String literals can contain escaped sequences; every backslash MUST
 *  start a valid one.
 *  String literals MUST end with closing quotation mark, otherwise
 *  parsing fails.
 *  On success both quotation marks and everything between them are consumed.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @see `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this array will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, its value is undefined.
 *      Any passed contents are simply discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteralRaw(out array<Text.Character> result)
{
    local TextAPI           api;
    local Text.Character    nextCharacter;
    local Text.Character    usedQuotationMark;
    local Text.Character    escapedCharacter;
    if (!Ok()) return self;

    api = _().text;
    usedQuotationMark = GetCharacter();
    if (!api.IsQuotationMark(usedQuotationMark)) return Fail();

    ShiftPointer(); //  Skip opening quotation mark
    result.length = 0;
    while (!HasFinished())
    {
        nextCharacter = GetCharacter();
        //  Closing quote: consume it and report success
        if (api.AreEqual(nextCharacter, usedQuotationMark))
        {
            ShiftPointer();
            return self;
        }
        if (api.IsCodePoint(nextCharacter, CODEPOINT_BACKSLASH))
        {
            //  Escaped characters.
            //  Pointer is not moved here: consuming the sequence is
            //  `MEscapedSequence()`'s job.
            if (!MEscapedSequence(escapedCharacter).Ok()) {
                return Fail();  // Backslash MUST mean valid escape sequence
            }
            result[result.length] = escapedCharacter;
        }
        else
        {
            //  Any other code point is recorded as-is
            result[result.length] = nextCharacter;
            ShiftPointer();
        }
    }
    //  Content ended without a closing quote.
    return Fail();
}
/**
 *  `string` flavour of `MStringLiteralRaw()`: attempts to parse a string
 *  enclosed in quotation marks.
 *
 *  String literals can contain escaped sequences.
 *  String literals MUST end with closing quotation mark.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @see `MStringLiteralRaw()`, `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this `string` will contain
 *      the contents of string literal with resolved escaped sequences
 *      (converted with `STRING_Plain` type);
 *      if parsing has failed, it is left unchanged by this method.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteral(out string result)
{
    local array<Text.Character> rawResult;
    if (!Ok()) {
        return self;
    }
    MStringLiteralRaw(rawResult);
    //  Only convert what was parsed successfully
    if (Ok()) {
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}
/**
 *  `Text` flavour of `MStringLiteralRaw()`: attempts to parse a string
 *  enclosed in quotation marks.
 *
 *  String literals can contain escaped sequences.
 *  String literals MUST end with closing quotation mark.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @see `MStringLiteralRaw()`, `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this will be a `Text`
 *      (produced by `TextAPI.FromRaw()`) with the contents of string
 *      literal with resolved escaped sequences;
 *      if parsing has failed, it is left unchanged by this method.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteralT(out Text result)
{
    local array<Text.Character> rawResult;
    if (!Ok()) {
        return self;
    }
    MStringLiteralRaw(rawResult);
    //  Only build `Text` from what was parsed successfully
    if (Ok()) {
        result = _().text.FromRaw(rawResult);
    }
    return self;
}
/**
 *  Matches (and consumes) everything until it finds one of
 *  the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol
 *          (@see `TextAPI.IsQuotationMark()`).
 *  Breaking symbol itself is not consumed.
 *  This method cannot fail; it does nothing if caller `Parser` was
 *  in failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this array as a sequence of characters.
 *      Any passed contents are discarded.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      a character equal to this one (per `TextAPI.AreEqual()`);
 *      it will not be included in the `result`.
 *      If omitted, a default-initialized character is used.
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol.
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntilRaw(
    out array<Text.Character>   result,
    optional Text.Character     characterBreak,
    optional bool               whitespacesBreak,
    optional bool               quotesBreak)
{
    local Text.Character    nextCharacter;
    local TextAPI           api;
    if (!Ok()) return self;

    api = _().text;
    result.length = 0;
    while (!HasFinished())
    {
        nextCharacter = GetCharacter();
        if (api.AreEqual(nextCharacter, characterBreak))                break;
        if (whitespacesBreak && api.IsWhitespace(nextCharacter))        break;
        if (quotesBreak && api.IsQuotationMark(nextCharacter))          break;
        result[result.length] = nextCharacter;
        //  Consume recorded character; without this the loop would
        //  keep re-reading it forever.
        ShiftPointer();
    }
    return self;
}
/**
 *  `string` flavour of `MUntilRaw()`: matches everything until it finds
 *  one of the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol
 *          (@see `TextAPI.IsQuotationMark()`).
 *  This method cannot fail; it does nothing if caller `Parser` was
 *  in failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `string`
 *      (converted with `STRING_Plain` type).
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol.
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntil(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local array<Text.Character> rawResult;
    if (Ok())
    {
        //  All arguments are forwarded unchanged
        MUntilRaw(rawResult, characterBreak, whitespacesBreak, quotesBreak);
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}
/**
 *  `Text` flavour of `MUntilRaw()`: matches everything until it finds
 *  one of the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol
 *          (@see `TextAPI.IsQuotationMark()`).
 *  This method cannot fail; it does nothing if caller `Parser` was
 *  in failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `Text` (produced by `TextAPI.FromRaw()`).
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol.
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntilT(
    out Text                result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local array<Text.Character> rawResult;
    if (Ok())
    {
        //  All arguments are forwarded unchanged
        MUntilRaw(rawResult, characterBreak, whitespacesBreak, quotesBreak);
        result = _().text.FromRaw(rawResult);
    }
    return self;
}
/**
 *  Parses a string as either "simple" or "quoted".
 *
 *  If next character is a quotation mark, this method acts exactly like
 *  `MStringLiteralRaw()` (and can fail the same way).
 *  Otherwise ("simple" string) it reads everything up to the next
 *  whitespace or quotation mark via `MUntilRaw()`; reading an empty string
 *  this way (either due to lack of further data or instantly encountering
 *  a break symbol) is not considered a failure.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringRaw(out array<Text.Character> result)
{
    if (!Ok()) return self;

    if (_().text.IsQuotationMark(GetCharacter()))
    {
        //  Quoted string: full string literal rules apply
        MStringLiteralRaw(result);
    }
    else
    {
        //  Simple string: stop at any whitespace or quotation mark
        MUntilRaw(result,, true, true);
    }
    return self;
}
/**
 *  `string` flavour of `MStringRaw()`: parses a string as either "simple"
 *  or "quoted".
 *
 *  If string starts with a quotation mark, this method acts like
 *  `MStringLiteral()`; otherwise it reads everything up to the next
 *  whitespace or quotation mark, and reading an empty string is not
 *  considered a failure.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @see `MStringRaw()`
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here (converted with `STRING_Plain` type);
 *      if parsing has failed - it is left unchanged by this method.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MString(out string result)
{
    local array<Text.Character> rawResult;
    if (!Ok()) return self;

    //  Do the actual parsing; converting `rawResult` without it would
    //  always produce an empty string and consume nothing.
    if (MStringRaw(rawResult).Ok()) {
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}
/**
 *  `Text` flavour of `MStringRaw()`: parses a string as either "simple"
 *  or "quoted".
 *
 *  If string starts with a quotation mark, this method acts like
 *  `MStringLiteralT()`; otherwise it reads everything up to the next
 *  whitespace or quotation mark, and reading an empty string is not
 *  considered a failure.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @see `MStringRaw()`
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here as a `Text` (produced by `TextAPI.FromRaw()`);
 *      if parsing has failed - it is left unchanged by this method.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringT(out Text result)
{
    local array<Text.Character> rawResult;
    if (!Ok()) return self;

    //  Do the actual parsing; converting `rawResult` without it would
    //  always produce an empty `Text` and consume nothing.
    //  `Text` is only created on success to avoid producing needless objects.
    if (MStringRaw(rawResult).Ok()) {
        result = _().text.FromRaw(rawResult);
    }
    return self;
}
/**
 *  Matches (and consumes) a sequence of whitespace symbols, as decided by
 *  `TextAPI.IsWhitespace()`. Sequence is allowed to be empty.
 *
 *  Cannot fail (not being able to read any input is not considered
 *  a failure); does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  Consumed whitespace characters will be recorded in
 *      this array. Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespacesRaw(out array<Text.Character> result)
{
    local Text.Character    nextCharacter;
    local TextAPI           api;
    if (!Ok()) return self;

    api = _().text;
    result.length = 0;
    while (!HasFinished())
    {
        nextCharacter = GetCharacter();
        if (!api.IsWhitespace(nextCharacter)) break;
        result[result.length] = nextCharacter;
        //  Consume recorded character; without this the loop would
        //  keep re-reading it forever.
        ShiftPointer();
    }
    return self;
}
/**
 *  `string` flavour of `MWhitespacesRaw()`: matches (and consumes)
 *  a possibly empty sequence of whitespace symbols.
 *
 *  Cannot fail (not being able to read any input is not considered
 *  a failure); does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  Consumed whitespaces will be recorded here
 *      (converted with `STRING_Plain` type). Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespaces(out string result)
{
    local array<Text.Character> rawResult;
    if (!Ok()) return self;

    //  Do the actual parsing; converting `rawResult` without it would
    //  always produce an empty string and consume nothing.
    MWhitespacesRaw(rawResult);
    result = _().text.RawToString(rawResult, STRING_Plain);
    return self;
}
/**
 *  `Text` flavour of `MWhitespacesRaw()`: matches (and consumes)
 *  a possibly empty sequence of whitespace symbols.
 *
 *  Cannot fail (not being able to read any input is not considered
 *  a failure); does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  Consumed whitespaces will be recorded here as a `Text`
 *      (produced by `TextAPI.FromRaw()`). Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespacesT(out Text result)
{
    local array<Text.Character> rawResult;
    if (!Ok()) return self;

    //  Do the actual parsing; converting `rawResult` without it would
    //  always produce an empty `Text` and consume nothing.
    MWhitespacesRaw(rawResult);
    result = _().text.FromRaw(rawResult);
    return self;
}
/**
 *  Parses (and consumes) next character as itself.
 *
 *  Can only fail if caller `Parser` has already exhausted all available
 *  data. Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing was successful - next character,
 *      otherwise - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MCharacter(out Text.Character result)
{
    if (!Ok())          return self;
    if (HasFinished())  return Fail();

    result = GetCharacter();
    //  Character was parsed, so it must be consumed; otherwise repeated
    //  calls would keep returning the same character.
    ShiftPointer();
    return self;
}
/**
 *  Parses next code point as a byte.
 *
 *  Can fail if caller `Parser` has already exhausted all available data or
 *  next Unicode code point does not fit into `[0; BYTE_MAX]` range.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing was successful - next Unicode code point as
 *      a byte, otherwise - it is left unchanged by this method.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MByte(out byte result)
{
    local Text.Character parsedCharacter;
    if (!Ok()) {
        return self;
    }
    MCharacter(parsedCharacter);
    if (!Ok()) {
        return Fail();
    }
    //  Accept only code points that can be stored in a `byte`
    if (    parsedCharacter.codePoint >= 0
        &&  parsedCharacter.codePoint <= BYTE_MAX)
    {
        result = parsedCharacter.codePoint;
        return self;
    }
    return Fail();
}
* Tries to parse a sign: either "+" or "-".
* @param result Value of `ParsedSign` will be recorded here,
* depending on what sign was encountered.
* `SIGN_Missing` value is only possible if we allow sign to be missing.
* @param allowMissingSign By default `false` means that parsing will fail
* if next character is neither "+" or "-";
* `true` means that parsing will not fail even if there is not sign, -
* method will then consume in input and will return `SIGN_Missing`
* as a result.
* @return Returns the calling object, to allow for function chaining.
public final function Parser MSign
(
    out ParsedSign  result,
    optional bool   allowMissingSign
)
{
    local ParserState checkpoint;
    //  Parses optional / mandatory sign ("+" or "-") into `result`.
    //  Already failed parser is returned untouched.
    if (!Ok()) return self;

    //  Remember where we started, so that each attempt begins at
    //  the same position and in a non-failed state
    checkpoint = GetCurrentState();
    if (Match("-").Ok())
    {
        result = SIGN_Minus;
    }
    else if (RestoreState(checkpoint).Match("+").Ok())
    {
        result = SIGN_Plus;
    }
    else if (allowMissingSign)
    {
        //  Both attempts above were unsuccessful; since missing sign is
        //  allowed, we must undo them: otherwise parser is left in the state
        //  produced by the last unsuccessful `Match()` and caller
        //  cannot continue parsing.
        RestoreState(checkpoint);
        result = SIGN_Missing;
    }
    //  Otherwise sign is mandatory and parser is left as the last
    //  unsuccessful `Match()` left it (presumably failed - that is what
    //  the `.Ok()` checks above rely on).
    return self;
}
* Tries to parse a number prefix that determines a base system for denoting
* integer numbers:
* 1. `0x` means hexadecimal;
* 2. `0b` means binary;
* 3. `0o` means octal;
* 4. otherwise we use decimal system.
* This parsing method cannot fail.
* Parser consumes appropriate prefix; nothing if decimal system is determined.
* @return Returns the calling object, to allow for function chaining.
public final function Parser MBase(out int base)
{
    local ParserState checkpoint;
    //  Detects base from an optional prefix: "0x" -> 16, "0b" -> 2, "0o" -> 8,
    //  no prefix -> 10. Already failed parser is returned untouched.
    if (!Ok()) return self;

    //  Every attempt starts from the same position
    checkpoint = GetCurrentState();
    if (Match("0x").Ok())
    {
        base = 16;
    }
    else if (RestoreState(checkpoint).Match("0b").Ok())
    {
        base = 2;
    }
    else if (RestoreState(checkpoint).Match("0o").Ok())
    {
        base = 8;
    }
    else
    {
        //  Decimal is a fallback only: it must not overwrite a base that was
        //  detected above. Undo unsuccessful attempts, so that this method
        //  neither fails nor consumes any input in that case.
        RestoreState(checkpoint);
        base = 10;
    }
    return self;
}
* Parses signed integer either in a directly given base (`base`) or in an
* auto-determined one (based on prefix, @see `MBase()`).
* Integers are expected in form: (+/-)(0x/0b/0o)<sequence of digits>.
* Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
* @param result If parsing is successful - parsed value will be
* recorded here; if parsing fails - value is undetermined.
* Any passed value is discarded.
* @param base base in which function must attempt to parse a number;
* Default value (`0`) means function must auto-determine base,
* based on the prefix, otherwise must be between 2 and 36.
* @return Returns the calling object, to allow for function chaining.
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign integerSign;
    //  Parses (+/-)(0x/0b/0o)<digits> into `result`.
    //  Already failed parser is returned untouched.
    if (!Ok()) return self;

    //  Sign is optional
    MSign(integerSign, true);
    //  `0` means "auto-detect": only then we look for a base prefix.
    //  `base` is passed by value, so changing it does not affect the caller.
    if (base == 0)
    {
        MBase(base);
    }
    //  Digits must be parsed for any base, not only for auto-detected one
    MUnsignedInteger(result, base);
    if (integerSign == SIGN_Minus)
    {
        result *= -1;
    }
    return self;
}
// Internal function for parsing fractional part (including the dot ".")
// of the text representation for floating point number (decimal system only).
// Cannot fail, returns `0.0` if it couldn't parse anything.
protected final function Parser MFractionalPart(out float result)
{
    local ParserState   checkpoint;
    local int           fractionalInt;
    local int           digitsRead;
    //  Parses ".<digits>" into a value in [0; 1). Records `0.0` if there is
    //  nothing to parse. Already failed parser is returned untouched.
    if (!Ok()) return self;

    result = 0.0;
    checkpoint = GetCurrentState();
    if (!Match(".").Ok())
    {
        //  No fractional part at all: undo unsuccessful match,
        //  since this method is not supposed to fail
        RestoreState(checkpoint);
        return self;
    }
    //  From now on we roll back to the position right after the dot
    checkpoint = GetCurrentState();
    if (!MUnsignedInteger(fractionalInt,,, digitsRead).Ok())
    {
        //  Dot without digits after it: keep the dot consumed, report `0.0`
        RestoreState(checkpoint);
        return self;
    }
    //  Shift parsed digits behind the decimal point
    result = float(fractionalInt) * (0.1 ** digitsRead);
    return self;
}
// Internal function for parsing exponent part (including the symbol "e")
// of the text representation for floating point number (decimal system only).
// Can only fail if symbol "e" / "E" is present, but there is no valid
// integer right after it (whitespace symbols in-between are forbidden).
// Returns `0.0` if there was no exponent to parse.
protected final function Parser MExponentPart(out int result)
{
    local ParserState   checkpoint;
    local ParsedSign    exponentSign;
    //  Parses "e<signed integer>" into `result`; records `0` if there is
    //  no exponent. Already failed parser is returned untouched.
    if (!Ok()) return self;

    //  Discard passed value up front, so that "no exponent" case
    //  also produces a defined result
    result = 0;
    //  Is there even an exponential part?
    //  (second argument of `Match()` presumably makes comparison
    //  case-insensitive, accepting both "e" and "E" - confirm)
    checkpoint = GetCurrentState();
    if (!Match("e", true).Ok())
    {
        //  Missing exponent is not an error: undo unsuccessful match
        RestoreState(checkpoint);
        return self;
    }
    //  If yes - parse it; failure here is intentionally not rolled back:
    //  "e" without a valid integer after it is an error
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus)
    {
        result *= -1;
    }
    return self;
}
// Internal function for parsing optional suffix of the text representation
// for floating point number ("f" or "F").
// Cannot fail. Can only consume one Unicode code point,
// when it is either "f" or "F".
protected final function Parser MFloatSuffix()
{
    local ParserState checkpoint;
    //  Consumes optional "f" suffix of a floating point number.
    //  Already failed parser is returned untouched.
    if (!Ok()) return self;

    checkpoint = GetCurrentState();
    if (!Match("f", true).Ok())
    {
        //  Suffix is optional: undo unsuccessful match,
        //  since this method is not supposed to fail
        RestoreState(checkpoint);
    }
    //  Calling object must be returned on every path
    return self;
}
* Parses signed floating point number in JSON form + optional "f" / "F"
* suffix at the end.
* @param result If parsing is successful - parsed value will be
* recorded here; if parsing fails - value is undetermined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
public final function Parser MNumber(out float result)
{
    local ParsedSign    sign;
    local int           integerPart, exponentPart;
    local float         fractionalPart;
    //  Parses (+/-)<digits>(.<digits>)(e(+/-)<digits>)(f) into `result`.
    //  Already failed parser is returned untouched.
    if (!Ok()) return self;

    //  Parse all the components in order; each step is skipped automatically
    //  once parser gets into a failed state
    self.MSign(sign, true)
        .MUnsignedInteger(integerPart, 10)
        .MFractionalPart(fractionalPart)
        .MExponentPart(exponentPart)
        .MFloatSuffix();
    if (!Ok())
    {
        return self;
    }
    //  Assemble the value; sign is applied last, so that numbers like "-0.5"
    //  (with zero integer part) are handled correctly
    result = float(integerPart) + fractionalPart;
    result *= 10.0 ** exponentPart;
    if (sign == SIGN_Minus)
    {
        result *= -1;
    }
    return self;
}
defaultproperties
{
    //  Start with no initializations done
    version = 0
    //  Largest value that fits into `byte`
    BYTE_MAX = 255
    //  NOTE(review): values below are inferred from the variable names
    //  (code points of "u" and "U") - confirm against their usage
    CODEPOINT_USMALL = 117
    CODEPOINT_ULARGE = 85
    //  Escape sequences: `from` is the code point that follows a backslash,
    //  `to` is the code point it denotes
    escapeCharactersMap(0)=(from=110,to=10) // \n
    escapeCharactersMap(1)=(from=114,to=13) // \r
    escapeCharactersMap(2)=(from=116,to=9) // \t
    escapeCharactersMap(3)=(from=98,to=8) // \b
    escapeCharactersMap(4)=(from=102,to=12) // \f
    escapeCharactersMap(5)=(from=118,to=11) // \v
}