/**
 *      Implements a simple `Parser` with built-in functions to parse simple
 *  UnrealScript's types and support for saving / restoring parser states.
 *      Copyright 2020 Anton Tarasenko
 *------------------------------------------------------------------------------
 * This file is part of Acedia.
 *
 * Acedia is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License, or
 * (at your option) any later version.
 *
 * Acedia is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Acedia.  If not, see <https://www.gnu.org/licenses/>.
 */
class Parser extends AcediaObject
    dependson(Text)
    dependson(UnicodeData);

//  NOTE(review): the four values below are not assigned anywhere in
//  the visible part of this file (presumably in `defaultproperties`) - confirm.
//  Upper bound (inclusive) a code point is compared against in `MByte()`.
var public int BYTE_MAX;
//  Code point that `MEscapedSequence()` / `MStringLiteralRaw()` treat as
//  the beginning of an escaped sequence.
var public int CODEPOINT_BACKSLASH;
//  Escaped character after which `MEscapedSequence()` reads
//  a 4-digit hexadecimal code point.
var public int CODEPOINT_USMALL;
//  Escaped character after which `MEscapedSequence()` reads
//  a 2-digit hexadecimal code point.
var public int CODEPOINT_ULARGE;

//  The sequence of Unicode code points that this `Parser` is supposed to parse.
//  NOTE(review): element type is missing from this declaration; elements are
//  returned from `GetCharacter()` as `Text.Character` - confirm.
var private array content;
//  Incremented each time `Parser` is reinitialized with new `content`.
//  Can be used to make `Parser` object completely independent from
//  its past, necessary since garbage collection is extra expensive in UE2
//  and we want to reuse created objects as much as possible.
var private int version;

//  Describes current state of the `Parser`, instance of this struct
//  can be used to revert parser back to this state.
struct ParserState
{
    //  Record to which object (and of what version) this state belongs to.
    //  This information is used to make sure that we apply this state
    //  only to same `Parser` (of the same version) that it originated from.
    var private AcediaObject ownerObject;
    var private int ownerVersion;
    //  Has parser failed at some point?
var private bool failed;
    //  Index (inside `content`) of the next symbol to be used in parsing.
    var private int pointer;
};
var private ParserState currentState;
//  For convenience `Parser` keeps one internal checkpoint: a state that is
//  safe to revert to when some parsing attempt goes wrong.
//  @see `Confirm()`, `R()`
var private ParserState confirmedState;

//  Rules for translating escaped sequences ("\r", "\n", "\t")
//  into appropriate code points.
var private const array escapeCharactersMap;

//  Used to store a result of a `MSign()` function.
enum ParsedSign
{
    SIGN_Missing,
    SIGN_Plus,
    SIGN_Minus
};

/**
 *  (Re)initializes `Parser` with a raw sequence of Unicode code points.
 *  Never fails.
 *
 *  All previous data is dropped and every earlier obtained checkpoint
 *  (`ParserState`) becomes invalid, since `version` is incremented.
 *
 *  @param  source  Sequence of Unicode code points that represents
 *      a string `Parser` will need to parse.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser InitializeRaw(array source)
{
    version += 1;
    content = source;
    //  Fresh, non-failed state that points at the very first symbol
    currentState.pointer        = 0;
    currentState.failed         = false;
    currentState.ownerVersion   = version;
    currentState.ownerObject    = self;
    //  Initial state doubles as the first confirmed checkpoint
    confirmedState = currentState;
    return self;
}

/**
 *  (Re)initializes `Parser` with contents of a `string`. Never fails.
 *
 *  All previous data is dropped, any checkpoints are invalidated.
 *
 *  @param  source      String `Parser` will need to parse.
 *  @param  sourceType  Passed as-is to `TextAPI.StringToRaw()` to convert
 *      `source` into raw data.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Initialize
(
    string                      source,
    optional Text.StringType    sourceType
)
{
    return InitializeRaw(_().text.StringToRaw(source, sourceType));
}

/**
 *  Initializes `Parser` with new data from a `Text`.
 *
 *  Does nothing if passed `none` as a parameter.
 *
 *  Otherwise any data from before this call is lost,
 *  any checkpoints are invalidated.
 *
 *  @param  source  `Text` object `Parser` will need to parse.
 *      If `none` is passed - parser won't be initialized.
* @return Returns the calling object, to allow for function chaining.
 */
public final function Parser InitializeT(Text source)
{
    if (source != none) {
        InitializeRaw(source.ToRaw());
    }
    return self;
}

/**
 *  Tells whether caller `Parser` is still usable, i.e. not in a failed state.
 *
 *  `Parser` enters a failed state whenever any parsing call returns without
 *  completing its job. `Parser` in a failed state will automatically fail
 *  any further parsing attempts until it is reverted to a non-failed state
 *  (e.g. via `R()` call).
 *
 *  @return `false` if `Parser` is in a failed state and `true` otherwise.
 */
public final function bool Ok()
{
    if (currentState.failed) {
        return false;
    }
    return true;
}

/**
 *  Returns copy of the current state of this parser.
 *
 *  As long as caller `Parser` was not reinitialized, returned `ParserState`
 *  can be passed to `RestoreState()` to bring this `Parser` back to
 *  the condition it is in right now.
 *
 *  @see `RestoreState()`
 *  @return Copy of the current state of the caller `Parser`.
 */
public final function ParserState GetCurrentState()
{
    local ParserState snapshot;
    snapshot = currentState;
    return snapshot;
}

/**
 *  Returns copy of the (currently) last confirmed state of this parser.
 *
 *  As long as caller `Parser` was not reinitialized, returned `ParserState`
 *  can be passed to `RestoreState()` to bring this `Parser` back to
 *  its currently confirmed state.
 *
 *  @see `RestoreState()`, `Confirm()`, `R()`
 *  @return Copy of (currently) last confirmed state of this parser.
 */
public final function ParserState GetConfirmedState()
{
    local ParserState snapshot;
    snapshot = confirmedState;
    return snapshot;
}

/**
 *  Checks if given `stateToCheck` is valid for the caller `Parser`, i.e.:
 *      1. It is a state generated by either `GetCurrentState()` or
 *          `GetConfirmedState()` calls on the caller `Parser`.
 *      2. Caller `Parser` was not reinitialized since a call
 *          that generated given `stateToCheck`.
 *
 *  @param  stateToCheck    `ParserState` to check for validity for
 *      caller `Parser`.
 *  @return `true` if given `stateToCheck` is valid and `false` otherwise.
*/
public final function bool IsStateValid(ParserState stateToCheck)
{
    //  State must come from this very object and from its current "version"
    return      stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version;
}

/**
 *  Checks if calling `RestoreState()` for passed state will leave `Parser`
 *  in an "Ok" (not failed) state, i.e. the state is valid for the caller
 *  `Parser` and was recorded while it was in a non-failed state.
 *
 *  @param  stateToCheck    `ParserState` to check.
 *  @return `true` if given `stateToCheck` is both valid for
 *      the caller `Parser` and describes a non-failed state;
 *      `false` otherwise.
 */
public final function bool IsStateOk(ParserState stateToCheck)
{
    return IsStateValid(stateToCheck) && !stateToCheck.failed;
}

/**
 *  Reverts parser to the state described by `stateToRestore`
 *  (the state `Parser` was in at the moment `stateToRestore` was obtained).
 *
 *  If given `stateToRestore` belongs to a different `Parser` or
 *  the owner `Parser` was reinitialized after the state was obtained, -
 *  current state is kept, but caller `Parser` is put into a failed state.
 *  Note that `Parser` being in a failed state after this call does not
 *  necessarily mean that `stateToRestore` was rejected:
 *  `stateToRestore` can itself describe a failed state of the `Parser`.
 *
 *  @param  stateToRestore  `ParserState` that this method will attempt
 *      to set for the caller `Parser`.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore)) {
        currentState = stateToRestore;
    }
    else {
        currentState.failed = true;
    }
    return self;
}

/**
 *  Remembers current state of `Parser` in an internal checkpoint variable,
 *  that can later be restored by an `R()` call.
 *
 *  Can only save non-failed states and will only fail if caller `Parser` is
 *  in a failed state.
*
 *  `Confirm()` and `R()` are essentially convenience wrappers around
 *  `GetCurrentState()` and `RestoreState()` calls + state storage variable.
 *
 *  @return `true` if current state is recorded in `Parser` as confirmed and
 *      `false` otherwise.
 */
public final function bool Confirm()
{
    if (Ok())
    {
        confirmedState = currentState;
        return true;
    }
    return false;
}

/**
 *  Reverts `Parser` to the state recorded by the last successful
 *  `Confirm()` call. If there was no such call -
 *  reverts `Parser` to its state right after initialization.
 *
 *  Always resets failed state of a `Parser`. Cannot fail.
 *
 *  `Confirm()` and `R()` are essentially convenience wrappers around
 *  `GetCurrentState()` and `RestoreState()` calls + state storage variable.
 *
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser R()
{
    currentState = confirmedState;
    return self;
}

/**
 *  Moves parsing pointer forward.
 *
 *  Pointer can only move forward and never past the end of `content`.
 *  To go back (e.g. in case of failure) use a combination of
 *  `GetCurrentState()` and `RestoreState()` functions.
 *
 *  @param  shift   By how many positions to move the parsing pointer.
 *      Values of zero and below are discarded and `1` is used instead
 *      (i.e. by default this method shifts pointer by `1` position).
 *  @return Returns the calling object, to allow for function chaining.
 */
protected final function Parser ShiftPointer(optional int shift)
{
    if (shift < 1) {
        shift = 1;
    }
    currentState.pointer += shift;
    //  Clamp to the "everything is consumed" position
    if (currentState.pointer > content.length) {
        currentState.pointer = content.length;
    }
    return self;
}

/**
 *  Returns a character from this `Parser`'s content, relative to the next
 *  character that caller `Parser` must handle.
 *
 *  @param  `shift` If `0` (default value) or negative value is passed -
 *      simply asks for the character that caller `Parser` must handle next.
 *      Otherwise shifts that index `shift` positions forward, i.e.
 *      `1` to return next character or `2` to return character after
 *      the next one.
 *  @return Returns character at a given shift.
*      If `shift` is too large and does not fit `Parser`'s contents,
 *      returns a character with code point set to `-1`.
 *      `GetCharacter()` with default (`0`) parameter can also return such
 *      character if contents of the caller `Parser` are empty or it has
 *      already consumed all input.
 */
protected final function Text.Character GetCharacter(optional int shift)
{
    local int               index;
    local Text.Character    missingCharacter;
    //  Negative shifts are treated as zero
    index = currentState.pointer + Max(0, shift);
    if (index >= 0 && index < content.length) {
        return content[index];
    }
    missingCharacter.codePoint = -1;
    return missingCharacter;
}

/**
 *  Puts caller `Parser` into a failed state.
 *
 *  @return Returns the calling object, to allow for a quick exit from
 *      a parsing function by `return Fail();`.
 */
protected final function Parser Fail()
{
    currentState.failed = true;
    return self;
}

/**
 *  Returns amount of code points that have already been parsed,
 *  provided that caller `Parser` is in a correct state.
 *
 *  @return How many Unicode code points have already been parsed if
 *      caller `Parser` is in a correct state;
 *      otherwise return value is undefined.
 */
public final function int GetParsedLength()
{
    if (currentState.pointer < 0) {
        return 0;
    }
    return currentState.pointer;
}

/**
 *  Returns amount of code points that have not yet been parsed,
 *  provided that caller `Parser` is in a correct state.
 *
 *  @return How many Unicode code points are still unparsed if
 *      caller `Parser` is in a correct state;
 *      otherwise return value is undefined.
 */
public final function int GetRemainingLength()
{
    local int remaining;
    remaining = content.length - currentState.pointer;
    if (remaining < 0) {
        return 0;
    }
    return remaining;
}

/**
 *  Checks if caller `Parser` has already parsed all of its content.
 *  Uninitialized `Parser` has no content and, therefore, parsed it all.
 *
 *  Should return `true` iff `GetRemainingLength() == 0`.
 *
 *  @return `true` if caller `Parser` has no more data to parse.
*/
public final function bool HasFinished()
{
    return (currentState.pointer >= content.length);
}

/**
 *  Returns still unparsed part of caller `Parser`'s source as an array of
 *  Unicode code points. Does not change the state of the `Parser`.
 *
 *  @return Unparsed part of caller `Parser`'s source as an array of
 *      Unicode code points.
 */
public final function array GetRemainderRaw()
{
    local int   i;
    local int   remainingLength;
    local array result;
    //  Amount of unparsed data cannot change while we copy it,
    //  so there is no need to recompute it on every iteration
    remainingLength = GetRemainingLength();
    result.length = remainingLength;
    for (i = 0; i < remainingLength; i += 1) {
        result[i] = GetCharacter(i);
    }
    return result;
}

/**
 *  Returns still unparsed part of caller `Parser`'s source as a `string`.
 *  Does not change the state of the `Parser`.
 *
 *  @return Unparsed part of caller `Parser`'s source as a `string`
 *      (converted with `STRING_Plain` type).
 */
public final function string GetRemainder()
{
    local array rawResult;
    //  Single implementation of the copying logic lives in `GetRemainderRaw()`
    rawResult = GetRemainderRaw();
    return _().text.RawToString(rawResult, STRING_Plain);
}

/**
 *  Returns still unparsed part of caller `Parser`'s source as `Text`.
 *  Does not change the state of the `Parser`.
 *
 *  @return Unparsed part of caller `Parser`'s source as `Text`.
 */
public final function Text GetRemainderT()
{
    local array rawResult;
    //  Single implementation of the copying logic lives in `GetRemainderRaw()`
    rawResult = GetRemainderRaw();
    return _().text.FromRaw(rawResult);
}

/**
 *  Matches any sequence of whitespace symbols, without returning it.
 *  Starts from where previous parsing function finished.
 *
 *  Can never cause parser to enter failed state.
 *
 *  For what symbols exactly are considered whitespace refer to
 *  the description of `TextAPI.IsWhitespace()` function.
 *
 *  @param  whitespacesAmount   Returns how many whitespace symbols
 *      were skipped. Any given value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser Skip(optional out int whitespacesAmount)
{
    local TextAPI api;
    if (!Ok()) return self;

    api = _().text;
    whitespacesAmount = 0;
    //  Cycle will end once we either reach a non-whitespace symbol or
    //  there are no more code points to get
    while (api.IsWhitespace(GetCharacter(whitespacesAmount))) {
        whitespacesAmount += 1;
    }
    //  `ShiftPointer()` replaces any shift below `1` with `1`, so calling it
    //  with zero would wrongly consume one non-whitespace symbol
    if (whitespacesAmount > 0) {
        ShiftPointer(whitespacesAmount);
    }
    return self;
}

/**
 *  Function that tries to match given data in `Parser`'s content,
 *  starting from where previous parsing function finished.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *  On mismatch (or if there is not enough unparsed data) puts `Parser` into
 *  a failed state without consuming anything.
 *  Empty `data` always matches and consumes nothing.
 *
 *  @param  data            Data that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MatchRaw
(
    array           data,
    optional bool   caseInsensitive
)
{
    local int       i;
    local TextAPI   api;
    if (!Ok())                                  return self;
    if (data.length > GetRemainingLength())     return Fail();

    api = _().text;
    for (i = 0; i < data.length; i += 1)
    {
        if (!api.AreEqual(data[i], GetCharacter(i), caseInsensitive)) {
            return Fail();
        }
    }
    //  `ShiftPointer()` replaces any shift below `1` with `1`, so matching
    //  empty `data` must not call it, or one symbol would be consumed
    if (data.length > 0) {
        ShiftPointer(data.length);
    }
    return self;
}

/**
 *  Function that tries to match given `string`, starting from where
 *  previous parsing function finished.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  word            String that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser Match(string word, optional bool caseInsensitive)
{
    local TextAPI api;
    api = _().text;
    //  All the actual work (including failed state check) is done for raw data
    return MatchRaw(api.StringToRaw(word), caseInsensitive);
}

/**
 *  Function that tries to match given `Text`, starting from where
 *  previous parsing function finished.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *  Passing `none` as `word` puts `Parser` into a failed state.
 *
 *  @param  word            Text that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MatchT(Text word, optional bool caseInsensitive)
{
    if (!Ok()) {
        return self;
    }
    if (word != none) {
        return MatchRaw(word.ToRaw(), caseInsensitive);
    }
    return Fail();
}

/**
 *  Internal function for parsing unsigned integers in any base from 2 to 36.
 *
 *  This parsing can fail, putting `Parser` into a failed state.
 *
 *  @param  result              If parsing is successful, this value will
 *      contain parsed integer, otherwise value is undefined.
 *      Any passed value is discarded.
 *  @param  base                Base, in which integer in question is recorded.
 *      Zero (default value) means base 10; any other value outside of
 *      [2; 36] range makes this method fail.
 *  @param  numberLength        If this parameter is less or equal to zero,
 *      function will stop parsing the moment it can't recognize a character as
 *      belonging to a number in a given base.
 *      It will only fail if it couldn't parse a single character;
 *      If this parameter is set to be positive (`> 0`), function will
 *      attempt to use exactly `numberLength` characters for parsing and will
 *      fail if they would not constitute a valid number.
 *  @param  consumedCodePoints  Amount of code points used (consumed) to parse
 *      this number; undefined, if parsing is unsuccessful.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MUnsignedInteger
(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints
)
{
    local bool  parsingFixedLength;
    local int   nextPosition;
    //  Like any other parsing method - do nothing for an already failed
    //  `Parser`, instead of consuming more input
    if (!Ok()) return self;

    numberLength = Max(0, numberLength);
    parsingFixedLength = (numberLength != 0);
    //  Zero means "use default base"
    if (base == 0) {
        base = 10;
    }
    else if (base < 2 || base > 36) {
        return Fail();
    }
    result = 0;
    consumedCodePoints = 0;
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) break;
        //  Negative value means that character is not a digit in this `base`
        nextPosition = _().text.CharacterToInt(GetCharacter(), base);
        if (nextPosition < 0) break;

        //  NOTE: there is no overflow detection here, so sufficiently long
        //  digit sequences will not fit into `int`
        result = result * base + nextPosition;
        consumedCodePoints += 1;
        ShiftPointer();
    }
    //  Fail if we have read less digits than required or no digits at all
    if (    (parsingFixedLength && consumedCodePoints != numberLength)
        ||  consumedCodePoints < 1)
    {
        return Fail();
    }
    return self;
}

/**
 *  Parses escaped sequence of the type that is usually used in
 *  string literals: backslash "\", followed by any character
 *  (called escaped character later) or, in special cases, several characters.
 *  For most characters escaped sequence is resolved into
 *  an escaped character's code point.
 *
 *  Several escaped symbols:
 *      \n, \r, \t, \b, \f, \v
 *  are translated into a different code point corresponding to
 *  a control symbol, normally denoted by these sequences.
 *
 *  A Unicode code point can also be directly entered with either of the two
 *  commands:
 *      \u0056
 *      \U56
 *  The difference is that `\u` allows you to enter two-byte code point
 *  (exactly 4 hexadecimal digits), while `\U` only allows to define
 *  code points that fit into 1 byte (exactly 2 hexadecimal digits),
 *  but is more compact.
 *
 *  @param  denotedCharacter    If parsing is successful, parameter will
 *      contain appropriate character, denoted by a parsed escaped sequence;
 *      If parsing is unsuccessful, value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MEscapedSequence
(
    out Text.Character denotedCharacter
)
{
    local int mapIndex;
    if (!Ok()) return self;
    //  Escaped sequence is a backslash + at least one more character
    if (    GetRemainingLength() < 2
        ||  GetCharacter().codePoint != CODEPOINT_BACKSLASH)
    {
        return Fail();
    }
    denotedCharacter = GetCharacter(1);
    ShiftPointer(2);
    //  Escaped character might denote some special code point
    mapIndex = 0;
    while (mapIndex < escapeCharactersMap.length)
    {
        if (escapeCharactersMap[mapIndex].from == denotedCharacter.codePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[mapIndex].to;
            return self;
        }
        mapIndex += 1;
    }
    //  ...or a declaration of an arbitrary Unicode code point:
    //  4 hexadecimal digits for the "small u" and 2 for the "large U"
    if (denotedCharacter.codePoint == CODEPOINT_USMALL) {
        return MUnsignedInteger(denotedCharacter.codePoint, 16, 4);
    }
    if (denotedCharacter.codePoint == CODEPOINT_ULARGE) {
        return MUnsignedInteger(denotedCharacter.codePoint, 16, 2);
    }
    //  Any other character simply denotes itself
    return self;
}

/**
 *  Attempts to parse a string literal: a string enclosed in either of
 *  the following quotation marks: ", ', `.
 *  String literals can contain escaped sequences.
 *  String literals MUST end with a closing quotation mark
 *  (the same one that opened the literal).
 *  @see `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this array will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, its value is undefined.
 *      Any passed contents are simply discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringLiteralRaw(out array result)
{
    local TextAPI           api;
    local Text.Character    openingQuote, currentCharacter, unescapedCharacter;
    if (!Ok()) return self;

    api = _().text;
    openingQuote = GetCharacter();
    if (!api.IsQuotationMark(openingQuote)) {
        return Fail();
    }
    ShiftPointer(); //  Skip opening quotation mark
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        //  Closing quote: consume it and finish
        if (api.AreEqual(currentCharacter, openingQuote))
        {
            ShiftPointer();
            return self;
        }
        //  Regular code point: copy as-is
        if (!api.IsCodePoint(currentCharacter, CODEPOINT_BACKSLASH))
        {
            result[result.length] = currentCharacter;
            ShiftPointer();
            continue;
        }
        //  Backslash MUST mean valid escape sequence
        if (!MEscapedSequence(unescapedCharacter).Ok()) {
            return Fail();
        }
        result[result.length] = unescapedCharacter;
    }
    //  Content ended without a closing quote.
    return Fail();
}

/**
 *  Attempts to parse a string literal: a string enclosed in either of
 *  the following quotation marks: ", ', `.
 *  String literals can contain escaped sequences.
 *  String literals MUST end with a closing quotation mark.
 *  @see `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this `string` will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, passed value is left unchanged.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteral(out string result)
{
    local array rawResult;
    if (Ok() && MStringLiteralRaw(rawResult).Ok()) {
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}

/**
 *  Attempts to parse a string literal: a string enclosed in either of
 *  the following quotation marks: ", ', `.
 *  String literals can contain escaped sequences.
 *  String literals MUST end with closing quotation mark.
* @see `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this `Text` will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, passed value is left unchanged.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteralT(out Text result)
{
    local array rawResult;
    if (Ok() && MStringLiteralRaw(rawResult).Ok()) {
        result = _().text.FromRaw(rawResult);
    }
    return self;
}

/**
 *  Matches everything until it finds one of the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
 *  Reaching the end of the input also stops the matching.
 *  This method cannot fail.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this array as a sequence of Unicode code points.
 *      Any passed contents are discarded.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MUntilRaw
(
    out array               result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local TextAPI           api;
    local Text.Character    currentCharacter;
    if (!Ok()) return self;

    api = _().text;
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        //  Stop on the first break symbol, without consuming it
        if (    api.AreEqual(currentCharacter, characterBreak)
            ||  (whitespacesBreak && api.IsWhitespace(currentCharacter))
            ||  (quotesBreak && api.IsQuotationMark(currentCharacter)))
        {
            break;
        }
        result[result.length] = currentCharacter;
        ShiftPointer();
    }
    return self;
}

/**
 *  Matches everything until it finds one of the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
 *  Reaching the end of the input also stops the matching.
 *  This method cannot fail.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `string`.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntil
(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local array rawResult;
    if (Ok())
    {
        MUntilRaw(rawResult, characterBreak, whitespacesBreak, quotesBreak);
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}

/**
 *  Matches everything until it finds one of the breaking symbols:
 *      1.
*          a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
 *  Reaching the end of the input also stops the matching.
 *  This method cannot fail.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `Text`.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntilT
(
    out Text                result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local array rawResult;
    if (Ok())
    {
        MUntilRaw(rawResult, characterBreak, whitespacesBreak, quotesBreak);
        result = _().text.FromRaw(rawResult);
    }
    return self;
}

/**
 *  Parses a string as either "simple" or "quoted".
 *
 *  "Simple" string is read until the first whitespace or quotation mark
 *  symbol. Reading an empty "simple" string (either due to lack of
 *  further data or instantly encountering a break symbol)
 *  is not considered a failure.
 *
 *  "Quoted" string starts with a quotation mark: in that case this method
 *  acts exactly like `MStringLiteralRaw()`, which means that the string
 *  MUST be closed by the corresponding (un-escaped) quotation mark,
 *  otherwise parsing fails.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringRaw(out array result)
{
    if (!Ok()) {
        return self;
    }
    //  "Simple" string: read until whitespace or quotation mark
    if (!_().text.IsQuotationMark(GetCharacter())) {
        return MUntilRaw(result,, true, true);
    }
    //  "Quoted" string
    return MStringLiteralRaw(result);
}

/**
 *  Parses a string as either "simple" or "quoted".
 *
 *  "Simple" string is read until the first whitespace or quotation mark
 *  symbol. Reading an empty "simple" string (either due to lack of
 *  further data or instantly encountering a break symbol)
 *  is not considered a failure.
 *
 *  "Quoted" string starts with a quotation mark and is parsed by the same
 *  rules as in `MStringLiteral()`, which means that the string
 *  MUST be closed by the corresponding (un-escaped) quotation mark,
 *  otherwise parsing fails.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MString(out string result)
{
    local array rawResult;
    if (Ok())
    {
        MStringRaw(rawResult);
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}

/**
 *  Parses a string as either "simple" or "quoted".
 *
 *  "Simple" string is read until the first whitespace or quotation mark
 *  symbol. Reading an empty "simple" string (either due to lack of
 *  further data or instantly encountering a break symbol)
 *  is not considered a failure.
 *
 *  "Quoted" string starts with a quotation mark and is parsed by the same
 *  rules as in `MStringLiteralT()`, which means that the string
 *  MUST be closed by the corresponding (un-escaped) quotation mark,
 *  otherwise parsing fails.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringT(out Text result)
{
    local array rawResult;
    if (Ok())
    {
        MStringRaw(rawResult);
        result = _().text.FromRaw(rawResult);
    }
    return self;
}

/**
 *  Matches a (possibly empty) sequence of whitespace symbols.
 *
 *  Cannot fail (not being able to read any input is not considered a failure).
 *
 *  @param  result  Unicode code points of matched whitespaces will be
 *      recorded in this array (it will be empty if none were found).
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespacesRaw(out array result)
{
    local TextAPI           api;
    local Text.Character    currentCharacter;
    if (!Ok()) return self;

    api = _().text;
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        if (api.IsWhitespace(currentCharacter))
        {
            result[result.length] = currentCharacter;
            ShiftPointer();
        }
        else {
            break;
        }
    }
    return self;
}

/**
 *  Matches a (possibly empty) sequence of whitespace symbols.
 *
 *  Cannot fail (not being able to read any input is not considered a failure).
 *
 *  @param  result  Matched whitespaces will be recorded here
 *      (empty `string` if none were found).
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespaces(out string result)
{
    local array rawResult;
    if (Ok())
    {
        MWhitespacesRaw(rawResult);
        result = _().text.RawToString(rawResult, STRING_Plain);
    }
    return self;
}

/**
 *  Matches a (possibly empty) sequence of whitespace symbols.
 *
 *  Cannot fail (not being able to read any input is not considered a failure).
 *
 *  @param  result  Matched whitespaces will be recorded here.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MWhitespacesT(out Text result)
{
    local array rawResult;
    if (Ok())
    {
        MWhitespacesRaw(rawResult);
        result = _().text.FromRaw(rawResult);
    }
    return self;
}

/**
 *  Parses next character as itself.
 *
 *  Can only fail if caller `Parser` has already exhausted all available data.
 *
 *  @param  result  If parsing was successful - next character,
 *      otherwise - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MCharacter(out Text.Character result)
{
    if (!Ok()) {
        return self;
    }
    if (!HasFinished())
    {
        result = GetCharacter();
        return ShiftPointer();
    }
    return Fail();
}

/**
 *  Parses next code point as a byte.
 *  Can fail if caller `Parser` has already exhausted all available data or
 *  next Unicode code point cannot fit into the `byte` value range
 *  (from `0` to `BYTE_MAX`).
 *
 *  @param  result  If parsing was successful - next Unicode code point as
 *      a byte, otherwise - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MByte(out byte result)
{
    local Text.Character nextCharacter;
    if (!Ok()) return self;

    if (!MCharacter(nextCharacter).Ok()) {
        return Fail();
    }
    if (nextCharacter.codePoint >= 0 && nextCharacter.codePoint <= BYTE_MAX)
    {
        result = nextCharacter.codePoint;
        return self;
    }
    return Fail();
}

/**
 *  Tries to parse a sign: either "+" or "-".
 *
 *  @param  result              Value of `ParsedSign` will be recorded here,
 *      depending on what sign was encountered.
 *      `SIGN_Missing` value is only possible if we allow sign to be missing.
 *  @param  allowMissingSign    By default `false` means that parsing will fail
 *      if next character is neither "+" nor "-";
 *      `true` means that parsing will not fail even if there is no sign, -
 *      method will then consume no input and will return `SIGN_Missing`
 *      as a result.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MSign
(
    out ParsedSign  result,
    optional bool   allowMissingSign
)
{
    local ParserState beforeSign;
    if (!Ok()) {
        return self;
    }
    //  Remember where we started, so that every alternative
    //  is attempted from the same position
    beforeSign = GetCurrentState();
    if (Match("-").Ok())
    {
        result = SIGN_Minus;
        return self;
    }
    if (RestoreState(beforeSign).Match("+").Ok())
    {
        result = SIGN_Plus;
        return self;
    }
    //  No sign was found: either roll back and report that,
    //  or leave parser in the state the last failed `Match()` produced
    if (allowMissingSign)
    {
        RestoreState(beforeSign);
        result = SIGN_Missing;
    }
    return self;
}

/**
 *  Tries to parse a number prefix that determines a base system for denoting
 *  integer numbers:
 *      1. `0x` means hexadecimal;
 *      2. `0b` means binary;
 *      3. `0o` means octal;
 *      4. otherwise we use decimal system.
 *
 *  This parsing method cannot fail.
 *
 *  Parser consumes appropriate prefix; nothing if decimal system is determined.
 *
 *  @param  base    Determined base will be recorded here: `16`, `2`, `8` or
 *      `10`. Any passed value is discarded, unless parser was already in
 *      a failed state, in which case value is left untouched.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MBase(out int base)
{
    local ParserState beforePrefix;
    if (!Ok()) {
        return self;
    }
    //  Each prefix is attempted from the same starting position
    beforePrefix = GetCurrentState();
    if (Match("0x").Ok())
    {
        base = 16;
        return self;
    }
    if (RestoreState(beforePrefix).Match("0b").Ok())
    {
        base = 2;
        return self;
    }
    if (RestoreState(beforePrefix).Match("0o").Ok())
    {
        base = 8;
        return self;
    }
    //  No known prefix: undo the failed attempts and fall back to decimal
    RestoreState(beforePrefix);
    base = 10;
    return self;
}

/**
 *  Parses signed integer either in a directly given base (`base`) or in an
 *  auto-determined one (based on prefix, @see `MBase()`).
 *
 *  Integers are expected in form: optional sign ("+" / "-"),
 *  then optional base prefix ("0x" / "0b" / "0o", only looked for when
 *  base is auto-determined), then the unsigned integer itself.
 *  Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
 *
 *  @param  result  If parsing is successful - parsed value will be
 *      recorded here; if parsing fails - value is undetermined.
 *      Any passed value is discarded.
 *  @param  base    Base in which function must attempt to parse a number;
 *      Default value (`0`) means function must auto-determine base,
 *      based on the prefix, otherwise must be between 2 and 36.
 *  @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign integerSign;
    if (!Ok()) return self;

    //  Sign is optional for integers
    MSign(integerSign, true);
    //  `0` is a request to auto-determine the base from the prefix
    if (base == 0) {
        MBase(base);
    }
    MUnsignedInteger(result, base);
    if (integerSign == SIGN_Minus) {
        result *= -1;
    }
    return self;
}

//      Internal function for parsing fractional part (including the dot ".")
//  of the text representation for floating point number (decimal system only).
//      Cannot fail, records `0.0` if it couldn't parse anything.
//      If the dot is present, but no digits follow it, the dot is still
//  consumed and `0.0` is recorded.
protected final function Parser MFractionalPart(out float result)
{
    local ParserState   checkpoint;
    local int           fractionalInt;
    local int           digitsRead;
    if (!Ok()) return self;

    result = 0.0;
    checkpoint = GetCurrentState();
    if (!Match(".").Ok())
    {
        //  No fractional part at all - consume nothing
        RestoreState(checkpoint);
        return self;
    }
    //  From here on the dot stays consumed, even if no digits follow it
    checkpoint = GetCurrentState();
    //  Base is passed explicitly (as other decimal-only callers in this class
    //  do) instead of relying on whatever the default base happens to be
    if (!MUnsignedInteger(fractionalInt, 10,, digitsRead).Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  Divide by a power of ten instead of multiplying by a power of `0.1`:
    //  `0.1` has no exact binary representation, so its powers accumulate
    //  more rounding error
    result = float(fractionalInt) / (10.0 ** digitsRead);
    return self;
}

//      Internal function for parsing exponent part (including the symbol "e")
//  of the text representation for floating point number (decimal system only).
//      Can only fail if symbol "e" / "E" is present, but there is no valid
//  integer right after it (whitespace symbols in-between are forbidden).
//      Records `0` if there was no exponent to parse.
protected final function Parser MExponentPart(out int result)
{
    local ParserState   checkpoint;
    local ParsedSign    exponentSign;
    if (!Ok()) return self;

    //  Reset output before looking for the exponent, so that a missing
    //  exponent is reported as `0` regardless of what caller has passed in
    result = 0;
    //  Is there even an exponential part?
    checkpoint = GetCurrentState();
    //  NOTE(review): second argument of `Match()` presumably requests
    //  case-insensitive matching ("e" / "E") - confirm against `Match()`
    if (!Match("e", true).Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  If yes - parse it:
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus) {
        result *= -1;
    }
    return self;
}

//  Internal function for parsing optional suffix of the text representation
//  for floating point number ("f" or "F").
//  Cannot fail.
//  Can only consume one Unicode code point,
//  when it is either "f" or "F".
protected final function Parser MFloatSuffix()
{
    local ParserState beforeSuffix;
    if (!Ok()) {
        return self;
    }
    beforeSuffix = GetCurrentState();
    if (Match("f", true).Ok()) {
        //  Suffix was present and is now consumed
        return self;
    }
    //  Suffix is optional: undo the failed match instead of failing
    RestoreState(beforeSuffix);
    return self;
}

/**
 *  Parses signed floating point number in JSON form + optional "f" / "F"
 *  suffix at the end.
 *
 *  Number is read as: optional sign, unsigned decimal integer part,
 *  optional fractional part, optional exponent part, optional suffix.
 *
 *  @param  result  If parsing is successful - parsed value will be
 *      recorded here; if parsing fails - value is undetermined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MNumber(out float result)
{
    local ParsedSign    numberSign;
    local int           wholePart, exponent;
    local float         fraction;
    local float         magnitude;
    if (!Ok()) {
        return self;
    }
    //  Read all the components first...
    MSign(numberSign, true);
    MUnsignedInteger(wholePart, 10)
        .MFractionalPart(fraction)
        .MExponentPart(exponent)
        .MFloatSuffix();
    //  ...and only assemble the value if every one of them was read fine
    if (Ok())
    {
        magnitude = float(wholePart) + fraction;
        magnitude *= 10.0 ** exponent;
        if (numberSign == SIGN_Minus) {
            magnitude *= -1;
        }
        result = magnitude;
    }
    return self;
}

defaultproperties
{
    // Start with no initializations done
    version = 0
    BYTE_MAX = 255
    CODEPOINT_BACKSLASH = 92 // \
    CODEPOINT_USMALL = 117 // u
    CODEPOINT_ULARGE = 85 // U
    escapeCharactersMap(0)=(from=110,to=10) // \n
    escapeCharactersMap(1)=(from=114,to=13) // \r
    escapeCharactersMap(2)=(from=116,to=9) // \t
    escapeCharactersMap(3)=(from=98,to=8) // \b
    escapeCharactersMap(4)=(from=102,to=12) // \f
    escapeCharactersMap(5)=(from=118,to=11) // \v
}