/**
 *      Implements a simple `Parser` with built-in functions to parse simple
 *  UnrealScript's types and support for saving / restoring parser states.
 *      Copyright 2021 Anton Tarasenko
 *------------------------------------------------------------------------------
 * This file is part of Acedia.
 *
 * Acedia is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License, or
 * (at your option) any later version.
 *
 * Acedia is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Acedia.  If not, see <https://www.gnu.org/licenses/>.
 */
class Parser extends AcediaObject
    dependson(Text)
    dependson(UnicodeData);

//  Max value of a byte
var public int BYTE_MAX;
//  Code points of symbols with important meaning in parsing.
var public int CODEPOINT_BACKSLASH;
var public int CODEPOINT_USMALL;
var public int CODEPOINT_ULARGE;

//  `Text` object with the content that this `Parser` is supposed to parse
//  (only copies of any `Text` passed to us are stored here).
var private Text content;
//  Incremented each time `Parser` is reinitialized with new `content`.
//  Can be used to make `Parser` object completely independent from
//  its past after each re-initialization.
//  This helps to avoid needless reallocations.
var private int version;

//  Describes current state of the `Parser`, instance of this struct
//  can be used to revert parser back to this state.
struct ParserState
{
    //  Record to which object (and of what version) this state belongs to.
    //  This information is used to make sure that we apply this state
    //  only to same `Parser` (of the same version) that it originated from.
    var private AcediaObject    ownerObject;
    var private int             ownerVersion;
    //  Has parser failed at some point?
    var private bool            failed;
    //  Points at the symbol that is going to be used next in parsing.
    var private int             pointer;
};
var private ParserState currentState;
//  For convenience `Parser` will store one internal state that designates
//  a state that's safe to revert to when some parsing attempt goes wrong.
//  @see `Confirm()`, `R()`
var private ParserState confirmedState;

//  Describes rules for translating escaped sequences ("\r", "\n", "\t")
//  into appropriate code points.
//  NOTE(review): element type of this array is missing in the reviewed copy
//  of the file; elements are accessed through their `from` / `to` fields in
//  `MEscapedSequence()` - confirm the intended `array<...>` declaration.
var private const array escapeCharactersMap;

//  Used to store a result of a `ParseSign()` function.
enum ParsedSign
{
    SIGN_Missing,
    SIGN_Plus,
    SIGN_Minus
};

//  TODO: add finalizer

//  Shared initialization routine.
//  Takes ownership of `source` as-is (no copy is made), so every public
//  initialization method is responsible for passing its own copy.
//  Does nothing when `source` is `none`.
private final function Parser _initialize(Text source)
{
    local ParserState initialState;
    if (source != none)
    {
        content = source;
        version += 1;
        //  Fresh state: bound to this parser / version, not failed,
        //  pointing at the very first symbol
        initialState.ownerObject    = self;
        initialState.ownerVersion   = version;
        initialState.failed         = false;
        initialState.pointer        = 0;
        currentState    = initialState;
        confirmedState  = initialState;
    }
    return self;
}

/**
 * Initializes `Parser` with new data from a `Text`. Never fails.
 *
 * Any data from before this call is lost, any checkpoints are invalidated.
 *
 * @param source `Text` object `Parser` will need to parse (it is copied).
 *      If `none` is passed - parser won't be initialized.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser Initialize(Text source)
{
    if (source != none) {
        _initialize(source.Copy());
    }
    return self;
}

/**
 * Initializes `Parser` with new data from a plain `string`.
 *
 * NOTE(review): if conversion of `source` into `Text` yields `none`,
 * caller `Parser` is left untouched.
 *
 * Any data from before this call is lost, any checkpoints are invalidated.
 *
 * @param source String `Parser` will need to parse.
 * @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser InitializeS(string source)
{
    //  `_initialize()` always returns the caller `Parser`
    return _initialize(_.text.FromString(source));
}

/**
 * Tells whether `Parser` is still in a usable (non-failed) state.
 *
 * Parser enters a failed state whenever any parsing call returns without
 * completing its job. `Parser` in a failed state will automatically fail
 * any further parsing attempts until it gets reset via `R()` call.
 *
 * @return `false` if `Parser` is in a failed state and `true` otherwise.
 */
public final function bool Ok()
{
    if (currentState.failed) {
        return false;
    }
    return true;
}

/**
 * Returns copy of the current state of this parser.
 *
 * As long as caller `Parser` was not reinitialized, returned `ParserState`
 * structure can be used to revert this `Parser` to its current condition
 * by a `RestoreState()` call.
 *
 * @see `RestoreState()`
 * @return Copy of the current state of the caller `Parser`.
 */
public final function ParserState GetCurrentState()
{
    local ParserState stateCopy;
    stateCopy = currentState;
    return stateCopy;
}

/**
 * Returns copy of (currently) last confirmed state of this parser.
 *
 * As long as caller `Parser` was not reinitialized, returned `ParserState`
 * structure can be used to revert this `Parser` to its current confirmed
 * state by a `RestoreState()` call.
 *
 * @see `RestoreState()`, `Confirm()`, `R()`
 * @return Copy of (currently) last confirmed state of this parser.
 */
public final function ParserState GetConfirmedState()
{
    local ParserState stateCopy;
    stateCopy = confirmedState;
    return stateCopy;
}

/**
 * Checks if given `stateToCheck` is valid for the caller `Parser`, i.e.:
 *      1. It is a state generated by either `GetCurrentState()` or
 *          `GetConfirmedState()` calls on the caller `Parser`.
 *      2. Caller `Parser` was not reinitialized since a call
 *          that generated given `stateToCheck`.
 *
 * @param stateToCheck `ParserState` to check for validity for
 *      caller `Parser`.
 * @return `true` if given `stateToCheck` is valid and `false` otherwise.
*/
public final function bool IsStateValid(ParserState stateToCheck)
{
    //  State must come from this very object and from its current "version"
    return (    stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version);
}

/**
 * Checks if calling `RestoreState()` for passed state will return a `Parser`
 * in an "Ok" state (not failed), i.e. state is valid and
 * was generated when `Parser` was in a non-failed state.
 *
 * @param stateToCheck `ParserState` to check for corresponding to
 *      `Parser` being in a non-failed state.
 *      By definition must also be valid for the caller `Parser`.
 * @return `true` if given `stateToCheck` is valid for the caller `Parser`
 *      and describes a non-failed state; `false` otherwise.
 */
public final function bool IsStateOk(ParserState stateToCheck)
{
    return (IsStateValid(stateToCheck) && !stateToCheck.failed);
}

/**
 * Resets parser to a state, given by `stateToRestore` argument
 * (so a state `Parser` was in at the moment given `stateToRestore`
 * was obtained).
 *
 * If given `stateToRestore` is from a different `Parser` or
 * the owner `Parser` was reinitialized after passed state was obtained, -
 * function will simply put caller `Parser` into a failed state.
 * Note that caller `Parser` being put in a failed state after this call
 * doesn't mean that described issues are actually present:
 * `stateToRestore` can also describe a failed state of the `Parser`.
 *
 * @param stateToRestore `ParserState` that this method will attempt
 *      to set for the caller `Parser`.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore)) {
        currentState = stateToRestore;
    }
    else {
        //  Foreign or outdated state: all we can do is to report a failure
        currentState.failed = true;
    }
    return self;
}

/**
 * Remembers current state of `Parser` in an internal checkpoint variable,
 * that can later be restored by an `R()` call.
 *
 * Can only save non-failed states and will only fail if caller `Parser` is
 * in a failed state.
*
 * `Confirm()` and `R()` are essentially convenience wrapper functions for
 * `GetCurrentState()` and `RestoreState()` calls + state storage variable.
 *
 * @return `true` if current state is recorded in `Parser` as confirmed and
 *      `false` otherwise.
 */
public final function bool Confirm()
{
    if (Ok())
    {
        confirmedState = currentState;
        return true;
    }
    //  Failed states are never recorded as confirmed
    return false;
}

/**
 * Reverts `Parser` to the state recorded by the last successful
 * `Confirm()` call. If there weren't any such call -
 * reverts `Parser` to its state right after initialization.
 *
 * Always resets failed state of a `Parser`. Cannot fail.
 *
 * `Confirm()` and `R()` are essentially convenience wrapper functions for
 * `GetCurrentState()` and `RestoreState()` calls + state storage variable.
 *
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser R()
{
    currentState = confirmedState;
    return self;
}

/**
 * Moves parsing pointer forward.
 *
 * Can only shift forward. To revert to a previous state in case of failure
 * use combination of `GetCurrentState()` and `RestoreState()` functions.
 *
 * @param shift How much to shift parsing pointer?
 *      Values of zero and below are discarded and `1` is used instead
 *      (i.e. by default this method shifts pointer by `1` position).
 *      Pointer never moves past the length of the parsed content.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
protected final function Parser ShiftPointer(optional int shift)
{
    local int shiftedPointer;
    if (content != none)
    {
        shiftedPointer = currentState.pointer + Max(1, shift);
        currentState.pointer = Min(shiftedPointer, content.GetLength());
    }
    return self;
}

/**
 * Returns a code point from this `Parser`'s content, relative to next
 * code point that caller `Parser` must handle.
 *
 * @param `shift` If `0` (default value) or negative value is passed -
 *      simply asks for the code point that caller `Parser` must handle.
 *      Otherwise shifts that index `shift` code points, i.e.
 *      `1` to return next code point or `2` to return code point after
 *      the next one.
* @return Returns character at a given shift. If `shift` is too small/large
 *      and does not fit `Parser`'s contents, returns invalid character.
 *      `GetCharacter()` with default (`0`) parameter can also return
 *      invalid character if caller `Parser` was not initialized,
 *      its contents are empty or it has already consumed all input.
 */
protected final function Text.Character GetCharacter(optional int shift)
{
    local int               targetIndex;
    local Text.Character    outOfBoundsCharacter;
    if (content == none) {
        return _.text.GetInvalidCharacter();
    }
    //  Negative shifts are treated as `0`
    targetIndex = currentState.pointer + Max(0, shift);
    if (targetIndex >= 0 && targetIndex < content.GetLength()) {
        return content.GetRawCharacter(targetIndex);
    }
    //  Requested position lies outside of the content
    outOfBoundsCharacter.codePoint = -1;
    return outOfBoundsCharacter;
}

/**
 * Puts caller `Parser` into a failed state.
 *
 * @return Returns the caller `Parser`, to allow for a quick exit from
 *      a parsing function by `return Fail();`.
 */
public final function Parser Fail()
{
    currentState.failed = true;
    return self;
}

/**
 * Returns amount of code points that have already been parsed,
 * provided that caller `Parser` is in a correct state.
 *
 * @return Returns how many Unicode code points have already been parsed if
 *      caller `Parser` is in correct state;
 *      otherwise return value is undefined.
 */
public final function int GetParsedLength()
{
    local int parsedLength;
    parsedLength = Max(0, currentState.pointer);
    return parsedLength;
}

/**
 * Returns amount of code points that have not yet been parsed,
 * provided that caller `Parser` is in a correct state.
 *
 * @return Returns how many Unicode code points are still unparsed if
 *      caller `Parser` is in correct state (`0` for `Parser` without content);
 *      otherwise return value is undefined.
 */
public final function int GetRemainingLength()
{
    if (content != none) {
        return Max(0, content.GetLength() - currentState.pointer);
    }
    return 0;
}

/**
 * Checks if caller `Parser` has already parsed all of it's content.
 * Uninitialized `Parser` has no content and, therefore, parsed it all.
*
 * Should return `true` iff `GetRemainingLength() == 0`.
 *
 * @return `true` if caller `Parser` has no more data to parse.
 */
public final function bool HasFinished()
{
    if (content == none) {
        return true;
    }
    return (currentState.pointer >= content.GetLength());
}

/**
 * Returns still unparsed part of caller `Parser`'s source as `Text`.
 *
 * Does not consume any input.
 *
 * @return Unparsed part of caller `Parser`'s source as `Text`.
 */
public final function Text GetRemainder()
{
    local int           i, remainingLength;
    local MutableText   result;
    result = _.text.Empty();
    //  Pointer does not move inside the loop, so remaining length is
    //  computed only once instead of on every iteration
    remainingLength = GetRemainingLength();
    for (i = 0; i < remainingLength; i += 1) {
        result.AppendCharacter(GetCharacter(i));
    }
    return result;
}

/**
 * Returns still unparsed part of caller `Parser`'s source as a plain `string`.
 *
 * Does not consume any input.
 *
 * @return Unparsed part of caller `Parser`'s source as a plain `string`.
 */
public final function string GetRemainderS()
{
    local int       i, remainingLength;
    local string    result;
    local TextAPI   api;
    api = _.text;
    //  Loop-invariant, see `GetRemainder()`
    remainingLength = GetRemainingLength();
    for (i = 0; i < remainingLength; i += 1) {
        result $= api.CharacterToString(GetCharacter(i));
    }
    return result;
}

/**
 * Auxiliary method for other methods that have to return resulting
 * `MutableText` instance.
 *
 * If passed instance is `none` or not currently allocated - a new one is
 * created, otherwise existing one is emptied.
 *
 * @param result `MutableText` instance to empty/create.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
protected final function Parser ResetResultText(out MutableText result)
{
    if (result == none || !result.IsAllocated()) {
        result = _.text.Empty();
    }
    else {
        result.Clear();
    }
    return self;
}

/**
 * Matches any sequence of whitespace symbols, without returning it.
 * Starts from where previous parsing function finished.
 *
 * Can never cause parser to enter failed state.
 *
 * What symbols exactly are considered whitespace refer to the description of
 * `TextAPI.IsWhitespace()` function.
 *
 * @param whitespacesAmount Returns how many whitespace symbols
 *      were skipped. Any given value is discarded.
* @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser Skip(optional out int whitespacesAmount)
{
    local TextAPI api;
    if (!Ok()) return self;

    api = _.text;
    whitespacesAmount = 0;
    //  Cycle will end once we either reach a non-whitespace symbol or
    //  there's no more code points to get
    while (api.IsWhitespace(GetCharacter(whitespacesAmount))) {
        whitespacesAmount += 1;
    }
    //  `ShiftPointer()` treats non-positive shifts as `1`,
    //  so it must not be called when nothing was skipped
    if (whitespacesAmount > 0) {
        ShiftPointer(whitespacesAmount);
    }
    return self;
}

/**
 * Function that tries to match given data in `Parser`'s content,
 * starting from where previous parsing function finished.
 *
 * Fails if `word` is `none`, longer than remaining input or differs from it.
 * An empty `word` always matches and consumes no input.
 * Does nothing if caller `Parser` was in failed state.
 *
 * @param word `Text` that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 * @param caseSensitivity Specifies whether `Match()` should try and
 *      ignore the difference in case, where applicable.
 *      By default it does not ignore case difference.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser Match(
    Text                            word,
    optional Text.CaseSensitivity   caseSensitivity)
{
    local int       i;
    local int       wordLength;
    local TextAPI   api;
    if (word == none)   return Fail();
    if (!Ok())          return self;

    wordLength = word.GetLength();
    if (wordLength > GetRemainingLength()) {
        return Fail();
    }
    api = _.text;
    for (i = 0; i < wordLength; i += 1)
    {
        if (!api.AreEqual(  word.GetCharacter(i), GetCharacter(i),
                            caseSensitivity)) {
            return Fail();
        }
    }
    //  `ShiftPointer(0)` would be promoted to a shift by `1`, which made
    //  matching an empty `word` wrongly consume one code point
    if (wordLength > 0) {
        ShiftPointer(wordLength);
    }
    return self;
}

/**
 * Function that tries to match given `string`, starting from where
 * previous parsing function finished.
 *
 * Does nothing if caller `Parser` was in failed state.
 *
 * @param word `string` that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 * @param caseSensitivity Specifies whether `Match()` should try and
 *      ignore the difference in case, where applicable.
* By default it does not ignore case difference.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MatchS(
    string                          word,
    optional Text.CaseSensitivity   caseSensitivity)
{
    local Text wrapper;
    //  `Match()` does nothing for a failed parser, so avoid allocating
    //  a temporary `Text` in that case (same guard as other `...S()` methods)
    if (!Ok()) return self;

    wrapper = _.text.FromString(word);
    Match(wrapper, caseSensitivity);
    //  Temporary object is only needed for the duration of the match
    wrapper.FreeSelf();
    return self;
}

/**
 * Internal function for parsing unsigned integers in any base from 2 to 36.
 *
 * This parsing can fail, putting `Parser` into a failed state.
 *
 * @param result If parsing is successful, this value will contain
 *      parsed integer, otherwise value is undefined.
 *      Any passed value is discarded.
 * @param base Base, in which integer in question is recorded.
 * @param numberLength If this parameter is less or equal to zero,
 *      function will stop parsing the moment it can't recognize a character as
 *      belonging to a number in a given base.
 *      It will only fail if it couldn't parse a single character;
 *      If this parameter is set to be positive (`> 0`), function will
 *      attempt to use exactly `numberLength` character for parsing and will
 *      fail if they would not constitute a valid number.
 * @param consumedCodePoints Amount of code point used (consumed) to parse
 *      this number; undefined, if parsing is unsuccessful.
 *      Any passed value is discarded.
 * @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MUnsignedInteger(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints)
{
    local bool  parsingFixedLength;
    local int   nextPosition;
    //  A failed parser must not consume any input: without this guard
    //  digits were still read (and pointer moved) in a failed state
    if (!Ok()) return self;

    numberLength = Max(0, numberLength);
    parsingFixedLength = (numberLength != 0);
    //  Omitted base means decimal; anything outside of [2; 36] is an error
    if (base == 0) {
        base = 10;
    }
    else if (base < 2 || base > 36) {
        return Fail();
    }
    result = 0;
    consumedCodePoints = 0;
    //  NOTE(review): no overflow detection - numbers that do not fit into
    //  `int` silently wrap around.
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) {
            break;
        }
        //  Negative value means "not a digit in this base"
        nextPosition = _.text.CharacterToInt(GetCharacter(), base);
        if (nextPosition < 0) {
            break;
        }
        result = result * base + nextPosition;
        consumedCodePoints += 1;
        ShiftPointer();
    }
    //  Fail if fixed length was requested, but not reached, or if not
    //  a single digit was read
    if (    (parsingFixedLength && consumedCodePoints != numberLength)
        ||  consumedCodePoints < 1) {
        return Fail();
    }
    return self;
}

/**
 * Parses escaped sequence of the type that is usually used in
 * string literals: backslash "\", followed by any character
 * (called escaped character later) or, in special cases, several characters.
 * For most characters escaped sequence resolved into
 * an escaped character's code point.
 *
 * Several escaped symbols:
 *      \n, \r, \t, \b, \f, \v
 * are translated into a different code point corresponding to
 * a control symbols, normally denoted by these sequences.
 *
 * A Unicode code point can also be directly entered with either of the two
 * commands:
 *      \u0056
 *      \U56
 * The difference is that `\u` allows you to enter two-byte code point, while
 * `\U` only allows to define code points that fit into 1 byte,
 * but is more compact.
 *
 * @param denotedCharacter If parsing is successful, parameter will contain
 *      appropriate code point, denoted by a parsed escaped sequence;
 *      If parsing is unsuccessful, value is undefined.
 *      Any passed value is discarded.
 * @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MEscapedSequence(
    out Text.Character denotedCharacter)
{
    local int i;
    local int hexDigitsAmount;
    if (!Ok()) return self;
    //  Escaped sequence is a backslash + at least one more character
    if (GetRemainingLength() < 2) {
        return Fail();
    }
    if (GetCharacter().codePoint != CODEPOINT_BACKSLASH) {
        return Fail();
    }
    //  Consume both backslash and the escaped character
    denotedCharacter = GetCharacter(1);
    ShiftPointer(2);
    //  Does escaped character denote one of the special code points?
    i = 0;
    while (i < escapeCharactersMap.length)
    {
        if (escapeCharactersMap[i].from == denotedCharacter.codePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[i].to;
            return self;
        }
        i += 1;
    }
    //  Does it start a declaration of arbitrary Unicode code point?
    //  "\u" is followed by exactly 4 hex digits, "\U" - by exactly 2
    if (denotedCharacter.codePoint == CODEPOINT_USMALL) {
        hexDigitsAmount = 4;
    }
    else if (denotedCharacter.codePoint == CODEPOINT_ULARGE) {
        hexDigitsAmount = 2;
    }
    if (hexDigitsAmount > 0) {
        MUnsignedInteger(denotedCharacter.codePoint, 16, hexDigitsAmount);
    }
    return self;
}

/**
 * Attempts to parse a string literal: a string enclosed in either of
 * the following quotation marks: ", ', `.
 *
 * String literals can contain escaped sequences.
 * String literals MUST end with closing quotation mark.
 * @see `MEscapedSequence()`
 *
 * @param result If parsing is successful, this `MutableText` will contain
 *      the contents of string literal with resolved escaped sequences;
 *      if parsing has failed, it's value is undefined.
 *      Any passed contents are simply discarded.
 *      If passed `MutableText` equals to `none`, new instance will be
 *      automatically allocated. This will be done regardless of whether
 *      parsing fails.
 * @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MStringLiteral(out MutableText result)
{
    local TextAPI           api;
    local Text.Character    currentCharacter;
    local Text.Character    openingQuote;
    local Text.Character    unescapedCharacter;
    ResetResultText(result);
    if (!Ok()) return self;

    api = _.text;
    //  Literal must start with a quotation mark, remember which one
    openingQuote = GetCharacter();
    if (!api.IsQuotationMark(openingQuote)) {
        return Fail();
    }
    ShiftPointer();
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        //  Same quotation mark as the opening one closes the literal
        if (api.AreEqual(currentCharacter, openingQuote)) {
            return ShiftPointer();
        }
        //  Ordinary code points are copied as-is
        if (!api.IsCodePoint(currentCharacter, CODEPOINT_BACKSLASH))
        {
            result.AppendCharacter(currentCharacter);
            ShiftPointer();
            continue;
        }
        //  Backslash MUST start a valid escaped sequence
        if (!MEscapedSequence(unescapedCharacter).Ok()) {
            return Fail();
        }
        result.AppendCharacter(unescapedCharacter);
    }
    //  Ran out of content before finding a closing quotation mark
    return Fail();
}

/**
 * `string` version of `MStringLiteral()`: attempts to parse a string literal,
 * i.e. a string enclosed in either of the following quotation marks: ", ', `.
 *
 * String literals can contain escaped sequences.
 * String literals MUST end with closing quotation mark.
 * @see `MEscapedSequence()`
 *
 * @param result If parsing is successful, this `string` will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, passed value is left unchanged.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MStringLiteralS(out string result)
{
    local MutableText literalContents;
    if (!Ok()) return self;

    literalContents = _.text.Empty();
    MStringLiteral(literalContents);
    if (Ok()) {
        result = literalContents.ToPlainString();
    }
    literalContents.FreeSelf();
    return self;
}

/**
 * Matches everything until it finds one of the breaking symbols:
 *      1. a specified code point (by default `0`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3.
(optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
 * This method cannot fail.
 *
 * @param result Any content before one of the break symbols
 *      will be recorded into this `MutableText`. If passed `MutableText` equals
 *      to `none`, new instance will be automatically allocated. This will be
 *      done regardless of whether parser is in a failed state.
 * @param characterBreak Method will stop parsing upon encountering this
 *      character (it will not be included in the `result`)
 * @param whitespacesBreak `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 * @param quotesBreak `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotation()` for what symbols are
 *      considered quotation marks).
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUntil(
    out MutableText         result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local bool              reachedBreak;
    local Text.Character    currentCharacter;
    local TextAPI           api;
    ResetResultText(result);
    if (!Ok()) return self;

    api = _.text;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        //  Break symbol itself is neither consumed nor recorded
        reachedBreak = api.AreEqual(currentCharacter, characterBreak)
            ||  (whitespacesBreak && api.IsWhitespace(currentCharacter))
            ||  (quotesBreak && api.IsQuotationMark(currentCharacter));
        if (reachedBreak) {
            break;
        }
        result.AppendCharacter(currentCharacter);
        ShiftPointer();
    }
    return self;
}

/**
 * Matches everything until it finds one of the breaking symbols:
 *      1. a specified code point (by default `0`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
 * This method cannot fail.
 *
 * @param result Any content before one of the break symbols
 *      will be recorded into this `string`.
* @param characterBreak Method will stop parsing upon encountering this
 *      character (it will not be included in the `result`)
 * @param whitespacesBreak `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 * @param quotesBreak `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotation()` for what symbols are
 *      considered quotation marks).
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUntilS(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local MutableText parsedContents;
    if (!Ok()) return self;

    //  Delegate to `MutableText` version and convert its output
    parsedContents = _.text.Empty();
    MUntil(parsedContents, characterBreak, whitespacesBreak, quotesBreak);
    result = parsedContents.ToPlainString();
    parsedContents.FreeSelf();
    return self;
}

/**
 * Matches everything until it finds one of the breaking sequences:
 *      1. Any of the specified `separators`.
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
 * This method cannot fail.
 *
 * @param result Any content before one of the breaking sequences
 *      will be recorded into this `MutableText`. If passed `MutableText` equals
 *      to `none`, new instance will be automatically allocated. This will be
 *      done regardless of whether parsing fails.
 * @param separators Method will stop parsing upon encountering any
 *      of these `Text`s (but they will not be included in the `result`).
 * @param whitespacesBreak `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 * @param quotesBreak `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotation()` for what symbols are
 *      considered quotation marks).
* @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUntilMany(
    out MutableText result,
    array<Text>     separators,
    optional bool   whitespacesBreak,
    optional bool   quotesBreak)
{
    local bool              foundEnd;
    local int               i, pointerShift, remainingLength;
    local Text.Character    nextCharacter;
    ResetResultText(result);
    if (!Ok()) return self;

    //  First only look ahead (without consuming anything) to find out how
    //  many code points precede the closest breaking sequence.
    //  Previous implementation tracked partial matches incrementally and,
    //  on a mismatch, dropped progress without re-checking current character,
    //  missing separators like "ab" inside "aab". Checking for a complete
    //  separator at every position finds the leftmost occurrence reliably.
    remainingLength = GetRemainingLength();
    while (pointerShift < remainingLength)
    {
        nextCharacter = GetCharacter(pointerShift);
        if (whitespacesBreak && _.text.IsWhitespace(nextCharacter)) {
            break;
        }
        if (quotesBreak && _.text.IsQuotationMark(nextCharacter)) {
            break;
        }
        for (i = 0; i < separators.length; i += 1)
        {
            if (_isSeparatorAt(separators[i], pointerShift))
            {
                foundEnd = true;
                break;
            }
        }
        if (foundEnd) {
            break;
        }
        pointerShift += 1;
    }
    //  Now actually consume and record everything before the break
    for (i = 0; i < pointerShift; i += 1)
    {
        result.AppendCharacter(GetCharacter());
        ShiftPointer();
    }
    return self;
}

//  Checks whether unparsed content contains the whole `separator`, starting
//  `shift` code points after the current parsing position.
//  `none` and empty separators never match. Does not consume any input.
private final function bool _isSeparatorAt(Text separator, int shift)
{
    local int i, separatorLength;
    if (separator == none) {
        return false;
    }
    separatorLength = separator.GetLength();
    if (separatorLength <= 0) {
        return false;
    }
    //  Not enough content left to contain this separator
    if (separatorLength > GetRemainingLength() - shift) {
        return false;
    }
    for (i = 0; i < separatorLength; i += 1)
    {
        if (!_.text.AreEqual(   GetCharacter(shift + i),
                                separator.GetCharacter(i))) {
            return false;
        }
    }
    return true;
}

/**
 * Matches everything until it finds one of the breaking sequences:
 *      1. Any of the specified `endWords`.
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
 * This method cannot fail.
 *
 * @param result Any content before one of the breaking sequences
 *      will be recorded into this `string`.
 * @param endWords Method will stop parsing upon encountering any
 *      of these sequences (but they will not be included in the `result`).
 * @param whitespacesBreak `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
* @param quotesBreak `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotation()` for what symbols are
 *      considered quotation marks).
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
//  NOTE(review): element type of `endWords` was missing in the reviewed copy
//  of this file. `array<Text>` is the only type that can be forwarded to
//  `MUntilMany()` as-is, so it is used here - confirm against callers.
public final function Parser MUntilManyS(
    out string      result,
    array<Text>     endWords,
    optional bool   whitespacesBreak,
    optional bool   quotesBreak)
{
    local MutableText wrapper;
    if (!Ok()) return self;

    //  Delegate to `MutableText` version and convert its output
    wrapper = _.text.Empty();
    MUntilMany(wrapper, endWords, whitespacesBreak, quotesBreak);
    result = wrapper.ToPlainString();
    wrapper.FreeSelf();
    return self;
}

/**
 * Parses a string as either "simple" or "quoted".
 *
 * "Simple" string is read until the first whitespace or quotation mark
 * (or until the end of input). Reading an empty "simple" string (either
 * due to lack of further data or instantly encountering a break symbol)
 * is not considered a failure.
 *
 * If next character is a quotation mark, this method acts exactly like
 * `MStringLiteral()`: "quoted" string MUST end with the corresponding
 * closing (un-escaped) mark, otherwise parsing fails.
 *
 * @param result If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *      If passed `MutableText` equals to `none`, new instance will be
 *      automatically allocated (unless `Parser` was already in
 *      a failed state, in which case `result` is not touched).
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MString(out MutableText result)
{
    if (!Ok()) return self;

    if (_.text.IsQuotationMark(GetCharacter())) {
        return MStringLiteral(result);
    }
    //  No specific break character, break on whitespaces and quotes
    return MUntil(result,, true, true);
}

/**
 * Parses a string as either "simple" or "quoted".
 * Not being able to read any symbols is not considered a failure.
 *
 * Reading empty string (either to lack of further data or
 * instantly encountering a break symbol) is not considered a failure.
*
 * Quoted string starts with quotation mark and must end
 * at the corresponding closing (un-escaped) mark:
 * if string started with a quotation mark, this method will act exactly
 * like `MStringLiteral()` (and fail if input ends before closing mark).
 *
 * @param result If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MStringS(out string result)
{
    local MutableText wrapper;
    if (!Ok()) return self;

    //  Delegate to `MutableText` version and convert its output
    wrapper = _.text.Empty();
    MString(wrapper);
    result = wrapper.ToPlainString();
    wrapper.FreeSelf();
    return self;
}

/**
 * Matches a (possibly empty) sequence of whitespace symbols.
 *
 * Cannot fail (not being able to read any input is not considered a failure).
 *
 * @param result If parsing was successful - whitespaces will be recorded
 *      into this `MutableText`, otherwise - undefined.
 *      Any passed value is discarded.
 *      If passed `MutableText` equals to `none` (or is not currently
 *      allocated), new instance will be automatically allocated.
 *      `result` is not touched if `Parser` was already in a failed state.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MWhitespaces(out MutableText result)
{
    local Text.Character    nextCharacter;
    local TextAPI           api;
    if (!Ok()) return self;

    api = _.text;
    //  Use shared helper instead of a hand-rolled `none` check: it also
    //  replaces instances that are not currently allocated
    ResetResultText(result);
    while (!HasFinished())
    {
        nextCharacter = GetCharacter();
        if (!api.IsWhitespace(nextCharacter)) {
            break;
        }
        result.AppendCharacter(nextCharacter);
        ShiftPointer();
    }
    return self;
}

/**
 * Matches a non-empty sequence of whitespace symbols.
 *
 * Cannot fail (not being able to read any input is not considered a failure).
 *
 * @param result If parsing was successful - whitespaces will be
 *      recorded here, otherwise - undefined.
 *      Any passed value is discarded.
 * @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MWhitespacesS(out string result)
{
    local MutableText parsedWhitespaces;
    if (!Ok()) return self;

    //  Delegate to `MutableText` version and convert its output
    parsedWhitespaces = _.text.Empty();
    MWhitespaces(parsedWhitespaces);
    result = parsedWhitespaces.ToPlainString();
    parsedWhitespaces.FreeSelf();
    return self;
}

/**
 * Parses next code point as itself.
 *
 * Can only fail if caller `Parser` has already exhausted all available data.
 *
 * @param result If parsing was successful - next character,
 *      otherwise - value is undefined.
 *      Any passed value is discarded.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MCharacter(out Text.Character result)
{
    if (!Ok()) return self;

    if (!HasFinished())
    {
        result = GetCharacter();
        return ShiftPointer();
    }
    //  Nothing left to read
    return Fail();
}

/**
 * Parses next code point as a byte.
 * Can fail if caller `Parser` has already exhausted all available data or
 * next Unicode code point cannot fit into the `byte` value range.
 * In the latter case offending code point is still consumed.
 *
 * @param result If parsing was successful - next Unicode code point as
 *      a byte, otherwise - value is undefined.
 *      Any passed value is discarded.
 * @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MByte(out byte result)
{
    local Text.Character nextCharacter;
    if (!Ok()) return self;

    MCharacter(nextCharacter);
    if (!Ok()) {
        return Fail();
    }
    //  Only code points inside [0; BYTE_MAX] can be stored in a `byte`
    if (nextCharacter.codePoint >= 0 && nextCharacter.codePoint <= BYTE_MAX)
    {
        result = nextCharacter.codePoint;
        return self;
    }
    return Fail();
}

/**
 * Tries to parse a sign: either "+" or "-".
 *
 * @param result Value of `ParsedSign` will be recorded here,
 *      depending on what sign was encountered.
 *      `SIGN_Missing` value is only possible if we allow sign to be missing.
 * @param allowMissingSign By default `false` means that parsing will fail
 *      if next character is neither "+" or "-";
 *      `true` means that parsing will not fail even if there is no sign, -
 *      method will then consume no input and will return `SIGN_Missing`
 *      as a result.
*  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MSign(
    out ParsedSign  result,
    optional bool   allowMissingSign
)
{
    local ParserState beforeSign;

    if (!Ok()) {
        return self;
    }
    //  Every attempt below starts from the same position
    beforeSign = GetCurrentState();
    if (MatchS("-").Ok())
    {
        result = SIGN_Minus;
        return self;
    }
    if (RestoreState(beforeSign).MatchS("+").Ok())
    {
        result = SIGN_Plus;
        return self;
    }
    //  Neither sign matched: unless a missing sign is allowed, the state
    //  produced by the last unsuccessful match attempt is kept as is
    if (allowMissingSign)
    {
        result = SIGN_Missing;
        RestoreState(beforeSign);
    }
    return self;
}

/**
 *  Tries to parse a number prefix that determines a base system for denoting
 *  integer numbers:
 *      1. `0x` means hexadecimal;
 *      2. `0b` means binary;
 *      3. `0o` means octal;
 *      4. otherwise we use decimal system.
 *
 *  This parsing method cannot fail.
 *
 *  Parser consumes appropriate prefix; nothing if decimal system is determined.
 *
 *  @param  base    Determined base (16, 2, 8 or 10) will be recorded here.
 *      Any passed value is discarded.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MBase(out int base)
{
    local ParserState beforePrefix;

    if (!Ok()) {
        return self;
    }
    //  Try each known prefix from the same starting position
    beforePrefix = GetCurrentState();
    if (MatchS("0x").Ok())
    {
        base = 16;
        return self;
    }
    if (RestoreState(beforePrefix).MatchS("0b").Ok())
    {
        base = 2;
        return self;
    }
    if (RestoreState(beforePrefix).MatchS("0o").Ok())
    {
        base = 8;
        return self;
    }
    //  No prefix - undo all attempts and fall back to the decimal system
    RestoreState(beforePrefix);
    base = 10;
    return self;
}

/**
 *  Parses signed integer either in a directly given base (`base`) or in an
 *  auto-determined one (based on prefix, @see `MBase()`).
 *
 *  Integers are expected in form: optional sign (+/-), optional base prefix
 *  (0x/0b/0o), followed by the digits of the number.
 *  Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
 *
 *  @param  result  If parsing is successful - parsed value will be
 *      recorded here; if parsing fails - value is undetermined.
 *      Any passed value is discarded.
 *  @param  base    Base in which function must attempt to parse a number;
 *      Default value (`0`) means function must auto-determine base,
 *      based on the prefix, otherwise must be between 2 and 36.
 *  @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign integerSign;

    if (!Ok()) {
        return self;
    }
    //  Sign is optional, so this step cannot fail
    MSign(integerSign, true);
    //  `0` means that caller wants us to deduce base from the prefix
    if (base == 0) {
        MBase(base);
    }
    MUnsignedInteger(result, base);
    if (integerSign == SIGN_Minus) {
        result *= -1;
    }
    return self;
}

//      Internal function for parsing fractional part (including the dot ".")
//  of the text representation for floating point number (decimal system only).
//      Cannot fail, returns `0.0` if it couldn't parse anything.
protected final function Parser MFractionalPart(out float result)
{
    local ParserState   checkpoint;
    local int           fractionalInt;
    local int           digitsRead;

    if (!Ok()) {
        return self;
    }
    result = 0.0;
    //  No dot - no fractional part: consume nothing
    checkpoint = GetCurrentState();
    if (!MatchS(".").Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  Dot without digits after it is accepted: the dot stays consumed and
    //  `result` keeps its `0.0` value
    checkpoint = GetCurrentState();
    if (!MUnsignedInteger(fractionalInt,,, digitsRead).Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  `digitsRead` is expected to hold the amount of digits consumed by
    //  `MUnsignedInteger()`; it scales parsed integer down into a fraction
    result = float(fractionalInt) * (0.1 ** digitsRead);
    return self;
}

//      Internal function for parsing exponent part (including the symbol "e")
//  of the text representation for floating point number (decimal system only).
//      Can only fail if symbol "e" / "E" is present, but there is no valid
//  integer right after it (whitespace symbols in-between are forbidden).
//      Returns `0` if there was no exponent to parse.
protected final function Parser MExponentPart(out int result)
{
    local ParserState   checkpoint;
    local ParsedSign    exponentSign;

    if (!Ok()) {
        return self;
    }
    //  Record the documented "no exponent" value up front, so that callers
    //  never observe a stale value when the early return below is taken
    result = 0;
    //  Is there even an exponential part?
    checkpoint = GetCurrentState();
    if (!MatchS("e", SCASE_INSENSITIVE).Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  If yes - parse it:
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus) {
        result *= -1;
    }
    return self;
}

//      Internal function for parsing optional suffix of the text representation
//  for floating point number ("f" or "F").
//      Cannot fail.
//  Can only consume one Unicode code point,
//  when it is either "f" or "F".
protected final function Parser MFloatSuffix()
{
    local ParserState beforeSuffix;

    if (Ok())
    {
        //  Suffix is optional: undo the match attempt if it is not there
        beforeSuffix = GetCurrentState();
        if (!MatchS("f", SCASE_INSENSITIVE).Ok()) {
            RestoreState(beforeSuffix);
        }
    }
    return self;
}

/**
 *  Parses signed floating point number in JSON form + optional "f" / "F"
 *  suffix at the end.
 *
 *  @param  result  If parsing is successful - parsed value will be
 *      recorded here; if parsing fails - value is undetermined.
 *      Any passed value is discarded.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MNumber(out float result)
{
    local ParsedSign    numberSign;
    local int           wholePart, exponent;
    local float         fraction;

    if (!Ok()) {
        return self;
    }
    //  Read all the components of the number in order:
    //  sign, integer part, fractional part, exponent and suffix
    self.MSign(numberSign, true)
        .MUnsignedInteger(wholePart, 10)
        .MFractionalPart(fraction)
        .MExponentPart(exponent)
        .MFloatSuffix();
    if (Ok())
    {
        //  Assemble the final value out of parsed components
        result = float(wholePart) + fraction;
        result *= 10.0 ** exponent;
        if (numberSign == SIGN_Minus) {
            result *= -1;
        }
    }
    return self;
}

defaultproperties
{
    //  Start with no initializations done
    version = 0
    BYTE_MAX = 255
    CODEPOINT_BACKSLASH = 92 // \
    CODEPOINT_USMALL = 117 // u
    CODEPOINT_ULARGE = 85 // U
    escapeCharactersMap(0)=(from=110,to=10) // \n
    escapeCharactersMap(1)=(from=114,to=13) // \r
    escapeCharactersMap(2)=(from=116,to=9) // \t
    escapeCharactersMap(3)=(from=98,to=8) // \b
    escapeCharactersMap(4)=(from=102,to=12) // \f
    escapeCharactersMap(5)=(from=118,to=11) // \v
}