You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1312 lines
43 KiB
1312 lines
43 KiB
/** |
|
* Implements a simple `Parser` with built-in functions to parse simple |
|
* UnrealScript's types and support for saving / restoring parser states. |
|
* Copyright 2020 Anton Tarasenko |
|
*------------------------------------------------------------------------------ |
|
* This file is part of Acedia. |
|
* |
|
* Acedia is free software: you can redistribute it and/or modify |
|
* it under the terms of the GNU General Public License as published by |
|
* the Free Software Foundation, version 3 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* Acedia is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License |
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>. |
|
*/ |
|
class Parser extends AcediaObject
    dependson(Text)
    dependson(UnicodeData);

//  Numeric constants used by this class. Their values are not assigned
//  anywhere in the visible part of the file
//  (presumably set in `defaultproperties` - TODO confirm).
var public int BYTE_MAX;
//  Code point that must open an escaped sequence (@see `MEscapedSequence()`).
var public int CODEPOINT_BACKSLASH;
//  Escaped character that introduces a code point written as
//  2 hexadecimal digits (@see `MEscapedSequence()`).
var public int CODEPOINT_USMALL;
//  Escaped character that introduces a code point written as
//  4 hexadecimal digits (@see `MEscapedSequence()`).
var public int CODEPOINT_ULARGE;

//  Input of this `Parser`: the sequence of characters (Unicode code points)
//  it is supposed to parse.
var private array<Text.Character> content;
//  Bumped by one every time `Parser` is reinitialized with new `content`.
//      Lets a reused `Parser` object become completely independent from
//  its past; this matters because garbage collection is extra expensive in UE2
//  and we want to reuse created objects as much as possible.
var private int version;
|
|
|
//  Snapshot of a `Parser`'s condition. An instance of this struct can be used
//  to bring the parser back to the condition it was in when
//  the snapshot was taken.
struct ParserState
{
    //  Object (and its version) this snapshot was taken from.
    //  Checked before a snapshot is applied, so that it is only ever restored
    //  into the same `Parser`, of the same version, that produced it.
    var private AcediaObject    ownerObject;
    var private int             ownerVersion;
    //  `true` if parser has failed at some point.
    var private bool            failed;
    //  Index of the next symbol to be used in parsing.
    var private int             pointer;
};
|
//  State the `Parser` is in right now.
var private ParserState currentState;
//  Single internal checkpoint kept for convenience: a state that is safe to
//  go back to when some parsing attempt goes wrong.
//  @see `Confirm()`, `R()`
var private ParserState confirmedState;

//  Rules for translating escaped sequences ("\r", "\n", "\t")
//  into the code points they denote.
var private const array<UnicodeData.CodePointMapping> escapeCharactersMap;
|
|
|
//  Result type of a `ParseSign()` function.
enum ParsedSign
{
    SIGN_Missing,
    SIGN_Plus,
    SIGN_Minus
};
|
|
|
/**
 *  (Re)initializes `Parser` with raw data: a sequence of characters
 *  (Unicode code points). Never fails.
 *
 *  Everything the `Parser` held before this call is lost and every previously
 *  obtained `ParserState` becomes invalid (version number is increased).
 *
 *  @param  source  Sequence of characters that `Parser` will need to parse.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser InitializeRaw(array<Text.Character> source)
{
    local ParserState freshState;
    //  New version number invalidates states handed out earlier
    version += 1;
    content = source;
    //  Non-failed state that points at the very first character
    freshState.ownerObject  = self;
    freshState.ownerVersion = version;
    freshState.failed       = false;
    freshState.pointer      = 0;
    currentState    = freshState;
    confirmedState  = freshState;
    return self;
}
|
|
|
/**
 *  (Re)initializes `Parser` with data taken from a `string`. Never fails.
 *
 *  Everything the `Parser` held before this call is lost and every previously
 *  obtained `ParserState` becomes invalid.
 *
 *  @param  source      String `Parser` will need to parse.
 *  @param  sourceType  Passed as-is to `TextAPI.StringToRaw()` together with
 *      `source` to convert it into raw data.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Initialize
(
    string                      source,
    optional Text.StringType    sourceType
)
{
    local array<Text.Character> rawSource;
    rawSource = _().text.StringToRaw(source, sourceType);
    //  `InitializeRaw()` returns the caller `Parser` itself
    return InitializeRaw(rawSource);
}
|
|
|
/**
 *  (Re)initializes `Parser` with data taken from a `Text`.
 *
 *  Does nothing if `none` is passed as a parameter: in that case `Parser`
 *  keeps whatever data and state it had before the call.
 *
 *  Otherwise everything the `Parser` held before this call is lost and every
 *  previously obtained `ParserState` becomes invalid.
 *
 *  @param  source  `Text` object `Parser` will need to parse.
 *      If `none` is passed - parser won't be initialized.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser InitializeT(Text source)
{
    if (source != none)
    {
        InitializeRaw(source.ToRaw());
    }
    return self;
}
|
|
|
/**
 *  Checks that `Parser` is NOT in a failed state.
 *
 *  Parser enters a failed state whenever any parsing call returns without
 *  completing its job. `Parser` in a failed state will automatically fail
 *  any further parsing attempts until it gets reset via `R()` call.
 *
 *  @return `false` if `Parser` is in a failed state and `true` otherwise.
 */
public final function bool Ok()
{
    if (currentState.failed)
    {
        return false;
    }
    return true;
}
|
|
|
/**
 *  Returns a copy of the current state of this parser.
 *
 *  As long as caller `Parser` is not reinitialized, returned `ParserState`
 *  structure can be used to revert this `Parser` to its current condition
 *  by a `RestoreState()` call.
 *
 *  @see `RestoreState()`
 *  @return Copy of the current state of the caller `Parser`.
 */
public final function ParserState GetCurrentState()
{
    local ParserState stateCopy;
    stateCopy = currentState;
    return stateCopy;
}
|
|
|
/**
 *  Returns a copy of the (currently) last confirmed state of this parser.
 *
 *  As long as caller `Parser` is not reinitialized, returned `ParserState`
 *  structure can be used to revert this `Parser` to its current confirmed
 *  state by a `RestoreState()` call.
 *
 *  @see `RestoreState()`, `Confirm()`, `R()`
 *  @return Copy of the (currently) last confirmed state of this parser.
 */
public final function ParserState GetConfirmedState()
{
    local ParserState stateCopy;
    stateCopy = confirmedState;
    return stateCopy;
}
|
|
|
/**
 *  Checks if given `stateToCheck` is valid for the caller `Parser`, i.e.:
 *      1. It records the caller `Parser` as its owner (it was produced by
 *          the caller, e.g. through `GetCurrentState()` or
 *          `GetConfirmedState()`);
 *      2. Caller `Parser` was not reinitialized since `stateToCheck` was
 *          produced (recorded version matches the current one).
 *
 *  @param  stateToCheck    `ParserState` to check for validity for
 *      caller `Parser`.
 *  @return `true` if given `stateToCheck` is valid and `false` otherwise.
 */
public final function bool IsStateValid(ParserState stateToCheck)
{
    return (    stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version);
}
|
|
|
/**
 *  Checks if calling `RestoreState()` for passed state will leave `Parser`
 *  in an "Ok" (not failed) state, i.e. the state is valid for the caller and
 *  was recorded while `Parser` was not failed.
 *
 *  @param  stateToCheck    `ParserState` to check.
 *  @return `true` if given `stateToCheck` is both valid for the caller
 *      `Parser` (@see `IsStateValid()`) and describes a non-failed state;
 *      `false` otherwise.
 */
public final function bool IsStateOk(ParserState stateToCheck)
{
    return (IsStateValid(stateToCheck) && !stateToCheck.failed);
}
|
|
|
/**
 *  Resets parser to the state described by `stateToRestore` argument
 *  (the state `Parser` was in at the moment `stateToRestore` was obtained).
 *
 *  If given `stateToRestore` is from a different `Parser` or
 *  the owner `Parser` was reinitialized after passed state was obtained, -
 *  function will simply put caller `Parser` into a failed state,
 *  leaving the rest of its current state untouched.
 *      Note that caller `Parser` being in a failed state after this call
 *  doesn't necessarily mean that described issues are present:
 *  `stateToRestore` can itself describe a failed state of the `Parser`.
 *
 *  @param  stateToRestore  `ParserState` that this method will attempt
 *      to set for the caller `Parser`.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore))
    {
        currentState = stateToRestore;
    }
    else
    {
        //  Foreign or outdated state - refuse it and report failure
        currentState.failed = true;
    }
    return self;
}
|
|
|
/**
 *  Remembers current state of `Parser` in an internal checkpoint variable,
 *  from which it can later be restored by an `R()` call.
 *
 *  Only non-failed states are saved: the call fails (and changes nothing)
 *  if and only if caller `Parser` is in a failed state.
 *
 *  `Confirm()` and `R()` are essentially convenience wrapper functions for
 *  `GetCurrentState()` and `RestoreState()` calls + state storage variable.
 *
 *  @return `true` if current state was recorded in `Parser` as confirmed and
 *      `false` otherwise.
 */
public final function bool Confirm()
{
    if (Ok())
    {
        confirmedState = currentState;
        return true;
    }
    return false;
}
|
|
|
/**
 *  Resets `Parser` to the state recorded by the last successful
 *  `Confirm()` call. If there was no such call since initialization -
 *  reverts `Parser` to the state it had right after initialization.
 *
 *  Since only non-failed states are ever recorded as confirmed, this also
 *  resets the failed state of a `Parser`. Cannot fail.
 *
 *  `Confirm()` and `R()` are essentially convenience wrapper functions for
 *  `GetCurrentState()` and `RestoreState()` calls + state storage variable.
 *
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser R()
{
    //  Internal checkpoint always belongs to this `Parser`,
    //  so no validity check is needed
    currentState = confirmedState;
    return self;
}
|
|
|
/**
 *  Moves parsing pointer forward, never past the end of the content.
 *
 *  Can only move forward. To go back to a previous state in case of failure
 *  use combination of `GetCurrentState()` and `RestoreState()` functions.
 *
 *  @param  shift   By how many positions to move the parsing pointer.
 *      Values of zero and below are replaced with `1`
 *      (i.e. by default this method moves pointer by `1` position), so
 *      this method cannot be used to "move by zero".
 *  @return Returns the calling object, to allow for function chaining.
 */
protected final function Parser ShiftPointer(optional int shift)
{
    local int newPointer;
    if (shift < 1)
    {
        shift = 1;
    }
    newPointer = currentState.pointer + shift;
    //  Clamp to the position right after the last character
    if (newPointer > content.length)
    {
        newPointer = content.length;
    }
    currentState.pointer = newPointer;
    return self;
}
|
|
|
/**
 *  Returns a character from this `Parser`'s content, addressed relative to
 *  the next character that caller `Parser` must handle.
 *  Does not move the parsing pointer.
 *
 *  @param  shift   If `0` (default value) or negative value is passed -
 *      simply asks for the character that caller `Parser` must handle next.
 *      Otherwise moves that index by `shift` characters, i.e.
 *      `1` to return the character after the next one, etc.
 *  @return Character at a given shift. If resulting index does not fit
 *      `Parser`'s contents, returns a character whose `codePoint` is `-1`.
 *      In particular, `GetCharacter()` with default (`0`) parameter returns
 *      such character if contents of the caller `Parser` are empty or
 *      it has already consumed all input.
 */
protected final function Text.Character GetCharacter(optional int shift)
{
    local int               index;
    local Text.Character    outOfBounds;
    index = currentState.pointer;
    if (shift > 0)
    {
        index += shift;
    }
    if (index >= 0 && index < content.length)
    {
        return content[index];
    }
    outOfBounds.codePoint = -1;
    return outOfBounds;
}
|
|
|
/**
 *  Forces caller `Parser` to enter a failed state.
 *  Parsing pointer is left where it was.
 *
 *  @return Returns the calling object, which allows for a quick exit from
 *      a parsing function with `return Fail();`.
 */
protected final function Parser Fail()
{
    currentState.failed = true;
    return self;
}
|
|
|
/**
 *  Returns amount of code points that have already been parsed,
 *  provided that caller `Parser` is in a correct state.
 *
 *  @return How many Unicode code points have already been parsed
 *      (never negative) if caller `Parser` is in correct state;
 *      otherwise return value is undefined.
 */
public final function int GetParsedLength()
{
    if (currentState.pointer > 0)
    {
        return currentState.pointer;
    }
    return 0;
}
|
|
|
/**
 *  Returns amount of code points that have not yet been parsed,
 *  provided that caller `Parser` is in a correct state.
 *
 *  @return How many Unicode code points are still unparsed
 *      (never negative) if caller `Parser` is in correct state;
 *      otherwise return value is undefined.
 */
public final function int GetRemainingLength()
{
    local int remaining;
    remaining = content.length - currentState.pointer;
    if (remaining < 0)
    {
        remaining = 0;
    }
    return remaining;
}
|
|
|
/**
 *  Checks if caller `Parser` has already parsed all of its content.
 *  `Parser` without any content has, therefore, parsed it all.
 *
 *  Should return `true` iff `GetRemainingLength() == 0`.
 *
 *  @return `true` if caller `Parser` has no more data to parse.
 */
public final function bool HasFinished()
{
    return (content.length <= currentState.pointer);
}
|
|
|
/**
 *  Returns still unparsed part of caller `Parser`'s source as an array of
 *  characters (Unicode code points). Does not move the parsing pointer.
 *
 *  @return Unparsed part of caller `Parser`'s source as an array of
 *      characters; empty array if everything was already parsed.
 */
public final function array<Text.Character> GetRemainderRaw()
{
    local int                   offset, remaining;
    local array<Text.Character> remainder;
    remaining = GetRemainingLength();
    offset = 0;
    while (offset < remaining)
    {
        remainder[remainder.length] = GetCharacter(offset);
        offset += 1;
    }
    return remainder;
}
|
|
|
/**
 *  Returns still unparsed part of caller `Parser`'s source as a `string`.
 *  Does not move the parsing pointer.
 *
 *  @return Unparsed part of caller `Parser`'s source, converted into
 *      a `string` by `TextAPI.RawToString()` with `STRING_Plain` type.
 */
public final function string GetRemainder()
{
    //  `GetRemainderRaw()` collects exactly the characters we need
    return _().text.RawToString(GetRemainderRaw(), STRING_Plain);
}
|
|
|
/**
 *  Returns still unparsed part of caller `Parser`'s source as `Text`.
 *  Does not move the parsing pointer.
 *
 *  @return Unparsed part of caller `Parser`'s source, converted into
 *      `Text` by `TextAPI.FromRaw()`.
 */
public final function Text GetRemainderT()
{
    //  `GetRemainderRaw()` collects exactly the characters we need
    return _().text.FromRaw(GetRemainderRaw());
}
|
|
|
/**
 *  Matches any sequence of whitespace symbols, without returning it.
 *  Starts from where previous parsing function finished.
 *
 *  Can never cause parser to enter failed state.
 *  Does nothing if caller `Parser` was already in a failed state
 *  (in that case `whitespacesAmount` is left untouched).
 *
 *  For what symbols exactly are considered whitespace refer to
 *  the description of `TextAPI.IsWhitespace()` function.
 *
 *  @param  whitespacesAmount   Returns how many whitespace symbols
 *      were skipped (`0` if next symbol is not a whitespace).
 *      Any given value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Skip(optional out int whitespacesAmount)
{
    local TextAPI api;
    if (!Ok()) return self;

    api = _().text;
    whitespacesAmount = 0;
    //  Cycle will end once we either reach a non-whitespace symbol or
    //  there are no more code points to get (`GetCharacter()` then returns
    //  a character with code point `-1`, presumably never a whitespace)
    while (api.IsWhitespace(GetCharacter(whitespacesAmount)))
    {
        whitespacesAmount += 1;
    }
    //  `ShiftPointer()` replaces non-positive shifts with `1`, so calling it
    //  with `0` would wrongly consume one non-whitespace symbol
    if (whitespacesAmount > 0)
    {
        ShiftPointer(whitespacesAmount);
    }
    return self;
}
|
|
|
/**
 *  Function that tries to match given data in `Parser`'s content,
 *  starting from where previous parsing function finished.
 *  On success matched data is consumed; on mismatch (or if there is less
 *  content left than `data` contains) `Parser` enters a failed state.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  data            Data that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *      Empty `data` always matches and consumes nothing.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MatchRaw
(
    array<Text.Character>   data,
    optional bool           caseInsensitive
)
{
    local int       i;
    local TextAPI   api;
    if (!Ok())                              return self;
    //  Empty data trivially matches. We must not reach `ShiftPointer()`
    //  in that case: it replaces non-positive shifts with `1` and would
    //  wrongly consume one symbol.
    if (data.length <= 0)                   return self;
    if (data.length > GetRemainingLength()) return Fail();

    api = _().text;
    for (i = 0; i < data.length; i += 1)
    {
        if (!api.AreEqual(data[i], GetCharacter(i), caseInsensitive))
        {
            return Fail();
        }
    }
    ShiftPointer(data.length);
    return self;
}
|
|
|
/**
 *  Function that tries to match given `string`, starting from where
 *  previous parsing function finished.
 *  `word` is converted into raw data with `TextAPI.StringToRaw()` and
 *  then handled by `MatchRaw()`.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  word            String that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser Match(string word, optional bool caseInsensitive)
{
    local array<Text.Character> rawWord;
    rawWord = _().text.StringToRaw(word);
    return MatchRaw(rawWord, caseInsensitive);
}
|
|
|
/**
 *  Function that tries to match given `Text`, starting from where
 *  previous parsing function finished.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *  Passing `none` as `word` puts `Parser` into a failed state.
 *
 *  @param  word            Text that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *  @param  caseInsensitive If `false` the matching will have to be exact,
 *      using `true` will make this method ignore the case,
 *      where it's applicable.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MatchT(Text word, optional bool caseInsensitive)
{
    if (!Ok())
    {
        return self;
    }
    if (word != none)
    {
        return MatchRaw(word.ToRaw(), caseInsensitive);
    }
    return Fail();
}
|
|
|
/**
 *  Function for parsing unsigned integers in any base from 2 to 36.
 *
 *  This parsing can fail, putting `Parser` into a failed state.
 *  Does nothing if caller `Parser` was already in a failed state
 *  (just like every other parsing method of this class).
 *
 *  NOTE: accumulated value is not checked for overflow, so for numbers that
 *  do not fit into `int` the produced `result` is not meaningful.
 *
 *  @param  result              If parsing is successful, this value will
 *      contain parsed integer, otherwise value is undefined.
 *      Any passed value is discarded.
 *  @param  base                Base, in which integer in question is recorded.
 *      `0` (default value) means base 10; any other value outside of
 *      `[2; 36]` range puts `Parser` into a failed state.
 *  @param  numberLength        If this parameter is less or equal to zero,
 *      function will stop parsing the moment it can't recognize a character as
 *      belonging to a number in a given base.
 *      It will only fail if it couldn't parse a single character;
 *          If this parameter is set to be positive (`> 0`), function will
 *      attempt to use exactly `numberLength` characters for parsing and will
 *      fail if they would not constitute a valid number.
 *  @param  consumedCodePoints  Amount of code points used (consumed) to parse
 *      this number; undefined, if parsing is unsuccessful.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUnsignedInteger
(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints
)
{
    local bool  parsingFixedLength;
    local int   nextPosition;
    //  A failed `Parser` must not consume any more input
    if (!Ok()) return self;

    numberLength = Max(0, numberLength);
    parsingFixedLength = (numberLength != 0);
    if (base == 0)
    {
        base = 10;
    }
    else if (base < 2 || base > 36)
    {
        return Fail();
    }
    result = 0;
    consumedCodePoints = 0;
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) break;
        //  Negative value is treated as "not a digit in this base"
        nextPosition = _().text.CharacterToInt(GetCharacter(), base);
        if (nextPosition < 0) break;

        result = result * base + nextPosition;
        consumedCodePoints += 1;
        ShiftPointer();
    }
    //  Fixed-length numbers need exactly `numberLength` digits,
    //  any number needs at least one
    if (    (parsingFixedLength && consumedCodePoints != numberLength)
        ||  consumedCodePoints < 1)
    {
        return Fail();
    }
    return self;
}
|
|
|
/**
 *  Parses escaped sequence of the type that is usually used in
 *  string literals: backslash "\", followed by any character
 *  (called escaped character later) or, in special cases, several characters.
 *      For most characters escaped sequence is resolved into
 *  the escaped character itself.
 *
 *  Escaped characters listed in `escapeCharactersMap`
 *  (sequences like \n, \r, \t, \b, \f, \v)
 *  are translated into a different code point, corresponding to
 *  the control symbol normally denoted by such sequence.
 *
 *  A Unicode code point can also be directly entered with either of the two
 *  commands:
 *      \U0056
 *      \u56
 *  `\U` must be followed by exactly 4 hexadecimal digits, while
 *  `\u` must be followed by exactly 2 hexadecimal digits
 *  (more compact, but only covers code points that fit into 1 byte).
 *  Missing digits put `Parser` into a failed state.
 *
 *  Fails if there are less than two characters left to parse or
 *  if the next character is not a backslash.
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  denotedCharacter    If parsing is successful, parameter will
 *      contain character denoted by a parsed escaped sequence;
 *      If parsing is unsuccessful, value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MEscapedSequence
(
    out Text.Character denotedCharacter
)
{
    local int               ruleIndex;
    local Text.Character    leadingCharacter;
    if (!Ok())                      return self;
    //  Escaped sequence takes at least two characters
    if (GetRemainingLength() < 2)   return Fail();
    leadingCharacter = GetCharacter();
    if (leadingCharacter.codePoint != CODEPOINT_BACKSLASH)
    {
        return Fail();
    }

    //  Consume both backslash and escaped character
    denotedCharacter = GetCharacter(1);
    ShiftPointer(2);
    //  First check whether escaped character denotes a special code point
    ruleIndex = 0;
    while (ruleIndex < escapeCharactersMap.length)
    {
        if (escapeCharactersMap[ruleIndex].from == denotedCharacter.codePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[ruleIndex].to;
            return self;
        }
        ruleIndex += 1;
    }
    //  Then whether it starts a declaration of arbitrary Unicode code point,
    //  given as a hexadecimal number of fixed length
    if (denotedCharacter.codePoint == CODEPOINT_ULARGE)
    {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 4);
    }
    else if (denotedCharacter.codePoint == CODEPOINT_USMALL)
    {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 2);
    }
    //  Otherwise escaped character simply denotes itself
    return self;
}
|
|
|
/**
 *  Attempts to parse a string literal: a string enclosed in quotation marks
 *  (any character accepted by `TextAPI.IsQuotationMark()`; closing mark
 *  must be equal to the opening one).
 *      String literals can contain escaped sequences.
 *      String literals MUST end with closing quotation mark, otherwise
 *  `Parser` enters a failed state.
 *  @see `MEscapedSequence()`
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing is successful, this array will contain the
 *      contents of string literal (without enclosing quotation marks)
 *      with resolved escaped sequences;
 *      if parsing has failed, its value is undefined.
 *      Any passed contents are simply discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteralRaw(out array<Text.Character> result)
{
    local TextAPI           api;
    local Text.Character    openingQuote;
    local Text.Character    currentCharacter;
    local Text.Character    unescapedCharacter;
    if (!Ok()) return self;
    api = _().text;
    openingQuote = GetCharacter();
    if (!api.IsQuotationMark(openingQuote)) return Fail();

    //  Step over the opening quotation mark
    ShiftPointer();
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        //  Un-escaped copy of opening mark closes the literal
        if (api.AreEqual(currentCharacter, openingQuote))
        {
            ShiftPointer();
            return self;
        }
        //  Ordinary character is copied as-is
        if (!api.IsCodePoint(currentCharacter, CODEPOINT_BACKSLASH))
        {
            result[result.length] = currentCharacter;
            ShiftPointer();
            continue;
        }
        //  Backslash MUST start a valid escaped sequence
        if (!MEscapedSequence(unescapedCharacter).Ok())
        {
            return Fail();
        }
        result[result.length] = unescapedCharacter;
    }
    //  Ran out of content before meeting a closing quotation mark
    return Fail();
}
|
|
|
/**
 *  Attempts to parse a string literal: a string enclosed in quotation marks.
 *      String literals can contain escaped sequences.
 *      String literals MUST end with closing quotation mark.
 *  Works through `MStringLiteralRaw()`, converting its result into a `string`.
 *  @see `MStringLiteralRaw()`, `MEscapedSequence()`
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing is successful, this `string` will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, passed value is left unchanged and
 *      should be treated as undefined.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteral(out string result)
{
    local array<Text.Character> rawLiteral;
    if (!Ok()) return self;

    MStringLiteralRaw(rawLiteral);
    //  Only touch `result` if literal was actually parsed
    if (Ok())
    {
        result = _().text.RawToString(rawLiteral, STRING_Plain);
    }
    return self;
}
|
|
|
/**
 *  Attempts to parse a string literal: a string enclosed in quotation marks.
 *      String literals can contain escaped sequences.
 *      String literals MUST end with closing quotation mark.
 *  Works through `MStringLiteralRaw()`, converting its result into `Text`.
 *  @see `MStringLiteralRaw()`, `MEscapedSequence()`
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing is successful, this `Text` will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, passed value is left unchanged and
 *      should be treated as undefined.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringLiteralT(out Text result)
{
    local array<Text.Character> rawLiteral;
    if (!Ok()) return self;

    MStringLiteralRaw(rawLiteral);
    //  Only touch `result` if literal was actually parsed
    if (Ok())
    {
        result = _().text.FromRaw(rawLiteral);
    }
    return self;
}
|
|
|
/**
 *  Matches everything until it finds one of the breaking symbols:
 *      1. a specified character (by default - character with
 *          default field values);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
 *  Breaking symbol itself is not consumed.
 *  This method cannot fail; it does nothing if caller `Parser` was already
 *  in a failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this array as a sequence of characters.
 *      Any passed contents are discarded.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      a character equal (according to `TextAPI.AreEqual()`) to this one;
 *      it will not be included in the `result`.
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntilRaw
(
    out array<Text.Character>   result,
    optional Text.Character     characterBreak,
    optional bool               whitespacesBreak,
    optional bool               quotesBreak
)
{
    local TextAPI           api;
    local Text.Character    currentCharacter;
    if (!Ok()) return self;

    api = _().text;
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        //  Stop in front of the first break symbol, without consuming it
        if (    api.AreEqual(currentCharacter, characterBreak)
            ||  (whitespacesBreak && api.IsWhitespace(currentCharacter))
            ||  (quotesBreak && api.IsQuotationMark(currentCharacter)))
        {
            break;
        }
        result[result.length] = currentCharacter;
        ShiftPointer();
    }
    return self;
}
|
|
|
/**
 *  Matches everything until it finds one of the breaking symbols:
 *      1. a specified character;
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
 *  Works through `MUntilRaw()`, converting its result into a `string`.
 *  This method cannot fail; it does nothing if caller `Parser` was already
 *  in a failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `string`.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntil
(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local array<Text.Character> matchedCharacters;
    if (!Ok())
    {
        return self;
    }
    MUntilRaw(matchedCharacters, characterBreak, whitespacesBreak, quotesBreak);
    result = _().text.RawToString(matchedCharacters, STRING_Plain);
    return self;
}
|
|
|
/**
 *  Matches everything until it finds one of the breaking symbols:
 *      1. a specified character;
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
 *  Works through `MUntilRaw()`, converting its result into `Text`.
 *  This method cannot fail; it does nothing if caller `Parser` was already
 *  in a failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `Text`.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MUntilT
(
    out Text                result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local array<Text.Character> matchedCharacters;
    if (!Ok())
    {
        return self;
    }
    MUntilRaw(matchedCharacters, characterBreak, whitespacesBreak, quotesBreak);
    result = _().text.FromRaw(matchedCharacters);
    return self;
}
|
|
|
/**
 *  Parses a string as either "simple" or "quoted".
 *
 *  "Simple" string is everything up to the next whitespace or
 *  quotation mark (or the end of content), @see `MUntilRaw()`.
 *  Reading empty simple string (either due to lack of further data or
 *  instantly encountering a break symbol) is not considered a failure.
 *
 *  "Quoted" string starts with a quotation mark: in that case this method
 *  acts exactly like `MStringLiteralRaw()`, which means that
 *  quoted string must end with the corresponding closing (un-escaped) mark,
 *  otherwise `Parser` enters a failed state.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringRaw(out array<Text.Character> result)
{
    if (!Ok()) return self;

    if (!_().text.IsQuotationMark(GetCharacter()))
    {
        //  "Simple" string: whitespaces and quotation marks break it
        return MUntilRaw(result,, true, true);
    }
    //  "Quoted" string
    return MStringLiteralRaw(result);
}
|
|
|
/**
 *  Parses a string that is written in either "simple" or "quoted" form and
 *  returns it as a plain `string`.
 *
 *  Reading an empty string (either due to the lack of further data or because
 *  a break symbol was encountered right away) is not considered a failure.
 *
 *  Quoted string starts with a quotation mark and ends either
 *  at the corresponding closing (un-escaped) mark
 *  or when `Parser`'s input has been fully consumed.
 *  If string started with a quotation mark, this method will act exactly
 *  like `MStringLiteral()`.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MString(out string result)
{
    local array<Text.Character> parsedCharacters;

    if (Ok())
    {
        //  Parsing itself is done by the raw variant, after that
        //  the characters are converted into a plain `string`.
        MStringRaw(parsedCharacters);
        result = _().text.RawToString(parsedCharacters, STRING_Plain);
    }
    return self;
}
|
|
|
/**
 *  Parses a string that is written in either "simple" or "quoted" form and
 *  returns it as a `Text`.
 *
 *  Reading an empty string (either due to the lack of further data or because
 *  a break symbol was encountered right away) is not considered a failure.
 *
 *  Quoted string starts with a quotation mark and ends either
 *  at the corresponding closing (un-escaped) mark
 *  or when `Parser`'s input has been fully consumed.
 *  If string started with a quotation mark, this method will act exactly
 *  like `MStringLiteralT()`.
 *
 *  @param  result  If parsing is successful - string's contents will be
 *      recorded here; if parsing has failed - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MStringT(out Text result)
{
    local array<Text.Character> parsedCharacters;

    if (Ok())
    {
        //  Parsing itself is done by the raw variant, after that
        //  the characters are handed to `FromRaw()` to obtain a `Text`.
        MStringRaw(parsedCharacters);
        result = _().text.FromRaw(parsedCharacters);
    }
    return self;
}
|
|
|
/**
 *  Matches a (possibly empty) sequence of whitespace symbols.
 *
 *  Consumes characters for as long as they are recognized as whitespaces by
 *  `TextAPI.IsWhitespace()`; stops at the first other character or at the end
 *  of the input.
 *
 *  Cannot fail (not being able to read any input is not considered a failure).
 *
 *  @param  result  If parsing was successful - consumed whitespace characters
 *      will be recorded in this array, otherwise - undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespacesRaw(out array<Text.Character> result)
{
    local TextAPI           textAPI;
    local Text.Character    candidate;

    if (!Ok())
    {
        return self;
    }
    textAPI = _().text;
    //  Drop whatever the caller has passed in `result`
    result.length = 0;
    while (!HasFinished())
    {
        candidate = GetCharacter();
        if (textAPI.IsWhitespace(candidate))
        {
            //  Append the whitespace and move on to the next character
            result[result.length] = candidate;
            ShiftPointer();
        }
        else
        {
            //  First non-whitespace character ends the sequence and
            //  is left unconsumed
            break;
        }
    }
    return self;
}
|
|
|
/**
 *  Matches a (possibly empty) sequence of whitespace symbols and returns it
 *  as a plain `string`.
 *
 *  Cannot fail (not being able to read any input is not considered a failure).
 *
 *  @param  result  If parsing was successful - whitespaces will be
 *      recorded here, otherwise - undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespaces(out string result)
{
    local array<Text.Character> whitespaceCharacters;

    if (Ok())
    {
        //  Raw variant does the matching, this method only converts
        //  its output into a plain `string`.
        MWhitespacesRaw(whitespaceCharacters);
        result = _().text.RawToString(whitespaceCharacters, STRING_Plain);
    }
    return self;
}
|
|
|
/**
 *  Matches a (possibly empty) sequence of whitespace symbols and returns it
 *  as a `Text`.
 *
 *  Cannot fail (not being able to read any input is not considered a failure).
 *
 *  @param  result  If parsing was successful - whitespaces will be
 *      recorded here, otherwise - undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MWhitespacesT(out Text result)
{
    local array<Text.Character> whitespaceCharacters;

    if (Ok())
    {
        //  Raw variant does the matching, this method only hands
        //  its output to `FromRaw()` to obtain a `Text`.
        MWhitespacesRaw(whitespaceCharacters);
        result = _().text.FromRaw(whitespaceCharacters);
    }
    return self;
}
|
|
|
/**
 *  Parses next character as itself.
 *
 *  Can only fail if caller `Parser` has already exhausted all available data.
 *
 *  @param  result  If parsing was successful - next character,
 *      otherwise - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MCharacter(out Text.Character result)
{
    if (Ok())
    {
        //  Nothing left to read - this is the only way this method can fail
        if (HasFinished())
        {
            return Fail();
        }
        result = GetCharacter();
        ShiftPointer();
    }
    return self;
}
|
|
|
/**
 *  Parses next code point as a byte.
 *
 *  Can fail if caller `Parser` has already exhausted all available data or
 *  next Unicode code point cannot fit into the `byte` value range
 *  (from `0` to `BYTE_MAX` inclusive).
 *
 *  @param  result  If parsing was successful - next Unicode code point as
 *      a byte, otherwise - value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MByte(out byte result)
{
    local Text.Character    parsedCharacter;
    local bool              fitsIntoByte;

    if (!Ok())
    {
        return self;
    }
    //  Reading the character itself can fail when there is no more input
    if (!MCharacter(parsedCharacter).Ok())
    {
        return Fail();
    }
    fitsIntoByte =      parsedCharacter.codePoint >= 0
                    &&  parsedCharacter.codePoint <= BYTE_MAX;
    if (!fitsIntoByte)
    {
        return Fail();
    }
    result = parsedCharacter.codePoint;
    return self;
}
|
|
|
/**
 *  Tries to parse a sign: either "+" or "-".
 *
 *  @param  result  Value of `ParsedSign` will be recorded here,
 *      depending on what sign was encountered.
 *      `SIGN_Missing` value is only possible if we allow sign to be missing.
 *  @param  allowMissingSign    By default `false` means that parsing will fail
 *      if next character is neither "+" nor "-";
 *      `true` means that parsing will not fail even if there is no sign -
 *      method will then consume no input and will return `SIGN_Missing`
 *      as a result.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MSign(
    out ParsedSign  result,
    optional bool   allowMissingSign)
{
    local ParserState stateBeforeSign;

    if (!Ok())
    {
        return self;
    }
    stateBeforeSign = GetCurrentState();
    //  Try "-" first...
    if (Match("-").Ok())
    {
        result = SIGN_Minus;
        return self;
    }
    //  ...then roll back the failed attempt and try "+"
    if (RestoreState(stateBeforeSign).Match("+").Ok())
    {
        result = SIGN_Plus;
        return self;
    }
    //  Neither sign was matched. Unless a missing sign is allowed, the parser
    //  is left as it is after the last unsuccessful `Match()` call.
    if (allowMissingSign)
    {
        result = SIGN_Missing;
        RestoreState(stateBeforeSign);
    }
    return self;
}
|
|
|
/**
 *  Tries to parse a number prefix that determines a base system for denoting
 *  integer numbers:
 *      1. `0x` means hexadecimal;
 *      2. `0b` means binary;
 *      3. `0o` means octal;
 *      4. otherwise we use decimal system.
 *
 *  This parsing method cannot fail.
 *
 *  Parser consumes appropriate prefix; nothing if decimal system is determined.
 *
 *  @param  base    Determined base will be recorded here:
 *      `16`, `2`, `8` or `10`. Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MBase(out int base)
{
    local ParserState stateBeforePrefix;

    if (!Ok())
    {
        return self;
    }
    stateBeforePrefix = GetCurrentState();
    if (Match("0x").Ok())
    {
        base = 16;
        return self;
    }
    //  Every failed attempt is rolled back before trying the next prefix
    if (RestoreState(stateBeforePrefix).Match("0b").Ok())
    {
        base = 2;
        return self;
    }
    if (RestoreState(stateBeforePrefix).Match("0o").Ok())
    {
        base = 8;
        return self;
    }
    //  No known prefix: consume nothing and fall back to decimal system
    RestoreState(stateBeforePrefix);
    base = 10;
    return self;
}
|
|
|
/**
 *  Parses signed integer either in a directly given base (`base`) or in an
 *  auto-determined one (based on prefix, @see `MBase()`).
 *
 *  Integers are expected in form: (+/-)(0x/0b/0o)<sequence of digits>.
 *  Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
 *
 *  @param  result  If parsing is successful - parsed value will be
 *      recorded here; if parsing fails - value is undetermined.
 *      Any passed value is discarded.
 *  @param  base    Base in which function must attempt to parse a number;
 *      Default value (`0`) means function must auto-determine base,
 *      based on the prefix, otherwise must be between 2 and 36.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign    parsedSign;
    local bool          mustDetectBase;

    if (!Ok())
    {
        return self;
    }
    mustDetectBase = (base == 0);
    //  Sign is optional, so it is allowed to be missing
    MSign(parsedSign, true);
    if (mustDetectBase)
    {
        MBase(base);
    }
    //  Digits are parsed as an unsigned value, sign is applied afterwards
    MUnsignedInteger(result, base);
    if (parsedSign == SIGN_Minus)
    {
        result = -result;
    }
    return self;
}
|
|
|
//  Internal function for parsing fractional part (including the dot ".")
//  of the text representation for floating point number (decimal system only).
//  Cannot fail, returns `0.0` if it couldn't parse anything.
//  If the dot is present, but is not followed by a valid sequence of digits,
//  then only the dot is consumed and `0.0` is returned.
protected final function Parser MFractionalPart(out float result)
{
    local ParserState   checkpoint;
    local int           fractionalInt;
    local int           digitsRead;
    if (!Ok()) return self;

    result = 0.0;
    //  Is there even a fractional part?
    checkpoint = GetCurrentState();
    if (!Match(".").Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  Digits after the dot are read as a single decimal integer;
    //  `digitsRead` is needed to account for the leading zeroes
    //  (e.g. "05" is read as `5`, but means `0.05`).
    //  NOTE(review): a long enough digit sequence will not fit into `int` -
    //  how that is handled depends on `MUnsignedInteger()`; confirm.
    checkpoint = GetCurrentState();
    if (!MUnsignedInteger(fractionalInt, 10,, digitsRead).Ok())
    {
        //  No digits after the dot: keep the dot consumed, `result` is
        //  already `0.0` at this point
        RestoreState(checkpoint);
        return self;
    }
    //  Divide by a power of ten instead of multiplying by a power of `0.1`:
    //  `0.1` has no exact binary representation, so its powers accumulate
    //  avoidable rounding error.
    result = float(fractionalInt) / (10.0 ** digitsRead);
    return self;
}
|
|
|
//  Internal function for parsing exponent part (including the symbol "e")
//  of the text representation for floating point number (decimal system only).
//  Can only fail if symbol "e" / "E" is present, but there is no valid
//  integer right after it (whitespace symbols in-between are forbidden).
//  Returns `0` if there was no exponent to parse.
protected final function Parser MExponentPart(out int result)
{
    local ParserState   checkpoint;
    local ParsedSign    exponentSign;
    if (!Ok()) return self;

    //  Reset the output right away: otherwise a missing exponent would leave
    //  whatever value the caller has passed in `result`, instead of
    //  the promised zero.
    result = 0;
    //  Is there even an exponential part?
    checkpoint = GetCurrentState();
    if (!Match("e", true).Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    //  If yes - parse it: optional sign, followed by decimal digits
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus)
    {
        result *= -1;
    }
    return self;
}
|
|
|
//  Internal function that skips an optional suffix ("f" or "F") of
//  the text representation for floating point number.
//  Cannot fail. Consumes at most one character and only when that character
//  is either "f" or "F".
protected final function Parser MFloatSuffix()
{
    local ParserState stateBeforeSuffix;

    if (Ok())
    {
        stateBeforeSuffix = GetCurrentState();
        //  Missing suffix is fine: simply undo the unsuccessful match
        if (!Match("f", true).Ok())
        {
            RestoreState(stateBeforeSuffix);
        }
    }
    return self;
}
|
|
|
/**
 *  Parses signed floating point number in JSON form + optional "f" / "F"
 *  suffix at the end.
 *
 *  Number is read as a sequence of: optional sign, integer part
 *  (decimal digits), optional fractional part, optional exponent part and
 *  optional float suffix.
 *
 *  @param  result  If parsing is successful - parsed value will be
 *      recorded here; if parsing fails - value is undetermined.
 *      Any passed value is discarded.
 *  @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MNumber(out float result)
{
    local ParsedSign    numberSign;
    local int           wholePart, exponent;
    local float         fraction;
    local float         magnitude;

    if (!Ok())
    {
        return self;
    }
    //  Read all the components first...
    self.MSign(numberSign, true)
        .MUnsignedInteger(wholePart, 10)
        .MFractionalPart(fraction)
        .MExponentPart(exponent)
        .MFloatSuffix();
    //  ...and only assemble the value if every step has succeeded
    if (Ok())
    {
        magnitude = (float(wholePart) + fraction) * (10.0 ** exponent);
        if (numberSign == SIGN_Minus)
        {
            result = -magnitude;
        }
        else
        {
            result = magnitude;
        }
    }
    return self;
}
|
|
|
defaultproperties
{
    //  No initializations were done yet
    version=0
    //  Largest value that fits into `byte`
    BYTE_MAX=255
    //  Code points of the characters that get special treatment
    CODEPOINT_BACKSLASH=92  // backslash
    CODEPOINT_USMALL=117    // 'u'
    CODEPOINT_ULARGE=85     // 'U'
    //  Escape sequences: `from` is the code point of the letter that follows
    //  the backslash, `to` is the code point it is mapped to
    escapeCharactersMap(0)=(from=110,to=10) // 'n' -> line feed
    escapeCharactersMap(1)=(from=114,to=13) // 'r' -> carriage return
    escapeCharactersMap(2)=(from=116,to=9)  // 't' -> horizontal tab
    escapeCharactersMap(3)=(from=98,to=8)   // 'b' -> backspace
    escapeCharactersMap(4)=(from=102,to=12) // 'f' -> form feed
    escapeCharactersMap(5)=(from=118,to=11) // 'v' -> vertical tab
}